diff options
128 files changed, 2930 insertions, 1493 deletions
diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt new file mode 100644 index 00000000000..334eca2bf93 --- /dev/null +++ b/Documentation/devicetree/bindings/net/via-rhine.txt @@ -0,0 +1,17 @@ +* VIA Rhine 10/100 Network Controller + +Required properties: +- compatible : Should be "via,vt8500-rhine" for integrated + Rhine controllers found in VIA VT8500, WonderMedia WM8950 + and similar. These are listed as 1106:3106 rev. 0x84 on the + virtual PCI bus under vendor-provided kernels +- reg : Address and length of the io space +- interrupts : Should contain the controller interrupt line + +Examples: + +ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; +}; diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index a383c00392d..9c723ecd002 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -585,13 +585,19 @@ mode balance-tlb or 5 Adaptive transmit load balancing: channel bonding that - does not require any special switch support. The - outgoing traffic is distributed according to the - current load (computed relative to the speed) on each - slave. Incoming traffic is received by the current - slave. If the receiving slave fails, another slave - takes over the MAC address of the failed receiving - slave. + does not require any special switch support. + + In tlb_dynamic_lb=1 mode; the outgoing traffic is + distributed according to the current load (computed + relative to the speed) on each slave. + + In tlb_dynamic_lb=0 mode; the load balancing based on + current load is disabled and the load is distributed + only using the hash distribution. + + Incoming traffic is received by the current slave. + If the receiving slave fails, another slave takes over + the MAC address of the failed receiving slave. Prerequisite: @@ -736,6 +742,28 @@ primary_reselect This option was added for bonding version 3.6.0. +tlb_dynamic_lb + + Specifies if dynamic shuffling of flows is enabled in tlb + mode. The value has no effect on any other modes. + + The default behavior of tlb mode is to shuffle active flows across + slaves based on the load in that interval. This gives nice lb + characteristics but can cause packet reordering. If re-ordering is + a concern use this variable to disable flow shuffling and rely on + load balancing provided solely by the hash distribution. + xmit-hash-policy can be used to select the appropriate hashing for + the setup. + + The sysfs entry can be used to change the setting per bond device + and the initial value is derived from the module parameter. The + sysfs entry is allowed to be changed only if the bond device is + down. + + The default value is "1" that enables flow shuffling while value "0" + disables it. This option was added in bonding driver 3.7.1 + + updelay Specifies the time, in milliseconds, to wait before enabling a @@ -769,7 +797,7 @@ use_carrier xmit_hash_policy Selects the transmit hash policy to use for slave selection in - balance-xor and 802.3ad modes. Possible values are: + balance-xor, 802.3ad, and tlb modes. Possible values are: layer2 diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e88..82e1cb0b3da 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table: cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) + rand prandom_u32() These extensions can also be prefixed with '#'. Examples for low-level BPF: @@ -308,6 +309,18 @@ Examples for low-level BPF: ret #-1 drop: ret #0 +** icmp random packet sampling, 1 in 4 + ldh [12] + jne #0x800, drop + ldb [23] + jneq #1, drop + # get a random uint32 number + ld rand + mod #4 + jneq #1, drop + ret #-1 + drop: ret #0 + ** SECCOMP filter example: ld [4] /* offsetof(struct seccomp_data, arch) */ diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi index 51d0e912c8f..1929ad390d8 100644 --- a/arch/arm/boot/dts/vt8500.dtsi +++ b/arch/arm/boot/dts/vt8500.dtsi @@ -165,5 +165,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi index 7525982262a..b1c59a766a1 100644 --- a/arch/arm/boot/dts/wm8650.dtsi +++ b/arch/arm/boot/dts/wm8650.dtsi @@ -218,5 +218,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8850.dtsi b/arch/arm/boot/dts/wm8850.dtsi index d98386dd288..8fbccfbe75f 100644 --- a/arch/arm/boot/dts/wm8850.dtsi +++ b/arch/arm/boot/dts/wm8850.dtsi @@ -298,5 +298,11 @@ bus-width = <4>; sdon-inverted; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index b667a51ed21..9a0d61e0c18 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2440,7 +2440,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) goto err_free; } - slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg); + slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg; first_ok_slave = NULL; bond_for_each_slave_rcu(bond, slave, iter) { diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 9f69e818b00..70de039dad2 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1347,6 +1347,77 @@ void bond_alb_deinitialize(struct bonding *bond) rlb_deinitialize(bond); } +static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, + struct slave *tx_slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct ethhdr *eth_data = eth_hdr(skb); + + if (!tx_slave) { + /* unbalanced or unassigned, send through primary */ + tx_slave = rcu_dereference(bond->curr_active_slave); + if (bond->params.tlb_dynamic_lb) + bond_info->unbalanced_load += skb->len; + } + + if (tx_slave && SLAVE_IS_OK(tx_slave)) { + if (tx_slave != rcu_dereference(bond->curr_active_slave)) { + ether_addr_copy(eth_data->h_source, + tx_slave->dev->dev_addr); + } + + bond_dev_queue_xmit(bond, skb, tx_slave->dev); + goto out; + } + + if (tx_slave && bond->params.tlb_dynamic_lb) { + _lock_tx_hashtbl(bond); + __tlb_clear_slave(bond, tx_slave, 0); + _unlock_tx_hashtbl(bond); + } + + /* no suitable interface, frame not sent */ + dev_kfree_skb_any(skb); +out: + return NETDEV_TX_OK; +} + +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct ethhdr *eth_data; + struct slave *tx_slave = NULL; + u32 hash_index; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); + + /* Do not TX balance any multicast or broadcast */ + if (!is_multicast_ether_addr(eth_data->h_dest)) { + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPX): + /* In case of IPX, it will falback to L2 hash */ + case htons(ETH_P_IPV6): + hash_index = bond_xmit_hash(bond, skb); + if (bond->params.tlb_dynamic_lb) { + tx_slave = tlb_choose_channel(bond, + hash_index & 0xFF, + skb->len); + } else { + struct list_head *iter; + int idx = hash_index % bond->slave_cnt; + + bond_for_each_slave_rcu(bond, tx_slave, iter) + if (--idx < 0) + break; + } + break; + } + } + return bond_do_alb_xmit(skb, bond, tx_slave); +} + int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); @@ -1355,7 +1426,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct slave *tx_slave = NULL; static const __be32 ip_bcast = htonl(0xffffffff); int hash_size = 0; - int do_tx_balance = 1; + bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; struct ipv6hdr *ip6hdr; @@ -1370,7 +1441,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || (iph->daddr == ip_bcast) || (iph->protocol == IPPROTO_IGMP)) { - do_tx_balance = 0; + do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); @@ -1382,7 +1453,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) * that here just in case. */ if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1390,7 +1461,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) * broadcasts in IPv4. */ if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1400,7 +1471,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) */ ip6hdr = ipv6_hdr(skb); if (ipv6_addr_any(&ip6hdr->saddr)) { - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1410,7 +1481,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) case ETH_P_IPX: if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1419,7 +1490,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) * this family since it has an "ARP" like * mechanism */ - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1427,12 +1498,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) hash_size = ETH_ALEN; break; case ETH_P_ARP: - do_tx_balance = 0; + do_tx_balance = false; if (bond_info->rlb_enabled) tx_slave = rlb_arp_xmit(skb, bond); break; default: - do_tx_balance = 0; + do_tx_balance = false; break; } @@ -1441,32 +1512,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) tx_slave = tlb_choose_channel(bond, hash_index, skb->len); } - if (!tx_slave) { - /* unbalanced or unassigned, send through primary */ - tx_slave = rcu_dereference(bond->curr_active_slave); - bond_info->unbalanced_load += skb->len; - } - - if (tx_slave && SLAVE_IS_OK(tx_slave)) { - if (tx_slave != rcu_dereference(bond->curr_active_slave)) { - ether_addr_copy(eth_data->h_source, - tx_slave->dev->dev_addr); - } - - bond_dev_queue_xmit(bond, skb, tx_slave->dev); - goto out; - } - - if (tx_slave) { - _lock_tx_hashtbl(bond); - __tlb_clear_slave(bond, tx_slave, 0); - _unlock_tx_hashtbl(bond); - } - - /* no suitable interface, frame not sent */ - dev_kfree_skb_any(skb); -out: - return NETDEV_TX_OK; + return bond_do_alb_xmit(skb, bond, tx_slave); } void bond_alb_monitor(struct work_struct *work) diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index e09dd4bfaff..5fc76c01636 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -175,6 +175,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); void bond_alb_monitor(struct work_struct *); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 69aff72c895..9d08e007d85 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3015,20 +3015,18 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, * bond_xmit_hash - generate a hash value based on the xmit policy * @bond: bonding device * @skb: buffer to use for headers - * @count: modulo value * * This function will extract the necessary headers from the skb buffer and use * them to generate a hash based on the xmit_policy set in the bonding device - * which will be reduced modulo count before returning. */ -int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count) +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) { struct flow_keys flow; u32 hash; if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || !bond_flow_dissect(bond, skb, &flow)) - return bond_eth_hash(skb) % count; + return bond_eth_hash(skb); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) @@ -3039,7 +3037,7 @@ int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count) hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash % count; + return hash; } /*-------------------------- Device entry points ----------------------------*/ @@ -3098,7 +3096,8 @@ static int bond_open(struct net_device *bond_dev) */ if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) return -ENOMEM; - queue_delayed_work(bond->wq, &bond->alb_work, 0); + if (bond->params.tlb_dynamic_lb) + queue_delayed_work(bond->wq, &bond->alb_work, 0); } if (bond->params.miimon) /* link check interval, in milliseconds. */ @@ -3666,7 +3665,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt)); + bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt); return NETDEV_TX_OK; } @@ -3776,8 +3775,9 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev case BOND_MODE_8023AD: return bond_3ad_xmit_xor(skb, dev); case BOND_MODE_ALB: - case BOND_MODE_TLB: return bond_alb_xmit(skb, dev); + case BOND_MODE_TLB: + return bond_tlb_xmit(skb, dev); default: /* Should never happen, mode already checked */ pr_err("%s: Error: Unknown bonding mode %d\n", @@ -3998,7 +3998,8 @@ static int bond_check_params(struct bond_params *params) if (xmit_hash_policy) { if ((bond_mode != BOND_MODE_XOR) && - (bond_mode != BOND_MODE_8023AD)) { + (bond_mode != BOND_MODE_8023AD) && + (bond_mode != BOND_MODE_TLB)) { pr_info("xmit_hash_policy param is irrelevant in mode %s\n", bond_mode_name(bond_mode)); } else { @@ -4304,6 +4305,7 @@ static int bond_check_params(struct bond_params *params) params->min_links = min_links; params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; + params->tlb_dynamic_lb = 1; /* Default value */ if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 724e30fa20b..9fba7a1e6d5 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -70,6 +70,8 @@ static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_slaves_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -179,6 +181,12 @@ static const struct bond_opt_value bond_lp_interval_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_option bond_opts[] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -199,7 +207,7 @@ static const struct bond_option bond_opts[] = { [BOND_OPT_XMIT_HASH] = { .id = BOND_OPT_XMIT_HASH, .name = "xmit_hash_policy", - .desc = "balance-xor and 802.3ad hashing method", + .desc = "balance-xor, 802.3ad, and tlb hashing method", .values = bond_xmit_hashtype_tbl, .set = bond_option_xmit_hash_policy_set }, @@ -364,6 +372,15 @@ static const struct bond_option bond_opts[] = { .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_slaves_set }, + [BOND_OPT_TLB_DYNAMIC_LB] = { + .id = BOND_OPT_TLB_DYNAMIC_LB, + .name = "dynamic_lb", + .desc = "Enable dynamic flow shuffling", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)), + .values = bond_tlb_dynamic_lb_tbl, + .flags = BOND_OPTFLAG_IFDOWN, + .set = bond_option_tlb_dynamic_lb_set, + }, { } }; @@ -1337,3 +1354,13 @@ err_no_cmd: ret = -EPERM; goto out; } + +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + pr_info("%s: Setting dynamic-lb to %s (%llu)\n", + bond->dev->name, newval->string, newval->value); + bond->params.tlb_dynamic_lb = newval->value; + + return 0; +} diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h index 12be9e1bfb0..c1860f06145 100644 --- a/drivers/net/bonding/bond_options.h +++ b/drivers/net/bonding/bond_options.h @@ -62,6 +62,7 @@ enum { BOND_OPT_RESEND_IGMP, BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, + BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_LAST }; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 0e8b268da0a..431892f1a4c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1039,6 +1039,34 @@ static ssize_t bonding_store_lp_interval(struct device *d, static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, bonding_show_lp_interval, bonding_store_lp_interval); +static ssize_t bonding_show_tlb_dynamic_lb(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb); +} + +static ssize_t bonding_store_tlb_dynamic_lb(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct bonding *bond = to_bond(d); + int ret; + + ret = bond_opt_tryset_rtnl(bond, BOND_OPT_TLB_DYNAMIC_LB, + (char *)buf); + if (!ret) + ret = count; + + return ret; +} + +static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR, + bonding_show_tlb_dynamic_lb, + bonding_store_tlb_dynamic_lb); + static ssize_t bonding_show_packets_per_slave(struct device *d, struct device_attribute *attr, char *buf) @@ -1099,6 +1127,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_min_links.attr, &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, + &dev_attr_tlb_dynamic_lb.attr, NULL, }; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index b8bdd0acc8f..c1c7c2f12ac 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -174,6 +174,7 @@ struct bond_params { int resend_igmp; int lp_interval; int packets_per_slave; + int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; }; @@ -499,7 +500,7 @@ int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); -int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count); +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); void bond_create_debugfs(void); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 97db5a7179d..2f67c7c8d41 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -120,6 +120,9 @@ static inline char *nic_name(struct pci_dev *pdev) #define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 +#define RSS_INDIR_TABLE_LEN 128 +#define RSS_HASH_KEY_LEN 40 + struct be_dma_mem { void *va; dma_addr_t dma; @@ -409,6 +412,13 @@ struct be_resources { u32 if_cap_flags; }; +struct rss_info { + u64 rss_flags; + u8 rsstable[RSS_INDIR_TABLE_LEN]; + u8 rss_queue[RSS_INDIR_TABLE_LEN]; + u8 rss_hkey[RSS_HASH_KEY_LEN]; +}; + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -507,7 +517,7 @@ struct be_adapter { u32 msg_enable; int be_get_temp_freq; u8 pf_number; - u64 rss_flags; + struct rss_info rss_info; }; #define be_physfn(adapter) (!adapter->virtfn) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index d1ec15af0d2..07e78e89a34 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2020,13 +2020,10 @@ int be_cmd_reset_function(struct be_adapter *adapter) } int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size) + u32 rss_hash_opts, u16 table_size, u8 *rss_hkey) { struct be_mcc_wrb *wrb; struct be_cmd_req_rss_config *req; - u32 myhash[10] = {0x15d43fa5, 0x2534685a, 0x5f87693a, 0x5668494e, - 0x33cf6a53, 0x383334c6, 0x76ac4257, 0x59b242b2, - 0x3ea83c02, 0x4a110304}; int status; if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) @@ -2049,7 +2046,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, req->hdr.version = 1; memcpy(req->cpu_table, rsstable, table_size); - memcpy(req->hash, myhash, sizeof(myhash)); + memcpy(req->hash, rss_hkey, RSS_HASH_KEY_LEN); be_dws_cpu_to_le(req->hash, sizeof(req->hash)); status = be_mbox_notify_wait(adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index b60e4d53c1c..4ea79b9c67e 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, u32 *function_mode, u32 *function_caps, u16 *asic_rev); int be_cmd_reset_function(struct be_adapter *adapter); int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size); + u32 rss_hash_opts, u16 table_size, u8 *rss_hkey); int be_process_mcc(struct be_adapter *adapter); int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, u8 status, u8 state); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 15ba96cba65..6f3494e4151 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -933,27 +933,27 @@ static u64 be_get_rss_hash_opts(struct be_adapter *adapter, u64 flow_type) switch (flow_type) { case TCP_V4_FLOW: - if (adapter->rss_flags & RSS_ENABLE_IPV4) + if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4) data |= RXH_IP_DST | RXH_IP_SRC; - if (adapter->rss_flags & RSS_ENABLE_TCP_IPV4) + if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV4) data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case UDP_V4_FLOW: - if (adapter->rss_flags & RSS_ENABLE_IPV4) + if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4) data |= RXH_IP_DST | RXH_IP_SRC; - if (adapter->rss_flags & RSS_ENABLE_UDP_IPV4) + if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV4) data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case TCP_V6_FLOW: - if (adapter->rss_flags & RSS_ENABLE_IPV6) + if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6) data |= RXH_IP_DST | RXH_IP_SRC; - if (adapter->rss_flags & RSS_ENABLE_TCP_IPV6) + if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV6) data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case UDP_V6_FLOW: - if (adapter->rss_flags & RSS_ENABLE_IPV6) + if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6) data |= RXH_IP_DST | RXH_IP_SRC; - if (adapter->rss_flags & RSS_ENABLE_UDP_IPV6) + if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV6) data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; } @@ -992,7 +992,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter, struct be_rx_obj *rxo; int status = 0, i, j; u8 rsstable[128]; - u32 rss_flags = adapter->rss_flags; + u32 rss_flags = adapter->rss_info.rss_flags; if (cmd->data != L3_RSS_FLAGS && cmd->data != (L3_RSS_FLAGS | L4_RSS_FLAGS)) @@ -1039,7 +1039,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter, return -EINVAL; } - if (rss_flags == adapter->rss_flags) + if (rss_flags == adapter->rss_info.rss_flags) return status; if (be_multi_rxq(adapter)) { @@ -1051,9 +1051,11 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter, } } } - status = be_cmd_rss_config(adapter, rsstable, rss_flags, 128); + + status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable, + rss_flags, 128, adapter->rss_info.rss_hkey); if (!status) - adapter->rss_flags = rss_flags; + adapter->rss_info.rss_flags = rss_flags; return status; } @@ -1103,6 +1105,68 @@ static int be_set_channels(struct net_device *netdev, return be_update_queues(adapter); } +static u32 be_get_rxfh_indir_size(struct net_device *netdev) +{ + return RSS_INDIR_TABLE_LEN; +} + +static u32 be_get_rxfh_key_size(struct net_device *netdev) +{ + return RSS_HASH_KEY_LEN; +} + +static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +{ + struct be_adapter *adapter = netdev_priv(netdev); + int i; + struct rss_info *rss = &adapter->rss_info; + + if (indir) { + for (i = 0; i < RSS_INDIR_TABLE_LEN; i++) + indir[i] = rss->rss_queue[i]; + } + + if (hkey) + memcpy(hkey, rss->rss_hkey, RSS_HASH_KEY_LEN); + + return 0; +} + +static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +{ + int rc = 0, i, j; + struct be_adapter *adapter = netdev_priv(netdev); + u8 rsstable[RSS_INDIR_TABLE_LEN]; + + if (indir) { + struct be_rx_obj *rxo; + for (i = 0; i < RSS_INDIR_TABLE_LEN; i++) { + j = indir[i]; + rxo = &adapter->rx_obj[j]; + rsstable[i] = rxo->rss_id; + adapter->rss_info.rss_queue[i] = j; + } + } else { + memcpy(rsstable, adapter->rss_info.rsstable, + RSS_INDIR_TABLE_LEN); + } + + if (!hkey) + hkey = adapter->rss_info.rss_hkey; + + rc = be_cmd_rss_config(adapter, rsstable, + adapter->rss_info.rss_flags, + RSS_INDIR_TABLE_LEN, hkey); + if (rc) { + adapter->rss_info.rss_flags = RSS_ENABLE_NONE; + return -EIO; + } + memcpy(adapter->rss_info.rss_hkey, hkey, RSS_HASH_KEY_LEN); + memcpy(adapter->rss_info.rsstable, rsstable, + RSS_INDIR_TABLE_LEN); + return 0; +} + const struct ethtool_ops be_ethtool_ops = { .get_settings = be_get_settings, .get_drvinfo = be_get_drvinfo, @@ -1129,6 +1193,10 @@ const struct ethtool_ops be_ethtool_ops = { .self_test = be_self_test, .get_rxnfc = be_get_rxnfc, .set_rxnfc = be_set_rxnfc, + .get_rxfh_indir_size = be_get_rxfh_indir_size, + .get_rxfh_key_size = be_get_rxfh_key_size, + .get_rxfh = be_get_rxfh, + .set_rxfh = be_set_rxfh, .get_channels = be_get_channels, .set_channels = be_set_channels }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a18645407d2..a3c6a27d13f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2774,7 +2774,8 @@ static int be_rx_qs_create(struct be_adapter *adapter) { struct be_rx_obj *rxo; int rc, i, j; - u8 rsstable[128]; + u8 rss_hkey[RSS_HASH_KEY_LEN]; + struct rss_info *rss = &adapter->rss_info; for_all_rx_queues(adapter, rxo, i) { rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN, @@ -2799,31 +2800,37 @@ static int be_rx_qs_create(struct be_adapter *adapter) } if (be_multi_rxq(adapter)) { - for (j = 0; j < 128; j += adapter->num_rx_qs - 1) { + for (j = 0; j < RSS_INDIR_TABLE_LEN; + j += adapter->num_rx_qs - 1) { for_all_rss_queues(adapter, rxo, i) { - if ((j + i) >= 128) + if ((j + i) >= RSS_INDIR_TABLE_LEN) break; - rsstable[j + i] = rxo->rss_id; + rss->rsstable[j + i] = rxo->rss_id; + rss->rss_queue[j + i] = i; } } - adapter->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 | - RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6; + rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 | + RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6; if (!BEx_chip(adapter)) - adapter->rss_flags |= RSS_ENABLE_UDP_IPV4 | - RSS_ENABLE_UDP_IPV6; + rss->rss_flags |= RSS_ENABLE_UDP_IPV4 | + RSS_ENABLE_UDP_IPV6; } else { /* Disable RSS, if only default RX Q is created */ - adapter->rss_flags = RSS_ENABLE_NONE; + rss->rss_flags = RSS_ENABLE_NONE; } - rc = be_cmd_rss_config(adapter, rsstable, adapter->rss_flags, - 128); + get_random_bytes(rss_hkey, RSS_HASH_KEY_LEN); + rc = be_cmd_rss_config(adapter, rss->rsstable, + rss->rss_flags, + 128, rss_hkey); if (rc) { - adapter->rss_flags = RSS_ENABLE_NONE; + rss->rss_flags = RSS_ENABLE_NONE; return rc; } + memcpy(rss->rss_hkey, rss_hkey, RSS_HASH_KEY_LEN); + /* First time posting */ for_all_rx_queues(adapter, rxo, i) be_post_rx_frags(rxo, GFP_KERNEL); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 8b70ca7e342..f3658bdb64c 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -769,11 +769,6 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phy_mii_ioctl(phy, ifr, cmd); } -static int ethoc_config(struct net_device *dev, struct ifmap *map) -{ - return -ENOSYS; -} - static void ethoc_do_set_mac_address(struct net_device *dev) { struct ethoc *priv = netdev_priv(dev); @@ -995,7 +990,6 @@ static const struct net_device_ops ethoc_netdev_ops = { .ndo_open = ethoc_open, .ndo_stop = ethoc_stop, .ndo_do_ioctl = ethoc_ioctl, - .ndo_set_config = ethoc_config, .ndo_set_mac_address = ethoc_set_mac_address, .ndo_set_rx_mode = ethoc_set_multicast_list, .ndo_change_mtu = ethoc_change_mtu, diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 1471c5464a8..e27e6091094 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -265,10 +265,10 @@ struct e1000_adapter { u32 tx_hwtstamp_timeouts; /* Rx */ - bool (*clean_rx) (struct e1000_ring *ring, int *work_done, - int work_to_do) ____cacheline_aligned_in_smp; - void (*alloc_rx_buf) (struct e1000_ring *ring, int cleaned_count, - gfp_t gfp); + bool (*clean_rx)(struct e1000_ring *ring, int *work_done, + int work_to_do) ____cacheline_aligned_in_smp; + void (*alloc_rx_buf)(struct e1000_ring *ring, int cleaned_count, + gfp_t gfp); struct e1000_ring *rx_ring; u32 rx_int_delay; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index cad250bc1b9..4e5ad7ebe1f 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -169,6 +169,7 @@ static int e1000_get_settings(struct net_device *netdev, } } else if (!pm_runtime_suspended(netdev->dev.parent)) { u32 status = er32(STATUS); + if (status & E1000_STATUS_LU) { if (status & E1000_STATUS_SPEED_1000) speed = SPEED_1000; @@ -783,25 +784,26 @@ static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, reg + (offset << 2), val, (test[pat] & write & mask)); *data = reg; - return 1; + return true; } } - return 0; + return false; } static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, int reg, u32 mask, u32 write) { u32 val; + __ew32(&adapter->hw, reg, write & mask); val = __er32(&adapter->hw, reg); if ((write & mask) != (val & mask)) { e_err("set/check test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n", reg, (val & mask), (write & mask)); *data = reg; - return 1; + return true; } - return 0; + return false; } #define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write) \ @@ -1717,6 +1719,7 @@ static int e1000_link_test(struct e1000_adapter *adapter, u64 *data) *data = 0; if (hw->phy.media_type == e1000_media_type_internal_serdes) { int i = 0; + hw->mac.serdes_has_link = false; /* On some blade server designs, link establishment diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9866f264f55..a2901139b20 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1320,6 +1320,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if ((hw->mac.type == e1000_pch2lan) && link) { u32 reg; + reg = er32(STATUS); if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) { reg = er32(TIPG); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 3e69386add0..e4207efd13f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -599,6 +599,7 @@ static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i) if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) { u32 rctl = er32(RCTL); + ew32(RCTL, rctl & ~E1000_RCTL_EN); e_err("ME firmware caused invalid RDT - resetting\n"); schedule_work(&adapter->reset_task); @@ -615,6 +616,7 @@ static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i) if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) { u32 tctl = er32(TCTL); + ew32(TCTL, tctl & ~E1000_TCTL_EN); e_err("ME firmware caused invalid TDT - resetting\n"); schedule_work(&adapter->reset_task); @@ -1198,6 +1200,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) && (count < tx_ring->count)) { bool cleaned = false; + rmb(); /* read buffer_info after eop_desc */ for (; !cleaned; count++) { tx_desc = E1000_TX_DESC(*tx_ring, i); @@ -1753,6 +1756,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) adapter->flags & FLAG_RX_NEEDS_RESTART) { /* disable receives */ u32 rctl = er32(RCTL); + ew32(RCTL, rctl & ~E1000_RCTL_EN); adapter->flags |= FLAG_RESTART_NOW; } @@ -1960,6 +1964,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) /* Workaround issue with spurious interrupts on 82574 in MSI-X mode */ if (hw->mac.type == e1000_82574) { u32 rfctl = er32(RFCTL); + rfctl |= E1000_RFCTL_ACK_DIS; ew32(RFCTL, rfctl); } @@ -2204,6 +2209,7 @@ static void e1000_irq_disable(struct e1000_adapter *adapter) if (adapter->msix_entries) { int i; + for (i = 0; i < adapter->num_vectors; i++) synchronize_irq(adapter->msix_entries[i].vector); } else { @@ -2921,6 +2927,7 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) if (adapter->flags2 & FLAG2_DMA_BURST) { u32 txdctl = er32(TXDCTL(0)); + txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH | E1000_TXDCTL_WTHRESH); /* set up some performance related parameters to encourage the @@ -3239,6 +3246,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (adapter->flags & FLAG_IS_ICH) { u32 rxdctl = er32(RXDCTL(0)); + ew32(RXDCTL(0), rxdctl | 0x3); } @@ -4695,6 +4703,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) /* Correctable ECC Errors */ if (hw->mac.type == e1000_pch_lpt) { u32 pbeccsts = er32(PBECCSTS); + adapter->corr_errors += pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; adapter->uncorr_errors += @@ -4808,6 +4817,7 @@ static void e1000e_enable_receives(struct e1000_adapter *adapter) (adapter->flags & FLAG_RESTART_NOW)) { struct e1000_hw *hw = &adapter->hw; u32 rctl = er32(RCTL); + ew32(RCTL, rctl | E1000_RCTL_EN); adapter->flags &= ~FLAG_RESTART_NOW; } @@ -4930,6 +4940,7 @@ static void e1000_watchdog_task(struct work_struct *work) if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) && !txb2b) { u32 tarc0; + tarc0 = er32(TARC(0)); tarc0 &= ~SPEED_MODE_BIT; ew32(TARC(0), tarc0); @@ -5170,7 +5181,7 @@ static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb) __be16 protocol; if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; + return false; if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -5215,7 +5226,7 @@ static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb) i = 0; tx_ring->next_to_use = i; - return 1; + return true; } static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb, @@ -6209,6 +6220,7 @@ static int __e1000_resume(struct pci_dev *pdev) e1e_wphy(&adapter->hw, BM_WUS, ~0); } else { u32 wus = er32(WUS); + if (wus) { e_info("MAC Wakeup cause - %s\n", wus & E1000_WUS_EX ? "Unicast Packet" : @@ -7027,7 +7039,7 @@ static const struct pci_error_handlers e1000_err_handler = { .resume = e1000_io_resume, }; -static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = { +static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 }, @@ -7144,6 +7156,7 @@ static struct pci_driver e1000_driver = { static int __init e1000_init_module(void) { int ret; + pr_info("Intel(R) PRO/1000 Network Driver - %s\n", e1000e_driver_version); pr_info("Copyright(c) 1999 - 2014 Intel Corporation.\n"); diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index a9a976f04bf..b1f212b7baf 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -398,6 +398,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) /* Loop to allow for up to whole page write of eeprom */ while (widx < words) { u16 word_out = data[widx]; + word_out = (word_out >> 8) | (word_out << 8); e1000_shift_out_eec_bits(hw, word_out, 16); widx++; diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index d0ac0f3249c..aa1923f7ebd 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -436,6 +436,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) if (num_IntMode > bd) { unsigned int int_mode = IntMode[bd]; + e1000_validate_option(&int_mode, &opt, adapter); adapter->int_mode = int_mode; } else { @@ -457,6 +458,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) if (num_SmartPowerDownEnable > bd) { unsigned int spd = SmartPowerDownEnable[bd]; + e1000_validate_option(&spd, &opt, adapter); if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) && spd) adapter->flags |= FLAG_SMART_POWER_DOWN; @@ -473,6 +475,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) if (num_CrcStripping > bd) { unsigned int crc_stripping = CrcStripping[bd]; + e1000_validate_option(&crc_stripping, &opt, adapter); if (crc_stripping == OPTION_ENABLED) { adapter->flags2 |= FLAG2_CRC_STRIPPING; @@ -495,6 +498,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) if (num_KumeranLockLoss > bd) { unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; + e1000_validate_option(&kmrn_lock_loss, &opt, adapter); enabled = kmrn_lock_loss; } diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 00b3fc98bf3..b2005e13fb0 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -2896,6 +2896,7 @@ static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, (hw->phy.addr == 2) && !(MAX_PHY_REG_ADDRESS & reg) && (data & (1 << 11))) { u16 data2 = 0x7EFF; + ret_val = e1000_access_phy_debug_regs_hv(hw, (1 << 6) | 0x3, &data2, false); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index ed3902bf249..34415d342ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -33,6 +33,16 @@ static void i40e_resume_aq(struct i40e_hw *hw); /** + * i40e_is_nvm_update_op - return true if this is an NVM update operation + * @desc: API request descriptor + **/ +static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc) +{ + return (desc->opcode == i40e_aqc_opc_nvm_erase) || + (desc->opcode == i40e_aqc_opc_nvm_update); +} + +/** * i40e_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * @@ -585,6 +595,7 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) /* pre-emptive resource lock release */ i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL); + hw->aq.nvm_busy = false; ret_code = i40e_aq_set_hmc_resource_profile(hw, I40E_HMC_PROFILE_DEFAULT, @@ -708,6 +719,12 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, goto asq_send_command_exit; } + if (i40e_is_nvm_update_op(desc) && hw->aq.nvm_busy) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: NVM busy.\n"); + status = I40E_ERR_NVM; + goto asq_send_command_exit; + } + details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); if (cmd_details) { *details = *cmd_details; @@ -835,6 +852,9 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; } + if (i40e_is_nvm_update_op(desc)) + hw->aq.nvm_busy = true; + /* update the error if time out occurred */ if ((!cmd_completed) && (!details->async && !details->postpone)) { @@ -929,6 +949,9 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, e->msg_size); } + if (i40e_is_nvm_update_op(&e->desc)) + hw->aq.nvm_busy = false; + /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message * size diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h index 993f7685a91..b1552fbc48a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h @@ -90,6 +90,7 @@ struct i40e_adminq_info { u16 fw_min_ver; /* firmware minor version */ u16 api_maj_ver; /* api major version */ u16 api_min_ver; /* api minor version */ + bool nvm_busy; struct mutex asq_mutex; /* Send queue lock */ struct mutex arq_mutex; /* Receive queue lock */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 7b6374a8f8d..f2ba4b76ecd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -182,9 +182,6 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_add_mirror_rule = 0x0260, i40e_aqc_opc_delete_mirror_rule = 0x0261, - i40e_aqc_opc_set_storm_control_config = 0x0280, - i40e_aqc_opc_get_storm_control_config = 0x0281, - /* DCB commands */ i40e_aqc_opc_dcb_ignore_pfc = 0x0301, i40e_aqc_opc_dcb_updated = 0x0302, @@ -207,6 +204,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_query_switching_comp_bw_config = 0x041A, i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, + i40e_aqc_opc_configure_partition_bw = 0x041D, /* hmc */ i40e_aqc_opc_query_hmc_resource_profile = 0x0500, @@ -1289,27 +1287,6 @@ struct i40e_aqc_add_delete_mirror_rule_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion); -/* Set Storm Control Configuration (direct 0x0280) - * Get Storm Control Configuration (direct 0x0281) - * the command and response use the same descriptor structure - */ -struct i40e_aqc_set_get_storm_control_config { - __le32 broadcast_threshold; - __le32 multicast_threshold; - __le32 control_flags; -#define I40E_AQC_STORM_CONTROL_MDIPW 0x01 -#define I40E_AQC_STORM_CONTROL_MDICW 0x02 -#define I40E_AQC_STORM_CONTROL_BDIPW 0x04 -#define I40E_AQC_STORM_CONTROL_BDICW 0x08 -#define I40E_AQC_STORM_CONTROL_BIDU 0x10 -#define I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT 8 -#define I40E_AQC_STORM_CONTROL_INTERVAL_MASK (0x3FF << \ - I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT) - u8 reserved[4]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_set_get_storm_control_config); - /* DCB 0x03xx*/ /* PFC Ignore (direct 0x0301) @@ -1499,6 +1476,15 @@ struct i40e_aqc_query_switching_comp_bw_config_resp { * (direct 0x041B and 0x041C) uses the generic SEID struct */ +/* Configure partition BW + * (indirect 0x041D) + */ +struct i40e_aqc_configure_partition_bw_data { + __le16 pf_valid_bits; + u8 min_bw[16]; /* guaranteed bandwidth */ + u8 max_bw[16]; /* bandwidth limit */ +}; + /* Get and set the active HMC resource profile and status. * (direct 0x0500) and (direct 0x0501) */ @@ -1583,11 +1569,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_FLAG_PAUSE_TX 0x01 #define I40E_AQ_PHY_FLAG_PAUSE_RX 0x02 #define I40E_AQ_PHY_FLAG_LOW_POWER 0x04 -#define I40E_AQ_PHY_FLAG_AN_SHIFT 3 -#define I40E_AQ_PHY_FLAG_AN_MASK (0x3 << I40E_AQ_PHY_FLAG_AN_SHIFT) -#define I40E_AQ_PHY_FLAG_AN_OFF 0x00 /* link forced on */ -#define I40E_AQ_PHY_FLAG_AN_OFF_LINK_DOWN 0x01 -#define I40E_AQ_PHY_FLAG_AN_ON 0x02 +#define I40E_AQ_PHY_LINK_ENABLED 0x08 +#define I40E_AQ_PHY_AN_ENABLED 0x10 #define I40E_AQ_PHY_FLAG_MODULE_QUAL 0x20 __le16 eee_capability; #define I40E_AQ_EEE_100BASE_TX 0x0002 diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 922cdcc45c5..f8dfb4b7e99 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2253,6 +2253,35 @@ static i40e_status i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid, } /** + * i40e_aq_config_vsi_bw_limit - Configure VSI BW Limit + * @hw: pointer to the hw struct + * @seid: VSI seid + * @credit: BW limit credits (0 = disabled) + * @max_credit: Max BW limit credits + * @cmd_details: pointer to command details structure or NULL + **/ +i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw, + u16 seid, u16 credit, u8 max_credit, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_configure_vsi_bw_limit *cmd = + (struct i40e_aqc_configure_vsi_bw_limit *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_configure_vsi_bw_limit); + + cmd->vsi_seid = cpu_to_le16(seid); + cmd->credit = cpu_to_le16(credit); + cmd->max_credit = max_credit; + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_config_vsi_tc_bw - Config VSI BW Allocation per TC * @hw: pointer to the hw struct * @seid: VSI seid diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 03d99cbc5c2..0cf47c95808 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -649,7 +649,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } rcu_read_lock(); - for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) { + for (j = 0; j < vsi->num_queue_pairs; j++) { struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[j]); struct i40e_ring *rx_ring; @@ -662,14 +662,16 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, data[i] = tx_ring->stats.packets; data[i + 1] = tx_ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); + i += 2; /* Rx ring is the 2nd half of the queue pair */ rx_ring = &tx_ring[1]; do { start = u64_stats_fetch_begin_irq(&rx_ring->syncp); - data[i + 2] = rx_ring->stats.packets; - data[i + 3] = rx_ring->stats.bytes; + data[i] = rx_ring->stats.packets; + data[i + 1] = rx_ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); + i += 2; } rcu_read_unlock(); if (vsi == pf->vsi[pf->lan_vsi]) { @@ -1189,6 +1191,12 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, return -EINVAL; fsp->flow_type = rule->flow_type; + if (fsp->flow_type == IP_USER_FLOW) { + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fsp->h_u.usr_ip4_spec.proto = 0; + fsp->m_u.usr_ip4_spec.proto = 0; + } + fsp->h_u.tcp_ip4_spec.psrc = rule->src_port; fsp->h_u.tcp_ip4_spec.pdst = rule->dst_port; fsp->h_u.tcp_ip4_spec.ip4src = rule->src_ip[0]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c index d5d98fe2691..5c341aeb5d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c @@ -747,6 +747,7 @@ static struct i40e_context_ele i40e_hmc_rxq_ce_info[] = { { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphdata_ena), 1, 195 }, { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphhead_ena), 1, 196 }, { I40E_HMC_STORE(i40e_hmc_obj_rxq, lrxqthresh), 3, 198 }, + { I40E_HMC_STORE(i40e_hmc_obj_rxq, prefena), 1, 201 }, { 0 } }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h index 341de925a29..eb65fe23c4a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h @@ -56,6 +56,7 @@ struct i40e_hmc_obj_rxq { u8 tphdata_ena; u8 tphhead_ena; u8 lrxqthresh; + u8 prefena; /* NOTE: normally must be set to 1 at init */ }; /* Tx queue context data */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 59eada31e21..4fe15d58adf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -39,7 +39,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 36 +#define DRV_VERSION_BUILD 43 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -2312,6 +2312,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; rx_ctx.showiv = 1; + /* set the prefena field to 1 because the manual says to */ + rx_ctx.prefena = 1; /* clear the context in the HMC */ err = i40e_clear_lan_rx_queue_context(hw, pf_q); @@ -3163,9 +3165,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) usleep_range(1000, 2000); } /* Skip if the queue is already in the requested state */ - if (enable && (tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) - continue; - if (!enable && !(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) + if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) continue; /* turn on/off the queue */ @@ -3181,13 +3181,8 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) /* wait for the change to finish */ for (j = 0; j < 10; j++) { tx_reg = rd32(hw, I40E_QTX_ENA(pf_q)); - if (enable) { - if ((tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) - break; - } else { - if (!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) - break; - } + if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) + break; udelay(10); } @@ -3226,15 +3221,9 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) usleep_range(1000, 2000); } - if (enable) { - /* is STAT set ? */ - if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) - continue; - } else { - /* is !STAT set ? */ - if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) - continue; - } + /* Skip if the queue is already in the requested state */ + if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) + continue; /* turn on/off the queue */ if (enable) @@ -3247,13 +3236,8 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) for (j = 0; j < 10; j++) { rx_reg = rd32(hw, I40E_QRX_ENA(pf_q)); - if (enable) { - if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) - break; - } else { - if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) - break; - } + if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) + break; udelay(10); } @@ -3516,6 +3500,19 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) } /** + * i40e_vsi_close - Shut down a VSI + * @vsi: the vsi to be quelled + **/ +static void i40e_vsi_close(struct i40e_vsi *vsi) +{ + if (!test_and_set_bit(__I40E_DOWN, &vsi->state)) + i40e_down(vsi); + i40e_vsi_free_irq(vsi); + i40e_vsi_free_tx_resources(vsi); + i40e_vsi_free_rx_resources(vsi); +} + +/** * i40e_quiesce_vsi - Pause a given VSI * @vsi: the VSI being paused **/ @@ -3528,8 +3525,7 @@ static void i40e_quiesce_vsi(struct i40e_vsi *vsi) if (vsi->netdev && netif_running(vsi->netdev)) { vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); } else { - set_bit(__I40E_DOWN, &vsi->state); - i40e_down(vsi); + i40e_vsi_close(vsi); } } @@ -3546,7 +3542,7 @@ static void i40e_unquiesce_vsi(struct i40e_vsi *vsi) if (vsi->netdev && netif_running(vsi->netdev)) vsi->netdev->netdev_ops->ndo_open(vsi->netdev); else - i40e_up(vsi); /* this clears the DOWN bit */ + i40e_vsi_open(vsi); /* this clears the DOWN bit */ } /** @@ -4312,24 +4308,32 @@ int i40e_vsi_open(struct i40e_vsi *vsi) if (err) goto err_setup_rx; - if (!vsi->netdev) { + if (vsi->netdev) { + snprintf(int_name, sizeof(int_name) - 1, "%s-%s", + dev_driver_string(&pf->pdev->dev), vsi->netdev->name); + err = i40e_vsi_request_irq(vsi, int_name); + if (err) + goto err_setup_rx; + + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (err) + goto err_set_queues; + + } else if (vsi->type == I40E_VSI_FDIR) { + snprintf(int_name, sizeof(int_name) - 1, "%s-fdir", + dev_driver_string(&pf->pdev->dev)); + err = i40e_vsi_request_irq(vsi, int_name); + } else { err = EINVAL; goto err_setup_rx; } - snprintf(int_name, sizeof(int_name) - 1, "%s-%s", - dev_driver_string(&pf->pdev->dev), vsi->netdev->name); - err = i40e_vsi_request_irq(vsi, int_name); - if (err) - goto err_setup_rx; - - /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_queue_pairs); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_queue_pairs); - if (err) - goto err_set_queues; err = i40e_up_complete(vsi); if (err) @@ -4386,14 +4390,7 @@ static int i40e_close(struct net_device *netdev) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - if (test_and_set_bit(__I40E_DOWN, &vsi->state)) - return 0; - - i40e_down(vsi); - i40e_vsi_free_irq(vsi); - - i40e_vsi_free_tx_resources(vsi); - i40e_vsi_free_rx_resources(vsi); + i40e_vsi_close(vsi); return 0; } @@ -5229,9 +5226,6 @@ static int i40e_get_capabilities(struct i40e_pf *pf) } } while (err); - /* increment MSI-X count because current FW skips one */ - pf->hw.func_caps.num_msix_vectors++; - if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) || (pf->hw.aq.fw_maj_ver < 2)) { pf->hw.func_caps.num_msix_vectors++; @@ -5270,8 +5264,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi); static void i40e_fdir_sb_setup(struct i40e_pf *pf) { struct i40e_vsi *vsi; - bool new_vsi = false; - int err, i; + int i; if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) return; @@ -5291,47 +5284,12 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) pf->vsi[pf->lan_vsi]->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); - goto err_vsi; + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + return; } - new_vsi = true; - } - i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring); - - err = i40e_vsi_setup_tx_resources(vsi); - if (err) - goto err_setup_tx; - err = i40e_vsi_setup_rx_resources(vsi); - if (err) - goto err_setup_rx; - - if (new_vsi) { - char int_name[IFNAMSIZ + 9]; - err = i40e_vsi_configure(vsi); - if (err) - goto err_setup_rx; - snprintf(int_name, sizeof(int_name) - 1, "%s-fdir", - dev_driver_string(&pf->pdev->dev)); - err = i40e_vsi_request_irq(vsi, int_name); - if (err) - goto err_setup_rx; - err = i40e_up_complete(vsi); - if (err) - goto err_up_complete; - clear_bit(__I40E_NEEDS_RESTART, &vsi->state); } - return; - -err_up_complete: - i40e_down(vsi); - i40e_vsi_free_irq(vsi); -err_setup_rx: - i40e_vsi_free_rx_resources(vsi); -err_setup_tx: - i40e_vsi_free_tx_resources(vsi); -err_vsi: - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - i40e_vsi_clear(vsi); + i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring); } /** @@ -6652,6 +6610,96 @@ static void i40e_del_vxlan_port(struct net_device *netdev, } #endif +#ifdef HAVE_FDB_OPS +#ifdef USE_CONST_DEV_UC_CHAR +static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +#else +static int i40e_ndo_fdb_add(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr, + u16 flags) +#endif +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + struct i40e_pf *pf = np->vsi->back; + int err = 0; + + if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED)) + return -EOPNOTSUPP; + + /* Hardware does not support aging addresses so if a + * ndm_state is given only allow permanent addresses + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + netdev_info(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + else + err = -EINVAL; + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +#ifndef USE_DEFAULT_FDB_DEL_DUMP +#ifdef USE_CONST_DEV_UC_CHAR +static int i40e_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + const unsigned char *addr) +#else +static int i40e_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr) +#endif +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + struct i40e_pf *pf = np->vsi->back; + int err = -EOPNOTSUPP; + + if (ndm->ndm_state & NUD_PERMANENT) { + netdev_info(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { + if (is_unicast_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + } + + return err; +} + +static int i40e_ndo_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int idx) +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + struct i40e_pf *pf = np->vsi->back; + + if (pf->flags & I40E_FLAG_SRIOV_ENABLED) + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + + return idx; +} + +#endif /* USE_DEFAULT_FDB_DEL_DUMP */ +#endif /* HAVE_FDB_OPS */ static const struct net_device_ops i40e_netdev_ops = { .ndo_open = i40e_open, .ndo_stop = i40e_close, @@ -6679,6 +6727,13 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_add_vxlan_port = i40e_add_vxlan_port, .ndo_del_vxlan_port = i40e_del_vxlan_port, #endif +#ifdef HAVE_FDB_OPS + .ndo_fdb_add = i40e_ndo_fdb_add, +#ifndef USE_DEFAULT_FDB_DEL_DUMP + .ndo_fdb_del = i40e_ndo_fdb_del, + .ndo_fdb_dump = i40e_ndo_fdb_dump, +#endif +#endif }; /** @@ -6985,11 +7040,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) unregister_netdev(vsi->netdev); } } else { - if (!test_and_set_bit(__I40E_DOWN, &vsi->state)) - i40e_down(vsi); - i40e_vsi_free_irq(vsi); - i40e_vsi_free_tx_resources(vsi); - i40e_vsi_free_rx_resources(vsi); + i40e_vsi_close(vsi); } i40e_vsi_disable_irq(vsi); } @@ -8093,6 +8144,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 link_status; int err = 0; u32 len; + u32 i; err = pci_enable_device_mem(pdev); if (err) @@ -8282,6 +8334,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; } + /* if FDIR VSI was set up, start it now */ + for (i = 0; i < pf->hw.func_caps.num_vsis; i++) { + if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { + i40e_vsi_open(pf->vsi[i]); + break; + } + } /* The main driver is (mostly) up and happy. We need to set this state * before setting up the misc vector or we get a race and the vector diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 9cd57e61795..10652f615ae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -167,6 +167,9 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw, u16 flags, u8 *mac_addr, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw, + u16 seid, u16 credit, u8 max_credit, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_dcb_updated(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 02c11a7f7d2..b27b2f59ea8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -29,6 +29,24 @@ /***********************misc routines*****************************/ /** + * i40e_vc_disable_vf + * @pf: pointer to the pf info + * @vf: pointer to the vf info + * + * Disable the VF through a SW reset + **/ +static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf) +{ + struct i40e_hw *hw = &pf->hw; + u32 reg; + + reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id)); + reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; + wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); + i40e_flush(hw); +} + +/** * i40e_vc_isvalid_vsi_id * @vf: pointer to the vf info * @vsi_id: vf relative vsi id @@ -416,6 +434,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) if (ret) dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); + /* Set VF bandwidth if specified */ + if (vf->tx_rate) { + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, + vf->tx_rate / 50, 0, NULL); + if (ret) + dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n", + vf->vf_id, ret); + } + error_alloc_vsi_res: return ret; } @@ -2088,10 +2115,16 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } - if (vsi->info.pvid == 0 && i40e_is_vsi_in_vlan(vsi)) + if (vsi->info.pvid == 0 && i40e_is_vsi_in_vlan(vsi)) { dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", vf_id); + /* Administrator Error - knock the VF offline until he does + * the right thing by reconfiguring his network correctly + * and then reloading the VF driver. + */ + i40e_vc_disable_vf(pf, vf); + } /* Check for condition where there was already a port VLAN ID * filter set and now it is being deleted by setting it to zero. @@ -2160,7 +2193,61 @@ error_pvid: **/ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) { - return -EOPNOTSUPP; + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; + struct i40e_vsi *vsi; + struct i40e_vf *vf; + int speed = 0; + int ret = 0; + + /* validate the request */ + if (vf_id >= pf->num_alloc_vfs) { + dev_err(&pf->pdev->dev, "Invalid VF Identifier %d.\n", vf_id); + ret = -EINVAL; + goto error; + } + + vf = &(pf->vf[vf_id]); + vsi = pf->vsi[vf->lan_vsi_index]; + if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + dev_err(&pf->pdev->dev, "Uninitialized VF %d.\n", vf_id); + ret = -EINVAL; + goto error; + } + + switch (pf->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + speed = 40000; + break; + case I40E_LINK_SPEED_10GB: + speed = 10000; + break; + case I40E_LINK_SPEED_1GB: + speed = 1000; + break; + default: + break; + } + + if (tx_rate > speed) { + dev_err(&pf->pdev->dev, "Invalid tx rate %d specified for vf %d.", + tx_rate, vf->vf_id); + ret = -EINVAL; + goto error; + } + + /* Tx rate credits are in values of 50Mbps, 0 is disabled*/ + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, tx_rate / 50, 0, + NULL); + if (ret) { + dev_err(&pf->pdev->dev, "Unable to set tx rate, error code %d.\n", + ret); + ret = -EIO; + goto error; + } + vf->tx_rate = tx_rate; +error: + return ret; } /** @@ -2200,10 +2287,17 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN); - ivi->tx_rate = 0; + ivi->tx_rate = vf->tx_rate; ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK; ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >> I40E_VLAN_PRIORITY_SHIFT; + if (vf->link_forced == false) + ivi->linkstate = IFLA_VF_LINK_STATE_AUTO; + else if (vf->link_up == true) + ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; + else + ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; + ret = 0; error_param: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 389c47f396d..ba3d1f8414b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -98,6 +98,7 @@ struct i40e_vf { unsigned long vf_caps; /* vf's adv. capabilities */ unsigned long vf_states; /* vf's runtime states */ + unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ bool link_forced; bool link_up; /* only valid if vf link is forced */ }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 5470ce95936..c79df257f83 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -28,6 +28,16 @@ #include "i40e_prototype.h" /** + * i40e_is_nvm_update_op - return true if this is an NVM update operation + * @desc: API request descriptor + **/ +static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc) +{ + return (desc->opcode == i40e_aqc_opc_nvm_erase) || + (desc->opcode == i40e_aqc_opc_nvm_update); +} + +/** * i40e_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * @@ -659,6 +669,12 @@ i40e_status i40evf_asq_send_command(struct i40e_hw *hw, goto asq_send_command_exit; } + if (i40e_is_nvm_update_op(desc) && hw->aq.nvm_busy) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: NVM busy.\n"); + status = I40E_ERR_NVM; + goto asq_send_command_exit; + } + details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); if (cmd_details) { *details = *cmd_details; @@ -786,6 +802,9 @@ i40e_status i40evf_asq_send_command(struct i40e_hw *hw, hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; } + if (i40e_is_nvm_update_op(desc)) + hw->aq.nvm_busy = true; + /* update the error if time out occurred */ if ((!cmd_completed) && (!details->async && !details->postpone)) { @@ -880,6 +899,9 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, e->msg_size); } + if (i40e_is_nvm_update_op(&e->desc)) + hw->aq.nvm_busy = false; + /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message * size diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h index 8f72c31d95c..7d24be52860 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h @@ -87,6 +87,7 @@ struct i40e_adminq_info { u16 fw_min_ver; /* firmware minor version */ u16 api_maj_ver; /* api major version */ u16 api_min_ver; /* api minor version */ + bool nvm_busy; struct mutex asq_mutex; /* Send queue lock */ struct mutex arq_mutex; /* Receive queue lock */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 97662b6bd98..6e617669c32 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -180,9 +180,6 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_add_mirror_rule = 0x0260, i40e_aqc_opc_delete_mirror_rule = 0x0261, - i40e_aqc_opc_set_storm_control_config = 0x0280, - i40e_aqc_opc_get_storm_control_config = 0x0281, - /* DCB commands */ i40e_aqc_opc_dcb_ignore_pfc = 0x0301, i40e_aqc_opc_dcb_updated = 0x0302, @@ -205,6 +202,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_query_switching_comp_bw_config = 0x041A, i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, + i40e_aqc_opc_configure_partition_bw = 0x041D, /* hmc */ i40e_aqc_opc_query_hmc_resource_profile = 0x0500, @@ -1289,27 +1287,6 @@ struct i40e_aqc_add_delete_mirror_rule_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion); -/* Set Storm Control Configuration (direct 0x0280) - * Get Storm Control Configuration (direct 0x0281) - * the command and response use the same descriptor structure - */ -struct i40e_aqc_set_get_storm_control_config { - __le32 broadcast_threshold; - __le32 multicast_threshold; - __le32 control_flags; -#define I40E_AQC_STORM_CONTROL_MDIPW 0x01 -#define I40E_AQC_STORM_CONTROL_MDICW 0x02 -#define I40E_AQC_STORM_CONTROL_BDIPW 0x04 -#define I40E_AQC_STORM_CONTROL_BDICW 0x08 -#define I40E_AQC_STORM_CONTROL_BIDU 0x10 -#define I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT 8 -#define I40E_AQC_STORM_CONTROL_INTERVAL_MASK (0x3FF << \ - I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT) - u8 reserved[4]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_set_get_storm_control_config); - /* DCB 0x03xx*/ /* PFC Ignore (direct 0x0301) @@ -1499,6 +1476,15 @@ struct i40e_aqc_query_switching_comp_bw_config_resp { * (direct 0x041B and 0x041C) uses the generic SEID struct */ +/* Configure partition BW + * (indirect 0x041D) + */ +struct i40e_aqc_configure_partition_bw_data { + __le16 pf_valid_bits; + u8 min_bw[16]; /* guaranteed bandwidth */ + u8 max_bw[16]; /* bandwidth limit */ +}; + /* Get and set the active HMC resource profile and status. * (direct 0x0500) and (direct 0x0501) */ @@ -1583,11 +1569,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_FLAG_PAUSE_TX 0x01 #define I40E_AQ_PHY_FLAG_PAUSE_RX 0x02 #define I40E_AQ_PHY_FLAG_LOW_POWER 0x04 -#define I40E_AQ_PHY_FLAG_AN_SHIFT 3 -#define I40E_AQ_PHY_FLAG_AN_MASK (0x3 << I40E_AQ_PHY_FLAG_AN_SHIFT) -#define I40E_AQ_PHY_FLAG_AN_OFF 0x00 /* link forced on */ -#define I40E_AQ_PHY_FLAG_AN_OFF_LINK_DOWN 0x01 -#define I40E_AQ_PHY_FLAG_AN_ON 0x02 +#define I40E_AQ_PHY_LINK_ENABLED 0x08 +#define I40E_AQ_PHY_AN_ENABLED 0x10 #define I40E_AQ_PHY_FLAG_MODULE_QUAL 0x20 __le16 eee_capability; #define I40E_AQ_EEE_100BASE_TX 0x0002 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h index 17e42ca26d0..775fcb2463d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h @@ -53,6 +53,7 @@ struct i40e_hmc_obj_rxq { u8 tphdata_ena; u8 tphhead_ena; u8 lrxqthresh; + u8 prefena; /* NOTE: normally must be set to 1 at init */ }; /* Tx queue context data */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 2797548fde0..da6054cbd9c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -31,7 +31,7 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710 X710 Virtual Function Network Driver"; -#define DRV_VERSION "0.9.16" +#define DRV_VERSION "0.9.21" const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index fa36fe12e77..49976bf7c60 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -73,9 +73,8 @@ static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw); static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw); static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw); static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); -static const u16 e1000_82580_rxpbs_table[] = - { 36, 72, 144, 1, 2, 4, 8, 16, - 35, 70, 140 }; +static const u16 e1000_82580_rxpbs_table[] = { + 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; /** * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO @@ -1269,7 +1268,7 @@ static s32 igb_check_for_link_82575(struct e1000_hw *hw) if (hw->phy.media_type != e1000_media_type_copper) { ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed, - &duplex); + &duplex); /* Use this flag to determine if link needs to be checked or * not. If we have link clear the flag so that we do not * continue to check for link. @@ -1436,9 +1435,8 @@ static s32 igb_reset_hw_82575(struct e1000_hw *hw) /* set the completion timeout for interface */ ret_val = igb_set_pcie_completion_timeout(hw); - if (ret_val) { + if (ret_val) hw_dbg("PCI-E Set completion timeout has failed.\n"); - } hw_dbg("Masking off all interrupts\n"); wr32(E1000_IMC, 0xffffffff); @@ -1676,7 +1674,7 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) hw->mac.type == e1000_82576) { ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); if (ret_val) { - printk(KERN_DEBUG "NVM Read Error\n\n"); + hw_dbg(KERN_DEBUG "NVM Read Error\n\n"); return ret_val; } @@ -1689,7 +1687,7 @@ static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) * link either autoneg or be forced to 1000/Full */ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | - E1000_CTRL_FD | E1000_CTRL_FRCDPX; + E1000_CTRL_FD | E1000_CTRL_FRCDPX; /* set speed of 1000/Full if speed/duplex is forced */ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; @@ -2005,14 +2003,14 @@ static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw) * 16ms to 55ms */ ret_val = igb_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); + &pcie_devctl2); if (ret_val) goto out; pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; ret_val = igb_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); + &pcie_devctl2); out: /* disable completion timeout resend */ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; @@ -2436,8 +2434,7 @@ static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw) ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { - hw_dbg("NVM Read Error while updating checksum" - " compatibility bit.\n"); + hw_dbg("NVM Read Error while updating checksum compatibility bit.\n"); goto out; } @@ -2447,8 +2444,7 @@ static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw) ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { - hw_dbg("NVM Write Error while updating checksum" - " compatibility bit.\n"); + hw_dbg("NVM Write Error while updating checksum compatibility bit.\n"); goto out; } } diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 09d78be7241..f3667c07056 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -37,9 +37,9 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 data); #define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ - (ID_LED_DEF1_DEF2 << 8) | \ - (ID_LED_DEF1_DEF2 << 4) | \ - (ID_LED_OFF1_ON2)) + (ID_LED_DEF1_DEF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_ON2)) #define E1000_RAR_ENTRIES_82575 16 #define E1000_RAR_ENTRIES_82576 24 @@ -67,16 +67,16 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 #define E1000_EICR_TX_QUEUE ( \ - E1000_EICR_TX_QUEUE0 | \ - E1000_EICR_TX_QUEUE1 | \ - E1000_EICR_TX_QUEUE2 | \ - E1000_EICR_TX_QUEUE3) + E1000_EICR_TX_QUEUE0 | \ + E1000_EICR_TX_QUEUE1 | \ + E1000_EICR_TX_QUEUE2 | \ + E1000_EICR_TX_QUEUE3) #define E1000_EICR_RX_QUEUE ( \ - E1000_EICR_RX_QUEUE0 | \ - E1000_EICR_RX_QUEUE1 | \ - E1000_EICR_RX_QUEUE2 | \ - E1000_EICR_RX_QUEUE3) + E1000_EICR_RX_QUEUE0 | \ + E1000_EICR_RX_QUEUE1 | \ + E1000_EICR_RX_QUEUE2 | \ + E1000_EICR_RX_QUEUE3) /* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ #define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index b05bf925ac7..22078c4e0f8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -101,11 +101,11 @@ /* Same mask, but for extended and packet split descriptors */ #define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ - E1000_RXDEXT_STATERR_CE | \ - E1000_RXDEXT_STATERR_SE | \ - E1000_RXDEXT_STATERR_SEQ | \ - E1000_RXDEXT_STATERR_CXE | \ - E1000_RXDEXT_STATERR_RXE) + E1000_RXDEXT_STATERR_CE | \ + E1000_RXDEXT_STATERR_SE | \ + E1000_RXDEXT_STATERR_SEQ | \ + E1000_RXDEXT_STATERR_CXE | \ + E1000_RXDEXT_STATERR_RXE) #define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 #define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 @@ -406,12 +406,12 @@ * o LSC = Link Status Change */ #define IMS_ENABLE_MASK ( \ - E1000_IMS_RXT0 | \ - E1000_IMS_TXDW | \ - E1000_IMS_RXDMT0 | \ - E1000_IMS_RXSEQ | \ - E1000_IMS_LSC | \ - E1000_IMS_DOUTSYNC) + E1000_IMS_RXT0 | \ + E1000_IMS_TXDW | \ + E1000_IMS_RXDMT0 | \ + E1000_IMS_RXSEQ | \ + E1000_IMS_LSC | \ + E1000_IMS_DOUTSYNC) /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -1011,8 +1011,7 @@ #define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F /* DMA Coalescing register fields */ -#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power decision based - on DMA coal */ +#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power on DMA coal */ /* Tx Rate-Scheduler Config fields */ #define E1000_RTTBCNRC_RS_ENA 0x80000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 10741d170f2..da705a61bb3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -320,15 +320,15 @@ struct e1000_host_mng_command_info { #include "e1000_mbx.h" struct e1000_mac_operations { - s32 (*check_for_link)(struct e1000_hw *); - s32 (*reset_hw)(struct e1000_hw *); - s32 (*init_hw)(struct e1000_hw *); + s32 (*check_for_link)(struct e1000_hw *); + s32 (*reset_hw)(struct e1000_hw *); + s32 (*init_hw)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *); - s32 (*setup_physical_interface)(struct e1000_hw *); + s32 (*setup_physical_interface)(struct e1000_hw *); void (*rar_set)(struct e1000_hw *, u8 *, u32); - s32 (*read_mac_addr)(struct e1000_hw *); - s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *); - s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); + s32 (*read_mac_addr)(struct e1000_hw *); + s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *); + s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); void (*release_swfw_sync)(struct e1000_hw *, u16); #ifdef CONFIG_IGB_HWMON s32 (*get_thermal_sensor_data)(struct e1000_hw *); @@ -338,31 +338,31 @@ struct e1000_mac_operations { }; struct e1000_phy_operations { - s32 (*acquire)(struct e1000_hw *); - s32 (*check_polarity)(struct e1000_hw *); - s32 (*check_reset_block)(struct e1000_hw *); - s32 (*force_speed_duplex)(struct e1000_hw *); - s32 (*get_cfg_done)(struct e1000_hw *hw); - s32 (*get_cable_length)(struct e1000_hw *); - s32 (*get_phy_info)(struct e1000_hw *); - s32 (*read_reg)(struct e1000_hw *, u32, u16 *); + s32 (*acquire)(struct e1000_hw *); + s32 (*check_polarity)(struct e1000_hw *); + s32 (*check_reset_block)(struct e1000_hw *); + s32 (*force_speed_duplex)(struct e1000_hw *); + s32 (*get_cfg_done)(struct e1000_hw *hw); + s32 (*get_cable_length)(struct e1000_hw *); + s32 (*get_phy_info)(struct e1000_hw *); + s32 (*read_reg)(struct e1000_hw *, u32, u16 *); void (*release)(struct e1000_hw *); - s32 (*reset)(struct e1000_hw *); - s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); - s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); - s32 (*write_reg)(struct e1000_hw *, u32, u16); + s32 (*reset)(struct e1000_hw *); + s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); + s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); + s32 (*write_reg)(struct e1000_hw *, u32, u16); s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); }; struct e1000_nvm_operations { - s32 (*acquire)(struct e1000_hw *); - s32 (*read)(struct e1000_hw *, u16, u16, u16 *); + s32 (*acquire)(struct e1000_hw *); + s32 (*read)(struct e1000_hw *, u16, u16, u16 *); void (*release)(struct e1000_hw *); - s32 (*write)(struct e1000_hw *, u16, u16, u16 *); - s32 (*update)(struct e1000_hw *); - s32 (*validate)(struct e1000_hw *); - s32 (*valid_led_default)(struct e1000_hw *, u16 *); + s32 (*write)(struct e1000_hw *, u16, u16, u16 *); + s32 (*update)(struct e1000_hw *); + s32 (*validate)(struct e1000_hw *); + s32 (*valid_led_default)(struct e1000_hw *, u16 *); }; #define E1000_MAX_SENSORS 3 diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 1e0c404db81..42e42a93a4b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -442,7 +442,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) * The caller must have a packed mc_addr_list of multicast addresses. **/ void igb_update_mc_addr_list(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count) + u8 *mc_addr_list, u32 mc_addr_count) { u32 hash_value, hash_bit, hash_reg; int i; @@ -866,8 +866,7 @@ s32 igb_config_fc_after_link_up(struct e1000_hw *hw) goto out; if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { - hw_dbg("Copper PHY and Auto Neg " - "has not completed.\n"); + hw_dbg("Copper PHY and Auto Neg has not completed.\n"); goto out; } @@ -1298,7 +1297,7 @@ static s32 igb_valid_led_default(struct e1000_hw *hw, u16 *data) } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { - switch(hw->phy.media_type) { + switch (hw->phy.media_type) { case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_82575_SERDES; break; diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index 9abf82919c6..89f635fd5dd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -480,6 +480,7 @@ s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) /* Loop to allow for up to whole page write of eeprom */ while (widx < words) { u16 word_out = data[widx]; + word_out = (word_out >> 8) | (word_out << 8); igb_shift_out_eec_bits(hw, word_out, 16); widx++; diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h index 5b101170b17..7f54d7e6832 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.h +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h @@ -32,7 +32,7 @@ void igb_release_nvm(struct e1000_hw *hw); s32 igb_read_mac_addr(struct e1000_hw *hw); s32 igb_read_part_num(struct e1000_hw *hw, u32 *part_num); s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, - u32 part_num_size); + u32 part_num_size); s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 4009bbab740..ec7dc6c511b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -924,8 +924,7 @@ static s32 igb_copper_link_autoneg(struct e1000_hw *hw) if (phy->autoneg_wait_to_complete) { ret_val = igb_wait_autoneg(hw); if (ret_val) { - hw_dbg("Error while waiting for " - "autoneg to complete\n"); + hw_dbg("Error while waiting for autoneg to complete\n"); goto out; } } diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index bdb246e848e..034e7c56dc4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -301,9 +301,9 @@ #define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ #define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) #define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ - (0x054E0 + ((_i - 16) * 8))) + (0x054E0 + ((_i - 16) * 8))) #define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ - (0x054E4 + ((_i - 16) * 8))) + (0x054E4 + ((_i - 16) * 8))) #define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) #define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) #define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) @@ -358,8 +358,7 @@ #define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) #define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) #define E1000_DVMOLR(_n) (0x0C038 + (64 * (_n))) -#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* VLAN Virtual Machine - * Filter - RW */ +#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* VLAN VM Filter */ #define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) struct e1000_hw; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 27130065d92..77f16121d24 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -198,6 +198,7 @@ struct igb_tx_buffer { unsigned int bytecount; u16 gso_segs; __be16 protocol; + DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index e5570acbeea..8ef9e287da5 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1060,8 +1060,8 @@ static struct igb_reg_test reg_test_i350[] = { { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, { E1000_RA, 0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, @@ -1103,8 +1103,8 @@ static struct igb_reg_test reg_test_82580[] = { { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, { E1000_RA, 0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, @@ -1149,14 +1149,14 @@ static struct igb_reg_test reg_test_82576[] = { { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, - { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, { E1000_RA, 0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, { E1000_RA, 0, 16, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF }, { E1000_RA2, 0, 8, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, { E1000_RA2, 0, 8, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF }, - { E1000_MTA, 0, 128,TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, { 0, 0, 0, 0 } }; @@ -1196,8 +1196,8 @@ static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data, { struct e1000_hw *hw = &adapter->hw; u32 pat, val; - static const u32 _test[] = - {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + static const u32 _test[] = { + 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { wr32(reg, (_test[pat] & write)); val = rd32(reg) & mask; @@ -1218,6 +1218,7 @@ static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data, { struct e1000_hw *hw = &adapter->hw; u32 val; + wr32(reg, write & mask); val = rd32(reg); if ((write & mask) != (val & mask)) { @@ -1387,14 +1388,14 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) /* Hook up test interrupt handler just for this test */ if (adapter->flags & IGB_FLAG_HAS_MSIX) { if (request_irq(adapter->msix_entries[0].vector, - igb_test_intr, 0, netdev->name, adapter)) { + igb_test_intr, 0, netdev->name, adapter)) { *data = 1; return -1; } } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, - igb_test_intr, 0, netdev->name, adapter)) { + igb_test_intr, 0, netdev->name, adapter)) { *data = 1; return -1; } @@ -1949,6 +1950,7 @@ static int igb_link_test(struct igb_adapter *adapter, u64 *data) *data = 0; if (hw->phy.media_type == e1000_media_type_internal_serdes) { int i = 0; + hw->mac.serdes_has_link = false; /* On some blade server designs, link establishment diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 16430a8440f..50cfe7db3b1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -217,8 +217,7 @@ static void igb_netpoll(struct net_device *); #ifdef CONFIG_PCI_IOV static unsigned int max_vfs = 0; module_param(max_vfs, uint, 0); -MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " - "per physical function"); +MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); #endif /* CONFIG_PCI_IOV */ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, @@ -384,8 +383,7 @@ static void igb_dump(struct igb_adapter *adapter) /* Print netdevice Info */ if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start " - "last_rx\n"); + pr_info("Device Name state trans_start last_rx\n"); pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, netdev->state, netdev->trans_start, netdev->last_rx); } @@ -438,9 +436,7 @@ static void igb_dump(struct igb_adapter *adapter) pr_info("------------------------------------\n"); pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " - "[bi->dma ] leng ntw timestamp " - "bi->skb\n"); + pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { const char *next_desc; @@ -458,9 +454,8 @@ static void igb_dump(struct igb_adapter *adapter) else next_desc = ""; - pr_info("T [0x%03X] %016llX %016llX %016llX" - " %04X %p %016llX %p%s\n", i, - le64_to_cpu(u0->a), + pr_info("T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", + i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), (u64)dma_unmap_addr(buffer_info, dma), dma_unmap_len(buffer_info, len), @@ -519,10 +514,8 @@ rx_ring_summary: pr_info("------------------------------------\n"); pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " - "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); - pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" - "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); + pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); + pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { const char *next_desc; @@ -681,9 +674,9 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) static int __init igb_init_module(void) { int ret; + pr_info("%s - version %s\n", igb_driver_string, igb_driver_version); - pr_info("%s\n", igb_copyright); #ifdef CONFIG_IGB_DCA @@ -1345,6 +1338,7 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) for (; v_idx < q_vectors; v_idx++) { int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, tqpv, txr_idx, rqpv, rxr_idx); @@ -1484,6 +1478,7 @@ static void igb_irq_disable(struct igb_adapter *adapter) */ if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 regval = rd32(E1000_EIAM); + wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); wr32(E1000_EIMC, adapter->eims_enable_mask); regval = rd32(E1000_EIAC); @@ -1495,6 +1490,7 @@ static void igb_irq_disable(struct igb_adapter *adapter) wrfl(); if (adapter->flags & IGB_FLAG_HAS_MSIX) { int i; + for (i = 0; i < adapter->num_q_vectors; i++) synchronize_irq(adapter->msix_entries[i].vector); } else { @@ -1513,6 +1509,7 @@ static void igb_irq_enable(struct igb_adapter *adapter) if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; u32 regval = rd32(E1000_EIAC); + wr32(E1000_EIAC, regval | adapter->eims_enable_mask); regval = rd32(E1000_EIAM); wr32(E1000_EIAM, regval | adapter->eims_enable_mask); @@ -1745,6 +1742,7 @@ int igb_up(struct igb_adapter *adapter) /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -1960,6 +1958,7 @@ void igb_reset(struct igb_adapter *adapter) /* disable receive for all VFs and wait one second */ if (adapter->vfs_allocated_count) { int i; + for (i = 0 ; i < adapter->vfs_allocated_count; i++) adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; @@ -3077,6 +3076,7 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -3248,7 +3248,7 @@ void igb_setup_tctl(struct igb_adapter *adapter) * Configure a transmit ring after a reset. **/ void igb_configure_tx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) + struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; u32 txdctl = 0; @@ -3430,6 +3430,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) if (hw->mac.type > e1000_82575) { /* Set the default pool for the PF's first queue */ u32 vtctl = rd32(E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | E1000_VT_CTL_DISABLE_DEF_POOL); vtctl |= adapter->vfs_allocated_count << @@ -3511,7 +3512,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) } static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, - int vfn) + int vfn) { struct e1000_hw *hw = &adapter->hw; u32 vmolr; @@ -4077,7 +4078,7 @@ static void igb_spoof_check(struct igb_adapter *adapter) if (!adapter->wvbr) return; - for(j = 0; j < adapter->vfs_allocated_count; j++) { + for (j = 0; j < adapter->vfs_allocated_count; j++) { if (adapter->wvbr & (1 << j) || adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { dev_warn(&adapter->pdev->dev, @@ -4209,14 +4210,15 @@ static void igb_watchdog_task(struct work_struct *work) if (!netif_carrier_ok(netdev)) { u32 ctrl; + hw->mac.ops.get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); ctrl = rd32(E1000_CTRL); /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s " - "Duplex, Flow Control: %s\n", + netdev_info(netdev, + "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", netdev->name, adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? @@ -4242,11 +4244,8 @@ static void igb_watchdog_task(struct work_struct *work) /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, - E1000_THSTAT_LINK_THROTTLE)) { - netdev_info(netdev, "The network adapter link " - "speed was downshifted because it " - "overheated\n"); - } + E1000_THSTAT_LINK_THROTTLE)) + netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n"); /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; @@ -4277,12 +4276,11 @@ static void igb_watchdog_task(struct work_struct *work) /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) { - netdev_err(netdev, "The network adapter was " - "stopped because it overheated\n"); + netdev_err(netdev, "The network adapter was stopped because it overheated\n"); } /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Down\n", + netdev_info(netdev, "igb: %s NIC Link is Down\n", netdev->name); netif_carrier_off(netdev); @@ -4344,6 +4342,7 @@ static void igb_watchdog_task(struct work_struct *work) /* Cause software interrupt to ensure Rx ring is cleaned */ if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 eics = 0; + for (i = 0; i < adapter->num_q_vectors; i++) eics |= adapter->q_vector[i]->eims_value; wr32(E1000_EICS, eics); @@ -4483,13 +4482,12 @@ static void igb_update_itr(struct igb_q_vector *q_vector, case low_latency: /* 50 usec aka 20000 ints/s */ if (bytes > 10000) { /* this if handles the TSO accounting */ - if (bytes/packets > 8000) { + if (bytes/packets > 8000) itrval = bulk_latency; - } else if ((packets < 10) || ((bytes/packets) > 1200)) { + else if ((packets < 10) || ((bytes/packets) > 1200)) itrval = bulk_latency; - } else if ((packets > 35)) { + else if ((packets > 35)) itrval = lowest_latency; - } } else if (bytes/packets > 2000) { itrval = bulk_latency; } else if (packets <= 2 && bytes < 512) { @@ -4675,6 +4673,7 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) return; } else { u8 l4_hdr = 0; + switch (first->protocol) { case htons(ETH_P_IP): vlan_macip_lens |= skb_network_header_len(skb); @@ -4962,6 +4961,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, */ if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { unsigned short f; + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); } else { @@ -5621,6 +5621,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) vmolr |= E1000_VMOLR_MPME; } else if (vf_data->num_vf_mc_hashes) { int j; + vmolr |= E1000_VMOLR_ROMPE; for (j = 0; j < vf_data->num_vf_mc_hashes; j++) igb_mta_set(hw, vf_data->vf_mc_hashes[j]); @@ -5672,6 +5673,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) for (i = 0; i < adapter->vfs_allocated_count; i++) { u32 vmolr = rd32(E1000_VMOLR(i)); + vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); vf_data = &adapter->vf_data[i]; @@ -5770,6 +5772,7 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) if (!adapter->vf_data[vf].vlans_enabled) { u32 size; + reg = rd32(E1000_VMOLR(vf)); size = reg & E1000_VMOLR_RLPML_MASK; size += 4; @@ -5798,6 +5801,7 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) adapter->vf_data[vf].vlans_enabled--; if (!adapter->vf_data[vf].vlans_enabled) { u32 size; + reg = rd32(E1000_VMOLR(vf)); size = reg & E1000_VMOLR_RLPML_MASK; size -= 4; @@ -5902,8 +5906,8 @@ static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) */ if (!add && (adapter->netdev->flags & IFF_PROMISC)) { u32 vlvf, bits; - int regndx = igb_find_vlvf_entry(adapter, vid); + if (regndx < 0) goto out; /* See if any other pools are set for this VLAN filter @@ -6963,6 +6967,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring, if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { u16 vid; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) vid = be16_to_cpu(rx_desc->wb.upper.vlan); @@ -7172,7 +7177,7 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; case SIOCGMIIREG: if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, - &data->val_out)) + &data->val_out)) return -EIO; break; case SIOCSMIIREG: @@ -8049,6 +8054,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { u32 reg = rd32(E1000_PCIEMISC); + wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); wr32(E1000_DMACR, 0); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index c6c4ca7d68e..c688c8a4c06 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -155,7 +155,6 @@ struct vf_data_storage { struct vf_macvlans { struct list_head l; int vf; - int rar_entry; bool free; bool is_macvlan; u8 vf_macvlan[ETH_ALEN]; @@ -613,6 +612,15 @@ static inline void ixgbe_write_tail(struct ixgbe_ring *ring, u32 value) #define MAX_MSIX_VECTORS_82598 18 #define MAX_Q_VECTORS_82598 16 +struct ixgbe_mac_addr { + u8 addr[ETH_ALEN]; + u16 queue; + u16 state; /* bitmask */ +}; +#define IXGBE_MAC_STATE_DEFAULT 0x1 +#define IXGBE_MAC_STATE_MODIFIED 0x2 +#define IXGBE_MAC_STATE_IN_USE 0x4 + #define MAX_Q_VECTORS MAX_Q_VECTORS_82599 #define MAX_MSIX_COUNT MAX_MSIX_VECTORS_82599 @@ -785,6 +793,7 @@ struct ixgbe_adapter { u32 timer_event_accumulator; u32 vferr_refcount; + struct ixgbe_mac_addr *mac_table; struct kobject *info_kobj; #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; @@ -863,6 +872,13 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter); int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, u16 subdevice_id); +#ifdef CONFIG_PCI_IOV +void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); +#endif +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *, struct ixgbe_ring *); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 4c78ea8946c..1c52e475348 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -337,19 +337,25 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) int i; bool link_up; - /* - * Validate the water mark configuration for packet buffer 0. Zero - * water marks indicate that the packet buffer was not configured - * and the watermarks for packet buffer 0 should always be configured. - */ - if (!hw->fc.low_water || - !hw->fc.high_water[0] || - !hw->fc.pause_time) { - hw_dbg(hw, "Invalid water mark configuration\n"); + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; goto out; } + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + /* * On 82598 having Rx FC on causes resets while doing 1G * so if it's on turn it off once we know link_speed. For @@ -432,12 +438,11 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg); IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg); - fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE; - /* Set up and enable Rx high/low water mark thresholds, enable XON. */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl); IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 981b8a7b100..bdc55819179 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -271,6 +271,7 @@ out: **/ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) { + s32 ret_val; u32 ctrl_ext; /* Set the media type */ @@ -292,12 +293,15 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) IXGBE_WRITE_FLUSH(hw); /* Setup flow control */ - ixgbe_setup_fc(hw); + ret_val = ixgbe_setup_fc(hw); + if (!ret_val) + goto out; /* Clear adapter stopped flag */ hw->adapter_stopped = false; - return 0; +out: + return ret_val; } /** @@ -2106,19 +2110,25 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) u32 fcrtl, fcrth; int i; - /* - * Validate the water mark configuration for packet buffer 0. Zero - * water marks indicate that the packet buffer was not configured - * and the watermarks for packet buffer 0 should always be configured. - */ - if (!hw->fc.low_water || - !hw->fc.high_water[0] || - !hw->fc.pause_time) { - hw_dbg(hw, "Invalid water mark configuration\n"); + /* Validate the water mark configuration. */ + if (!hw->fc.pause_time) { ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; goto out; } + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + /* Negotiate the fc mode to use */ ixgbe_fc_autoneg(hw); @@ -2181,12 +2191,11 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); - fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE; - /* Set up and enable Rx high/low water mark thresholds, enable XON. */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl); fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; } else { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index f12c40fb553..d15ff2e5edb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -141,8 +141,6 @@ static inline bool ixgbe_removed(void __iomem *addr) return unlikely(!addr); } -void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg); - static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value) { u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); @@ -172,18 +170,7 @@ static inline void ixgbe_write_reg64(struct ixgbe_hw *hw, u32 reg, u64 value) } #define IXGBE_WRITE_REG64(a, reg, value) ixgbe_write_reg64((a), (reg), (value)) -static inline u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) -{ - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); - u32 value; - - if (ixgbe_removed(reg_addr)) - return IXGBE_FAILED_READ_REG; - value = readl(reg_addr + reg); - if (unlikely(value == IXGBE_FAILED_READ_REG)) - ixgbe_check_remove(hw, reg); - return value; -} +u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg); #define IXGBE_READ_REG(a, reg) ixgbe_read_reg((a), (reg)) #define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) \ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index 7a77f37a7cb..d3ba63f9ad3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -208,7 +208,6 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg); - fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE; /* Configure PFC Tx thresholds per TC */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { if (!(pfc_en & (1 << i))) { @@ -217,6 +216,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) continue; } + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl); IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c index bdb99b3b0f3..3b932fe64ab 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c @@ -242,7 +242,6 @@ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc) max_tc = prio_tc[i]; } - fcrtl = (hw->fc.low_water << 10) | IXGBE_FCRTL_XONE; /* Configure PFC Tx thresholds per TC */ for (i = 0; i <= max_tc; i++) { @@ -257,6 +256,7 @@ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc) if (enabled) { reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl); } else { reg = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h index b16cc786750..0772b7730fc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h @@ -81,9 +81,7 @@ struct ixgbe_fcoe { void *extra_ddp_buffer; dma_addr_t extra_ddp_buffer_dma; unsigned long mode; -#ifdef CONFIG_IXGBE_DCB u8 up; -#endif }; #endif /* _IXGBE_FCOE_H */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d62e7a25cf9..8089ea9f2fb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -301,7 +301,7 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw) ixgbe_service_event_schedule(adapter); } -void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) +static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) { u32 value; @@ -320,6 +320,32 @@ void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) ixgbe_remove_adapter(hw); } +/** + * ixgbe_read_reg - Read from device register + * @hw: hw specific details + * @reg: offset of register to read + * + * Returns : value read or IXGBE_FAILED_READ_REG if removed + * + * This function is used to read device registers. It checks for device + * removal by confirming any read that returns all ones by checking the + * status register value for all ones. This function avoids reading from + * the hardware if a removal was previously detected in which case it + * returns IXGBE_FAILED_READ_REG (all ones). + */ +u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) +{ + u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u32 value; + + if (ixgbe_removed(reg_addr)) + return IXGBE_FAILED_READ_REG; + value = readl(reg_addr + reg); + if (unlikely(value == IXGBE_FAILED_READ_REG)) + ixgbe_check_remove(hw, reg); + return value; +} + static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev) { u16 value; @@ -3743,35 +3769,6 @@ static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, } /** - * ixgbe_vlan_filter_disable - helper to disable hw vlan filtering - * @adapter: driver data - */ -static void ixgbe_vlan_filter_disable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); -} - -/** - * ixgbe_vlan_filter_enable - helper to enable hw vlan filtering - * @adapter: driver data - */ -static void ixgbe_vlan_filter_enable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl |= IXGBE_VLNCTRL_VFE; - vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); -} - -/** * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping * @adapter: driver data */ @@ -3850,6 +3847,158 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) } /** + * ixgbe_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +static int ixgbe_write_mc_addr_list(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + if (!netif_running(netdev)) + return 0; + + if (hw->mac.ops.update_mc_addr_list) + hw->mac.ops.update_mc_addr_list(hw, netdev); + else + return -ENOMEM; + +#ifdef CONFIG_PCI_IOV + ixgbe_restore_vf_multicasts(adapter); +#endif + + return netdev_mc_count(netdev); +} + +#ifdef CONFIG_PCI_IOV +void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); + + adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); + } +} +#endif + +static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { + if (adapter->mac_table[i].state & + IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); + + adapter->mac_table[i].state &= + ~(IXGBE_MAC_STATE_MODIFIED); + } + } +} + +static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) +{ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + } + ixgbe_sync_mac_table(adapter); +} + +static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +/* this function destroys the first RAR entry */ +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, + u8 *addr) +{ + struct ixgbe_hw *hw = &adapter->hw; + + memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); + adapter->mac_table[0].queue = VMDQ_P(0); + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); +} + +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE); + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + ixgbe_sync_mac_table(adapter); + return i; + } + return -ENOMEM; +} + +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + ixgbe_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} +/** * ixgbe_write_uc_addr_list - write unicast addresses to RAR table * @netdev: network interface device structure * @@ -3858,39 +4007,23 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) * 0 on no addresses written * X on writing X addresses to the RAR table **/ -static int ixgbe_write_uc_addr_list(struct net_device *netdev) +static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - unsigned int rar_entries = hw->mac.num_rar_entries - 1; int count = 0; - /* In SR-IOV/VMDQ modes significantly less RAR entries are available */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - rar_entries = IXGBE_MAX_PF_MACVLANS - 1; - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > rar_entries) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) return -ENOMEM; if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha; - /* return error if we do not support writing to RAR table */ - if (!hw->mac.ops.set_rar) - return -ENOMEM; - netdev_for_each_uc_addr(ha, netdev) { - if (!rar_entries) - break; - hw->mac.ops.set_rar(hw, rar_entries--, ha->addr, - VMDQ_P(0), IXGBE_RAH_AV); + ixgbe_del_mac_filter(adapter, ha->addr, vfn); + ixgbe_add_mac_filter(adapter, ha->addr, vfn); count++; } } - /* write the addresses in reverse order to avoid write combining */ - for (; rar_entries > 0 ; rar_entries--) - hw->mac.ops.clear_rar(hw, rar_entries); - return count; } @@ -3908,11 +4041,12 @@ void ixgbe_set_rx_mode(struct net_device *netdev) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; + u32 vlnctrl; int count; /* Check for Promiscuous and All Multicast modes */ - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* set all bits that we expect to always be set */ fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */ @@ -3922,26 +4056,24 @@ void ixgbe_set_rx_mode(struct net_device *netdev) /* clear the bits we are changing the status of */ fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - + vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); if (netdev->flags & IFF_PROMISC) { hw->addr_ctrl.user_set_promisc = true; fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_MPE); + vmolr |= IXGBE_VMOLR_MPE; /* Only disable hardware filter vlans in promiscuous mode * if SR-IOV and VMDQ are disabled - otherwise ensure * that hardware VLAN filters remain enabled. */ if (!(adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED))) - ixgbe_vlan_filter_disable(adapter); - else - ixgbe_vlan_filter_enable(adapter); + vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); } else { if (netdev->flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; vmolr |= IXGBE_VMOLR_MPE; } - ixgbe_vlan_filter_enable(adapter); + vlnctrl |= IXGBE_VLNCTRL_VFE; hw->addr_ctrl.user_set_promisc = false; } @@ -3950,7 +4082,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = ixgbe_write_uc_addr_list(netdev); + count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); if (count < 0) { fctrl |= IXGBE_FCTRL_UPE; vmolr |= IXGBE_VMOLR_ROPE; @@ -3960,11 +4092,13 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * then we should just turn on promiscuous mode so * that we can at least receive multicast traffic */ - hw->mac.ops.update_mc_addr_list(hw, netdev); - vmolr |= IXGBE_VMOLR_ROMPE; - - if (adapter->num_vfs) - ixgbe_restore_vf_multicasts(adapter); + count = ixgbe_write_mc_addr_list(netdev); + if (count < 0) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else if (count) { + vmolr |= IXGBE_VMOLR_ROMPE; + } if (hw->mac.type != ixgbe_mac_82598EB) { vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) & @@ -3985,6 +4119,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) /* NOTE: VLAN filtering is disabled by setting PROMISC */ } + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) @@ -4101,8 +4236,8 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && (pb == ixgbe_fcoe_get_tc(adapter))) tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; - #endif + /* Calculate delay value for device */ switch (hw->mac.type) { case ixgbe_mac_X540: @@ -4143,7 +4278,7 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) * @adapter: board private structure to calculate for * @pb: packet buffer to calculate */ -static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) { struct ixgbe_hw *hw = &adapter->hw; struct net_device *dev = adapter->netdev; @@ -4153,6 +4288,14 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) /* Calculate max LAN frame size */ tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if ((dev->features & NETIF_F_FCOE_MTU) && + (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up))) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; +#endif + /* Calculate delay value for device */ switch (hw->mac.type) { case ixgbe_mac_X540: @@ -4179,15 +4322,17 @@ static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) if (!num_tc) num_tc = 1; - hw->fc.low_water = ixgbe_lpbthresh(adapter); - for (i = 0; i < num_tc; i++) { hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i); /* Low water marks must not be larger than high water marks */ - if (hw->fc.low_water > hw->fc.high_water[i]) - hw->fc.low_water = 0; + if (hw->fc.low_water[i] > hw->fc.high_water[i]) + hw->fc.low_water[i] = 0; } + + for (; i < MAX_TRAFFIC_CLASS; i++) + hw->fc.high_water[i] = 0; } static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) @@ -4249,20 +4394,10 @@ static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool, vmolr |= IXGBE_VMOLR_ROMPE; hw->mac.ops.update_mc_addr_list(hw, dev); } - ixgbe_write_uc_addr_list(adapter->netdev); + ixgbe_write_uc_addr_list(adapter->netdev, pool); IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); } -static void ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 pool) -{ - struct ixgbe_hw *hw = &adapter->hw; - unsigned int entry; - - entry = hw->mac.num_rar_entries - pool; - hw->mac.ops.set_rar(hw, entry, addr, VMDQ_P(pool), IXGBE_RAH_AV); -} - static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) { struct ixgbe_adapter *adapter = vadapter->real_adapter; @@ -4742,7 +4877,9 @@ void ixgbe_up(struct ixgbe_adapter *adapter) void ixgbe_reset(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; int err; + u8 old_addr[ETH_ALEN]; if (ixgbe_removed(hw->hw_addr)) return; @@ -4778,9 +4915,10 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - - /* reprogram the RAR[0] in case user changed it. */ - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV); + /* do not flush user set addresses */ + memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); + ixgbe_flush_sw_mac_table(adapter); + ixgbe_mac_set_default_filter(adapter, old_addr); /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) @@ -5026,6 +5164,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) #endif /* CONFIG_IXGBE_DCB */ #endif /* IXGBE_FCOE */ + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * + hw->mac.num_rar_entries, + GFP_ATOMIC); + /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -7172,16 +7314,17 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; + int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV); - - return 0; + ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); + return ret > 0 ? 0 : ret; } static int @@ -8187,6 +8330,8 @@ skip_sriov: goto err_sw_init; } + ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + setup_timer(&adapter->service_timer, &ixgbe_service_timer, (unsigned long) adapter); @@ -8319,6 +8464,7 @@ err_sw_init: ixgbe_disable_sriov(adapter); adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; iounmap(adapter->io_addr); + kfree(adapter->mac_table); err_ioremap: free_netdev(netdev); err_alloc_etherdev: @@ -8392,6 +8538,7 @@ static void ixgbe_remove(struct pci_dev *pdev) e_dev_info("complete\n"); + kfree(adapter->mac_table); free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index e6c68d396c9..a01417c0662 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -72,8 +72,6 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) for (i = 0; i < num_vf_macvlans; i++) { mv_list->vf = -1; mv_list->free = true; - mv_list->rar_entry = hw->mac.num_rar_entries - - (i + adapter->num_vfs + 1); list_add(&mv_list->l, &adapter->vf_mvs.l); mv_list++; } @@ -327,6 +325,7 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, u32 vector_bit; u32 vector_reg; u32 mta_reg; + u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); /* only so many hash values supported */ entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES); @@ -353,25 +352,13 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, mta_reg |= (1 << vector_bit); IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); } + vmolr |= IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); return 0; } -static void ixgbe_restore_vf_macvlans(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct list_head *pos; - struct vf_macvlans *entry; - - list_for_each(pos, &adapter->vf_mvs.l) { - entry = list_entry(pos, struct vf_macvlans, l); - if (!entry->free) - hw->mac.ops.set_rar(hw, entry->rar_entry, - entry->vf_macvlan, - entry->vf, IXGBE_RAH_AV); - } -} - +#ifdef CONFIG_PCI_IOV void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -382,6 +369,7 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) u32 mta_reg; for (i = 0; i < adapter->num_vfs; i++) { + u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i)); vfinfo = &adapter->vfinfo[i]; for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) { hw->addr_ctrl.mta_in_use++; @@ -391,11 +379,18 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) mta_reg |= (1 << vector_bit); IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); } + + if (vfinfo->num_vf_mc_hashes) + vmolr |= IXGBE_VMOLR_ROMPE; + else + vmolr &= ~IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr); } /* Restore any VF macvlans */ - ixgbe_restore_vf_macvlans(adapter); + ixgbe_full_sync_mac_table(adapter); } +#endif static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf) @@ -495,8 +490,7 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe) { u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); - vmolr |= (IXGBE_VMOLR_ROMPE | - IXGBE_VMOLR_BAM); + vmolr |= IXGBE_VMOLR_BAM; if (aupe) vmolr |= IXGBE_VMOLR_AUPE; else @@ -514,7 +508,6 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; - int rar_entry = hw->mac.num_rar_entries - (vf + 1); u8 num_tcs = netdev_get_num_tc(adapter->netdev); /* add PF assigned VLAN or VLAN 0 */ @@ -544,7 +537,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) /* Flush and reset the mta with the new values */ ixgbe_set_rx_mode(adapter->netdev); - hw->mac.ops.clear_rar(hw, rar_entry); + ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf); /* reset VF api back to unknown */ adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10; @@ -553,11 +546,9 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, int vf, unsigned char *mac_addr) { - struct ixgbe_hw *hw = &adapter->hw; - int rar_entry = hw->mac.num_rar_entries - (vf + 1); - + ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf); memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr, ETH_ALEN); - hw->mac.ops.set_rar(hw, rar_entry, mac_addr, vf, IXGBE_RAH_AV); + ixgbe_add_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf); return 0; } @@ -565,7 +556,6 @@ static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, int vf, int index, unsigned char *mac_addr) { - struct ixgbe_hw *hw = &adapter->hw; struct list_head *pos; struct vf_macvlans *entry; @@ -576,7 +566,8 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, entry->vf = -1; entry->free = true; entry->is_macvlan = false; - hw->mac.ops.clear_rar(hw, entry->rar_entry); + ixgbe_del_mac_filter(adapter, + entry->vf_macvlan, vf); } } } @@ -612,7 +603,7 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter, entry->vf = vf; memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN); - hw->mac.ops.set_rar(hw, entry->rar_entry, mac_addr, vf, IXGBE_RAH_AV); + ixgbe_add_mac_filter(adapter, mac_addr, vf); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 139eaddfb2e..cea64014760 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -34,7 +34,9 @@ */ #define IXGBE_MAX_VFS_DRV_LIMIT (IXGBE_MAX_VF_FUNCTIONS - 1) +#ifdef CONFIG_PCI_IOV void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); +#endif void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 8a6ff2423f0..551d6089a4d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2746,7 +2746,7 @@ struct ixgbe_bus_info { /* Flow control parameters */ struct ixgbe_fc_info { u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */ - u32 low_water; /* Flow Control Low-water */ + u32 low_water[MAX_TRAFFIC_CLASS]; /* Flow Control Low-water */ u16 pause_time; /* Flow Control Pause timer */ bool send_xon; /* Flow control send XON */ bool strict_ieee; /* Strict IEEE mode */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d0799e8e31e..eacce3a2e9e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -85,7 +85,7 @@ static DEFINE_PCI_DEVICE_TABLE(ixgbevf_pci_tbl) = { MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); -MODULE_DESCRIPTION("Intel(R) 82599 Virtual Function Driver"); +MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index a2844ff322c..e900c1abdef 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -534,15 +534,6 @@ static inline void s2io_start_all_tx_queue(struct s2io_nic *sp) netif_tx_start_all_queues(sp->dev); } -static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no) -{ - if (!sp->config.multiq) - sp->mac_control.fifos[fifo_no].queue_state = - FIFO_QUEUE_START; - - netif_tx_start_all_queues(sp->dev); -} - static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp) { if (!sp->config.multiq) { diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 27e8c824b20..137f366ec7e 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -425,8 +425,8 @@ dmamem_err: * @rx_rsize: ring size * Description: this function initializes the DMA RX descriptor */ -void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring, - int rx_rsize) +static void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring, + int rx_rsize) { dma_free_coherent(dev, rx_rsize * sizeof(struct sxgbe_rx_norm_desc), rx_ring->dma_rx, rx_ring->dma_rx_phy); @@ -519,8 +519,8 @@ error: * @tx_rsize: ring size * Description: this function initializes the DMA TX descriptor */ -void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring, - int tx_rsize) +static void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring, + int tx_rsize) { dma_free_coherent(dev, tx_rsize * sizeof(struct sxgbe_tx_norm_desc), tx_ring->dma_tx, tx_ring->dma_tx_phy); @@ -1218,11 +1218,10 @@ static int sxgbe_release(struct net_device *dev) return 0; } - /* Prepare first Tx descriptor for doing TSO operation */ -void sxgbe_tso_prepare(struct sxgbe_priv_data *priv, - struct sxgbe_tx_norm_desc *first_desc, - struct sk_buff *skb) +static void sxgbe_tso_prepare(struct sxgbe_priv_data *priv, + struct sxgbe_tx_norm_desc *first_desc, + struct sk_buff *skb) { unsigned int total_hdr_len, tcp_hdr_len; @@ -1910,40 +1909,6 @@ static void sxgbe_set_rx_mode(struct net_device *dev) readl(ioaddr + SXGBE_HASH_LOW)); } -/** - * sxgbe_config - entry point for changing configuration mode passed on by - * ifconfig - * @dev : pointer to the device structure - * @map : pointer to the device mapping structure - * Description: - * This function is a driver entry point which gets called by the kernel - * whenever some device configuration is changed. - * Return value: - * This function returns 0 if success and appropriate error otherwise. - */ -static int sxgbe_config(struct net_device *dev, struct ifmap *map) -{ - struct sxgbe_priv_data *priv = netdev_priv(dev); - - /* Can't act on a running interface */ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* Don't allow changing the I/O address */ - if (map->base_addr != (unsigned long)priv->ioaddr) { - netdev_warn(dev, "can't change I/O address\n"); - return -EOPNOTSUPP; - } - - /* Don't allow changing the IRQ */ - if (map->irq != priv->irq) { - netdev_warn(dev, "not change IRQ number %d\n", priv->irq); - return -EOPNOTSUPP; - } - - return 0; -} - #ifdef CONFIG_NET_POLL_CONTROLLER /** * sxgbe_poll_controller - entry point for polling receive by device @@ -2005,7 +1970,6 @@ static const struct net_device_ops sxgbe_netdev_ops = { .ndo_set_rx_mode = sxgbe_set_rx_mode, .ndo_tx_timeout = sxgbe_tx_timeout, .ndo_do_ioctl = sxgbe_ioctl, - .ndo_set_config = sxgbe_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sxgbe_poll_controller, #endif diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d940034acdd..93cf4f63f42 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2214,27 +2214,6 @@ static void stmmac_tx_timeout(struct net_device *dev) stmmac_tx_err(priv); } -/* Configuration changes (passed on by ifconfig) */ -static int stmmac_config(struct net_device *dev, struct ifmap *map) -{ - if (dev->flags & IFF_UP) /* can't act on a running interface */ - return -EBUSY; - - /* Don't allow changing the I/O address */ - if (map->base_addr != dev->base_addr) { - pr_warn("%s: can't change I/O address\n", dev->name); - return -EOPNOTSUPP; - } - - /* Don't allow changing the IRQ */ - if (map->irq != dev->irq) { - pr_warn("%s: not change IRQ number %d\n", dev->name, dev->irq); - return -EOPNOTSUPP; - } - - return 0; -} - /** * stmmac_set_rx_mode - entry point for multicast addressing * @dev : pointer to the device structure @@ -2600,7 +2579,6 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_set_rx_mode = stmmac_set_rx_mode, .ndo_tx_timeout = stmmac_tx_timeout, .ndo_do_ioctl = stmmac_ioctl, - .ndo_set_config = stmmac_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 73f74f36943..7399a52f7c2 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -313,19 +313,6 @@ static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, }; static struct mii_bus *cpmac_mii; -static int cpmac_config(struct net_device *dev, struct ifmap *map) -{ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* Don't allow changing the I/O address */ - if (map->base_addr != dev->base_addr) - return -EOPNOTSUPP; - - /* ignore other fields */ - return 0; -} - static void cpmac_set_multicast_list(struct net_device *dev) { struct netdev_hw_addr *ha; @@ -1100,7 +1087,6 @@ static const struct net_device_ops cpmac_netdev_ops = { .ndo_tx_timeout = cpmac_tx_timeout, .ndo_set_rx_mode = cpmac_set_multicast_list, .ndo_do_ioctl = cpmac_ioctl, - .ndo_set_config = cpmac_config, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig index 8a049a2b447..f66ddaee0c8 100644 --- a/drivers/net/ethernet/via/Kconfig +++ b/drivers/net/ethernet/via/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_VIA config VIA_RHINE tristate "VIA Rhine support" - depends on PCI + depends on (PCI || USE_OF) select CRC32 select MII ---help--- diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index f61dc2b72bb..4fa92012cea 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -94,6 +94,10 @@ static const int multicast_filter_limit = 32; #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/pci.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -279,6 +283,15 @@ static DEFINE_PCI_DEVICE_TABLE(rhine_pci_tbl) = { }; MODULE_DEVICE_TABLE(pci, rhine_pci_tbl); +/* OpenFirmware identifiers for platform-bus devices + * The .data field is currently only used to store chip revision + * (for quirks etc.) + */ +static struct of_device_id rhine_of_tbl[] = { + { .compatible = "via,vt8500-rhine", .data = (void *)0x84 }, + { } /* terminate list */ +}; +MODULE_DEVICE_TABLE(of, rhine_of_tbl); /* Offsets to the device registers. */ enum register_offsets { @@ -446,7 +459,8 @@ struct rhine_private { unsigned char *tx_bufs; dma_addr_t tx_bufs_dma; - struct pci_dev *pdev; + int revision; + int irq; long pioaddr; struct net_device *dev; struct napi_struct napi; @@ -701,7 +715,7 @@ static void rhine_reload_eeprom(long pioaddr, struct net_device *dev) static void rhine_poll(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); - const int irq = rp->pdev->irq; + const int irq = rp->irq; disable_irq(irq); rhine_interrupt(irq, dev); @@ -846,7 +860,8 @@ static void rhine_hw_init(struct net_device *dev, long pioaddr) msleep(5); /* Reload EEPROM controlled bytes cleared by soft reset */ - rhine_reload_eeprom(pioaddr, dev); + if (dev_is_pci(dev->dev.parent)) + rhine_reload_eeprom(pioaddr, dev); } static const struct net_device_ops rhine_netdev_ops = { @@ -867,125 +882,55 @@ static const struct net_device_ops rhine_netdev_ops = { #endif }; -static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +static int rhine_init_one_common(struct device *hwdev, int revision, + long pioaddr, void __iomem *ioaddr, int irq) { struct net_device *dev; struct rhine_private *rp; - int i, rc; - u32 quirks; - long pioaddr; - long memaddr; - void __iomem *ioaddr; - int io_size, phy_id; + int i, rc, phy_id; const char *name; -#ifdef USE_MMIO - int bar = 1; -#else - int bar = 0; -#endif - -/* when built into the kernel, we only print version if device is found */ -#ifndef MODULE - pr_info_once("%s\n", version); -#endif - - io_size = 256; - phy_id = 0; - quirks = 0; - name = "Rhine"; - if (pdev->revision < VTunknown0) { - quirks = rqRhineI; - io_size = 128; - } - else if (pdev->revision >= VT6102) { - quirks = rqWOL | rqForceReset; - if (pdev->revision < VT6105) { - name = "Rhine II"; - quirks |= rqStatusWBRace; /* Rhine-II exclusive */ - } - else { - phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */ - if (pdev->revision >= VT6105_B0) - quirks |= rq6patterns; - if (pdev->revision < VT6105M) - name = "Rhine III"; - else - name = "Rhine III (Management Adapter)"; - } - } - - rc = pci_enable_device(pdev); - if (rc) - goto err_out; /* this should always be supported */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_mask(hwdev, DMA_BIT_MASK(32)); if (rc) { - dev_err(&pdev->dev, - "32-bit PCI DMA addresses not supported by the card!?\n"); - goto err_out_pci_disable; - } - - /* sanity check */ - if ((pci_resource_len(pdev, 0) < io_size) || - (pci_resource_len(pdev, 1) < io_size)) { - rc = -EIO; - dev_err(&pdev->dev, "Insufficient PCI resources, aborting\n"); - goto err_out_pci_disable; + dev_err(hwdev, "32-bit DMA addresses not supported by the card!?\n"); + goto err_out; } - pioaddr = pci_resource_start(pdev, 0); - memaddr = pci_resource_start(pdev, 1); - - pci_set_master(pdev); - dev = alloc_etherdev(sizeof(struct rhine_private)); if (!dev) { rc = -ENOMEM; - goto err_out_pci_disable; + goto err_out; } - SET_NETDEV_DEV(dev, &pdev->dev); + SET_NETDEV_DEV(dev, hwdev); rp = netdev_priv(dev); rp->dev = dev; - rp->quirks = quirks; + rp->revision = revision; rp->pioaddr = pioaddr; - rp->pdev = pdev; + rp->base = ioaddr; + rp->irq = irq; rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT); - rc = pci_request_regions(pdev, DRV_NAME); - if (rc) - goto err_out_free_netdev; - - ioaddr = pci_iomap(pdev, bar, io_size); - if (!ioaddr) { - rc = -EIO; - dev_err(&pdev->dev, - "ioremap failed for device %s, region 0x%X @ 0x%lX\n", - pci_name(pdev), io_size, memaddr); - goto err_out_free_res; - } - -#ifdef USE_MMIO - enable_mmio(pioaddr, quirks); - - /* Check that selected MMIO registers match the PIO ones */ - i = 0; - while (mmio_verify_registers[i]) { - int reg = mmio_verify_registers[i++]; - unsigned char a = inb(pioaddr+reg); - unsigned char b = readb(ioaddr+reg); - if (a != b) { - rc = -EIO; - dev_err(&pdev->dev, - "MMIO do not match PIO [%02x] (%02x != %02x)\n", - reg, a, b); - goto err_out_unmap; + phy_id = 0; + name = "Rhine"; + if (revision < VTunknown0) { + rp->quirks = rqRhineI; + } else if (revision >= VT6102) { + rp->quirks = rqWOL | rqForceReset; + if (revision < VT6105) { + name = "Rhine II"; + rp->quirks |= rqStatusWBRace; /* Rhine-II exclusive */ + } else { + phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */ + if (revision >= VT6105_B0) + rp->quirks |= rq6patterns; + if (revision < VT6105M) + name = "Rhine III"; + else + name = "Rhine III (Management Adapter)"; } } -#endif /* USE_MMIO */ - - rp->base = ioaddr; u64_stats_init(&rp->tx_stats.syncp); u64_stats_init(&rp->rx_stats.syncp); @@ -1030,7 +975,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rp->quirks & rqRhineI) dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; - if (pdev->revision >= VT6105M) + if (rp->revision >= VT6105M) dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; @@ -1038,18 +983,12 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* dev->name not defined before register_netdev()! */ rc = register_netdev(dev); if (rc) - goto err_out_unmap; + goto err_out_free_netdev; netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n", - name, -#ifdef USE_MMIO - memaddr, -#else - (long)ioaddr, -#endif - dev->dev_addr, pdev->irq); + name, (long)ioaddr, dev->dev_addr, rp->irq); - pci_set_drvdata(pdev, dev); + dev_set_drvdata(hwdev, dev); { u16 mii_cmd; @@ -1078,41 +1017,152 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_out_free_netdev: + free_netdev(dev); +err_out: + return rc; +} + +static int rhine_init_one_pci(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *hwdev = &pdev->dev; + int i, rc; + long pioaddr, memaddr; + void __iomem *ioaddr; + int io_size = pdev->revision < VTunknown0 ? 128 : 256; + u32 quirks = pdev->revision < VTunknown0 ? rqRhineI : 0; +#ifdef USE_MMIO + int bar = 1; +#else + int bar = 0; +#endif + +/* when built into the kernel, we only print version if device is found */ +#ifndef MODULE + pr_info_once("%s\n", version); +#endif + + rc = pci_enable_device(pdev); + if (rc) + goto err_out; + + /* sanity check */ + if ((pci_resource_len(pdev, 0) < io_size) || + (pci_resource_len(pdev, 1) < io_size)) { + rc = -EIO; + dev_err(hwdev, "Insufficient PCI resources, aborting\n"); + goto err_out_pci_disable; + } + + pioaddr = pci_resource_start(pdev, 0); + memaddr = pci_resource_start(pdev, 1); + + pci_set_master(pdev); + + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) + goto err_out_pci_disable; + + ioaddr = pci_iomap(pdev, bar, io_size); + if (!ioaddr) { + rc = -EIO; + dev_err(hwdev, + "ioremap failed for device %s, region 0x%X @ 0x%lX\n", + dev_name(hwdev), io_size, memaddr); + goto err_out_free_res; + } + +#ifdef USE_MMIO + enable_mmio(pioaddr, quirks); + + /* Check that selected MMIO registers match the PIO ones */ + i = 0; + while (mmio_verify_registers[i]) { + int reg = mmio_verify_registers[i++]; + unsigned char a = inb(pioaddr+reg); + unsigned char b = readb(ioaddr+reg); + + if (a != b) { + rc = -EIO; + dev_err(hwdev, + "MMIO do not match PIO [%02x] (%02x != %02x)\n", + reg, a, b); + goto err_out_unmap; + } + } +#endif /* USE_MMIO */ + + rc = rhine_init_one_common(&pdev->dev, pdev->revision, + pioaddr, ioaddr, pdev->irq); + if (!rc) + return 0; + err_out_unmap: pci_iounmap(pdev, ioaddr); err_out_free_res: pci_release_regions(pdev); -err_out_free_netdev: - free_netdev(dev); err_out_pci_disable: pci_disable_device(pdev); err_out: return rc; } +static int rhine_init_one_platform(struct platform_device *pdev) +{ + const struct of_device_id *match; + u32 revision; + int irq; + struct resource *res; + void __iomem *ioaddr; + + match = of_match_device(rhine_of_tbl, &pdev->dev); + if (!match) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ioaddr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ioaddr)) + return PTR_ERR(ioaddr); + + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (!irq) + return -EINVAL; + + revision = (u32)match->data; + if (!revision) + return -EINVAL; + + return rhine_init_one_common(&pdev->dev, revision, + (long)ioaddr, ioaddr, irq); +} + static int alloc_ring(struct net_device* dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; void *ring; dma_addr_t ring_dma; - ring = pci_alloc_consistent(rp->pdev, - RX_RING_SIZE * sizeof(struct rx_desc) + - TX_RING_SIZE * sizeof(struct tx_desc), - &ring_dma); + ring = dma_alloc_coherent(hwdev, + RX_RING_SIZE * sizeof(struct rx_desc) + + TX_RING_SIZE * sizeof(struct tx_desc), + &ring_dma, + GFP_ATOMIC); if (!ring) { netdev_err(dev, "Could not allocate DMA memory\n"); return -ENOMEM; } if (rp->quirks & rqRhineI) { - rp->tx_bufs = pci_alloc_consistent(rp->pdev, - PKT_BUF_SZ * TX_RING_SIZE, - &rp->tx_bufs_dma); + rp->tx_bufs = dma_alloc_coherent(hwdev, + PKT_BUF_SZ * TX_RING_SIZE, + &rp->tx_bufs_dma, + GFP_ATOMIC); if (rp->tx_bufs == NULL) { - pci_free_consistent(rp->pdev, - RX_RING_SIZE * sizeof(struct rx_desc) + - TX_RING_SIZE * sizeof(struct tx_desc), - ring, ring_dma); + dma_free_coherent(hwdev, + RX_RING_SIZE * sizeof(struct rx_desc) + + TX_RING_SIZE * sizeof(struct tx_desc), + ring, ring_dma); return -ENOMEM; } } @@ -1128,16 +1178,17 @@ static int alloc_ring(struct net_device* dev) static void free_ring(struct net_device* dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; - pci_free_consistent(rp->pdev, - RX_RING_SIZE * sizeof(struct rx_desc) + - TX_RING_SIZE * sizeof(struct tx_desc), - rp->rx_ring, rp->rx_ring_dma); + dma_free_coherent(hwdev, + RX_RING_SIZE * sizeof(struct rx_desc) + + TX_RING_SIZE * sizeof(struct tx_desc), + rp->rx_ring, rp->rx_ring_dma); rp->tx_ring = NULL; if (rp->tx_bufs) - pci_free_consistent(rp->pdev, PKT_BUF_SZ * TX_RING_SIZE, - rp->tx_bufs, rp->tx_bufs_dma); + dma_free_coherent(hwdev, PKT_BUF_SZ * TX_RING_SIZE, + rp->tx_bufs, rp->tx_bufs_dma); rp->tx_bufs = NULL; @@ -1146,6 +1197,7 @@ static void free_ring(struct net_device* dev) static void alloc_rbufs(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; dma_addr_t next; int i; @@ -1174,9 +1226,9 @@ static void alloc_rbufs(struct net_device *dev) break; rp->rx_skbuff_dma[i] = - pci_map_single(rp->pdev, skb->data, rp->rx_buf_sz, - PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[i])) { + dma_map_single(hwdev, skb->data, rp->rx_buf_sz, + DMA_FROM_DEVICE); + if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) { rp->rx_skbuff_dma[i] = 0; dev_kfree_skb(skb); break; @@ -1190,6 +1242,7 @@ static void alloc_rbufs(struct net_device *dev) static void free_rbufs(struct net_device* dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; int i; /* Free all the skbuffs in the Rx queue. */ @@ -1197,9 +1250,9 @@ static void free_rbufs(struct net_device* dev) rp->rx_ring[i].rx_status = 0; rp->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */ if (rp->rx_skbuff[i]) { - pci_unmap_single(rp->pdev, + dma_unmap_single(hwdev, rp->rx_skbuff_dma[i], - rp->rx_buf_sz, PCI_DMA_FROMDEVICE); + rp->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(rp->rx_skbuff[i]); } rp->rx_skbuff[i] = NULL; @@ -1230,6 +1283,7 @@ static void alloc_tbufs(struct net_device* dev) static void free_tbufs(struct net_device* dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; int i; for (i = 0; i < TX_RING_SIZE; i++) { @@ -1238,10 +1292,10 @@ static void free_tbufs(struct net_device* dev) rp->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */ if (rp->tx_skbuff[i]) { if (rp->tx_skbuff_dma[i]) { - pci_unmap_single(rp->pdev, + dma_unmap_single(hwdev, rp->tx_skbuff_dma[i], rp->tx_skbuff[i]->len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); } dev_kfree_skb(rp->tx_skbuff[i]); } @@ -1469,7 +1523,7 @@ static void init_registers(struct net_device *dev) rhine_set_rx_mode(dev); - if (rp->pdev->revision >= VT6105M) + if (rp->revision >= VT6105M) rhine_init_cam_filter(dev); napi_enable(&rp->napi); @@ -1581,16 +1635,15 @@ static int rhine_open(struct net_device *dev) void __iomem *ioaddr = rp->base; int rc; - rc = request_irq(rp->pdev->irq, rhine_interrupt, IRQF_SHARED, dev->name, - dev); + rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev); if (rc) return rc; - netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->pdev->irq); + netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq); rc = alloc_ring(dev); if (rc) { - free_irq(rp->pdev->irq, dev); + free_irq(rp->irq, dev); return rc; } alloc_rbufs(dev); @@ -1659,6 +1712,7 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; void __iomem *ioaddr = rp->base; unsigned entry; @@ -1695,9 +1749,9 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, rp->tx_bufs)); } else { rp->tx_skbuff_dma[entry] = - pci_map_single(rp->pdev, skb->data, skb->len, - PCI_DMA_TODEVICE); - if (dma_mapping_error(&rp->pdev->dev, rp->tx_skbuff_dma[entry])) { + dma_map_single(hwdev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(hwdev, rp->tx_skbuff_dma[entry])) { dev_kfree_skb_any(skb); rp->tx_skbuff_dma[entry] = 0; dev->stats.tx_dropped++; @@ -1788,6 +1842,7 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance) static void rhine_tx(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE; /* find and cleanup dirty tx descriptors */ @@ -1831,10 +1886,10 @@ static void rhine_tx(struct net_device *dev) } /* Free the original skb. */ if (rp->tx_skbuff_dma[entry]) { - pci_unmap_single(rp->pdev, + dma_unmap_single(hwdev, rp->tx_skbuff_dma[entry], rp->tx_skbuff[entry]->len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); } dev_consume_skb_any(rp->tx_skbuff[entry]); rp->tx_skbuff[entry] = NULL; @@ -1863,6 +1918,7 @@ static inline u16 rhine_get_vlan_tci(struct sk_buff *skb, int data_size) static int rhine_rx(struct net_device *dev, int limit) { struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; int count; int entry = rp->cur_rx % RX_RING_SIZE; @@ -1924,19 +1980,19 @@ static int rhine_rx(struct net_device *dev, int limit) if (pkt_len < rx_copybreak) skb = netdev_alloc_skb_ip_align(dev, pkt_len); if (skb) { - pci_dma_sync_single_for_cpu(rp->pdev, - rp->rx_skbuff_dma[entry], - rp->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(hwdev, + rp->rx_skbuff_dma[entry], + rp->rx_buf_sz, + DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, rp->rx_skbuff[entry]->data, pkt_len); skb_put(skb, pkt_len); - pci_dma_sync_single_for_device(rp->pdev, - rp->rx_skbuff_dma[entry], - rp->rx_buf_sz, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(hwdev, + rp->rx_skbuff_dma[entry], + rp->rx_buf_sz, + DMA_FROM_DEVICE); } else { skb = rp->rx_skbuff[entry]; if (skb == NULL) { @@ -1945,10 +2001,10 @@ static int rhine_rx(struct net_device *dev, int limit) } rp->rx_skbuff[entry] = NULL; skb_put(skb, pkt_len); - pci_unmap_single(rp->pdev, + dma_unmap_single(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); } if (unlikely(desc_length & DescTag)) @@ -1979,10 +2035,11 @@ static int rhine_rx(struct net_device *dev, int limit) if (skb == NULL) break; /* Better luck next round. */ rp->rx_skbuff_dma[entry] = - pci_map_single(rp->pdev, skb->data, + dma_map_single(hwdev, skb->data, rp->rx_buf_sz, - PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&rp->pdev->dev, rp->rx_skbuff_dma[entry])) { + DMA_FROM_DEVICE); + if (dma_mapping_error(hwdev, + rp->rx_skbuff_dma[entry])) { dev_kfree_skb(skb); rp->rx_skbuff_dma[entry] = 0; break; @@ -2103,7 +2160,7 @@ static void rhine_set_rx_mode(struct net_device *dev) /* Too many to match, or accept all multicasts. */ iowrite32(0xffffffff, ioaddr + MulticastFilter0); iowrite32(0xffffffff, ioaddr + MulticastFilter1); - } else if (rp->pdev->revision >= VT6105M) { + } else if (rp->revision >= VT6105M) { int i = 0; u32 mCAMmask = 0; /* 32 mCAMs (6105M and better) */ netdev_for_each_mc_addr(ha, dev) { @@ -2125,7 +2182,7 @@ static void rhine_set_rx_mode(struct net_device *dev) iowrite32(mc_filter[1], ioaddr + MulticastFilter1); } /* enable/disable VLAN receive filtering */ - if (rp->pdev->revision >= VT6105M) { + if (rp->revision >= VT6105M) { if (dev->flags & IFF_PROMISC) BYTE_REG_BITS_OFF(BCR1_VIDFR, ioaddr + PCIBusConfig1); else @@ -2136,11 +2193,11 @@ static void rhine_set_rx_mode(struct net_device *dev) static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - struct rhine_private *rp = netdev_priv(dev); + struct device *hwdev = dev->dev.parent; strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - strlcpy(info->bus_info, pci_name(rp->pdev), sizeof(info->bus_info)); + strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info)); } static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -2277,7 +2334,7 @@ static int rhine_close(struct net_device *dev) /* Stop the chip's Tx and Rx processes. */ iowrite16(CmdStop, ioaddr + ChipCmd); - free_irq(rp->pdev->irq, dev); + free_irq(rp->irq, dev); free_rbufs(dev); free_tbufs(dev); free_ring(dev); @@ -2286,7 +2343,7 @@ static int rhine_close(struct net_device *dev) } -static void rhine_remove_one(struct pci_dev *pdev) +static void rhine_remove_one_pci(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); @@ -2300,7 +2357,21 @@ static void rhine_remove_one(struct pci_dev *pdev) pci_disable_device(pdev); } -static void rhine_shutdown (struct pci_dev *pdev) +static int rhine_remove_one_platform(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct rhine_private *rp = netdev_priv(dev); + + unregister_netdev(dev); + + iounmap(rp->base); + + free_netdev(dev); + + return 0; +} + +static void rhine_shutdown_pci(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); @@ -2354,8 +2425,7 @@ static void rhine_shutdown (struct pci_dev *pdev) #ifdef CONFIG_PM_SLEEP static int rhine_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct net_device *dev = pci_get_drvdata(pdev); + struct net_device *dev = dev_get_drvdata(device); struct rhine_private *rp = netdev_priv(dev); if (!netif_running(dev)) @@ -2367,15 +2437,15 @@ static int rhine_suspend(struct device *device) netif_device_detach(dev); - rhine_shutdown(pdev); + if (dev_is_pci(device)) + rhine_shutdown_pci(to_pci_dev(device)); return 0; } static int rhine_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct net_device *dev = pci_get_drvdata(pdev); + struct net_device *dev = dev_get_drvdata(device); struct rhine_private *rp = netdev_priv(dev); if (!netif_running(dev)) @@ -2408,15 +2478,26 @@ static SIMPLE_DEV_PM_OPS(rhine_pm_ops, rhine_suspend, rhine_resume); #endif /* !CONFIG_PM_SLEEP */ -static struct pci_driver rhine_driver = { +static struct pci_driver rhine_driver_pci = { .name = DRV_NAME, .id_table = rhine_pci_tbl, - .probe = rhine_init_one, - .remove = rhine_remove_one, - .shutdown = rhine_shutdown, + .probe = rhine_init_one_pci, + .remove = rhine_remove_one_pci, + .shutdown = rhine_shutdown_pci, .driver.pm = RHINE_PM_OPS, }; +static struct platform_driver rhine_driver_platform = { + .probe = rhine_init_one_platform, + .remove = rhine_remove_one_platform, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = rhine_of_tbl, + .pm = RHINE_PM_OPS, + } +}; + static struct dmi_system_id rhine_dmi_table[] __initdata = { { .ident = "EPIA-M", @@ -2437,6 +2518,8 @@ static struct dmi_system_id rhine_dmi_table[] __initdata = { static int __init rhine_init(void) { + int ret_pci, ret_platform; + /* when a module, this is printed whether or not devices are found in probe */ #ifdef MODULE pr_info("%s\n", version); @@ -2449,13 +2532,19 @@ static int __init rhine_init(void) else if (avoid_D3) pr_info("avoid_D3 set\n"); - return pci_register_driver(&rhine_driver); + ret_pci = pci_register_driver(&rhine_driver_pci); + ret_platform = platform_driver_register(&rhine_driver_platform); + if ((ret_pci < 0) && (ret_platform < 0)) + return ret_pci; + + return 0; } static void __exit rhine_cleanup(void) { - pci_unregister_driver(&rhine_driver); + platform_driver_unregister(&rhine_driver_platform); + pci_unregister_driver(&rhine_driver_pci); } diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d18f711d0b0..d1f7826aa75 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -28,50 +28,117 @@ #include <linux/hyperv.h> #include <linux/rndis.h> -/* Fwd declaration */ -struct hv_netvsc_packet; -struct ndis_tcp_ip_checksum_info; +/* RSS related */ +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ -/* Represent the xfer page packet which contains 1 or more netvsc packet */ -struct xferpage_packet { - struct list_head list_ent; - u32 status; +#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 +#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 - /* # of netvsc packets this xfer packet contains */ - u32 count; +#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 +#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 + +struct ndis_obj_header { + u8 type; + u8 rev; + u16 size; +} __packed; + +/* ndis_recv_scale_cap/cap_flag */ +#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 +#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000 +#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 +#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 + +struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ + struct ndis_obj_header hdr; + u32 cap_flag; + u32 num_int_msg; + u32 num_recv_que; + u16 num_indirect_tabent; +} __packed; + + +/* ndis_recv_scale_param flags */ +#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 +#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 +#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 +#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 +#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 + +/* Hash info bits */ +#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) +#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 + +#define ITAB_NUM 128 +#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 +extern u8 netvsc_hash_key[]; + +struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ + struct ndis_obj_header hdr; + + /* Qualifies the rest of the information */ + u16 flag; + + /* The base CPU number to do receive processing. not used */ + u16 base_cpu_number; + + /* This describes the hash function and type being enabled */ + u32 hashinfo; + + /* The size of indirection table array */ + u16 indirect_tabsize; + + /* The offset of the indirection table from the beginning of this + * structure + */ + u32 indirect_taboffset; + + /* The size of the hash secret key */ + u16 hashkey_size; + + /* The offset of the secret key from the beginning of this structure */ + u32 kashkey_offset; + + u32 processor_masks_offset; + u32 num_processor_masks; + u32 processor_masks_entry_size; }; +/* Fwd declaration */ +struct ndis_tcp_ip_checksum_info; + /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame * within the RNDIS */ struct hv_netvsc_packet { /* Bookkeeping stuff */ - struct list_head list_ent; u32 status; struct hv_device *device; bool is_data_pkt; u16 vlan_tci; - /* - * Valid only for receives when we break a xfer page packet - * into multiple netvsc packets - */ - struct xferpage_packet *xfer_page_pkt; + u16 q_idx; + struct vmbus_channel *channel; - union { - struct { - u64 recv_completion_tid; - void *recv_completion_ctx; - void (*recv_completion)(void *context); - } recv; - struct { - u64 send_completion_tid; - void *send_completion_ctx; - void (*send_completion)(void *context); - } send; - } completion; + u64 send_completion_tid; + void *send_completion_ctx; + void (*send_completion)(void *context); /* This points to the memory after page_buf */ struct rndis_message *rndis_msg; @@ -120,6 +187,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, struct ndis_tcp_ip_checksum_info *csum_info); +void netvsc_channel_cb(void *context); int rndis_filter_open(struct hv_device *dev); int rndis_filter_close(struct hv_device *dev); int rndis_filter_device_add(struct hv_device *dev, @@ -517,11 +585,10 @@ struct nvsp_message { #define NETVSC_RECEIVE_BUFFER_ID 0xcafe -/* Preallocated receive packets */ -#define NETVSC_RECEIVE_PACKETLIST_COUNT 256 - #define NETVSC_PACKET_SIZE 2048 +#define VRSS_SEND_TAB_SIZE 16 + /* Per netvsc channel-specific */ struct netvsc_device { struct hv_device *dev; @@ -532,12 +599,6 @@ struct netvsc_device { wait_queue_head_t wait_drain; bool start_remove; bool destroy; - /* - * List of free preallocated hv_netvsc_packet to represent receive - * packet - */ - struct list_head recv_pkt_list; - spinlock_t recv_pkt_list_lock; /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; @@ -555,10 +616,20 @@ struct netvsc_device { struct net_device *ndev; + struct vmbus_channel *chn_table[NR_CPUS]; + u32 send_table[VRSS_SEND_TAB_SIZE]; + u32 num_chn; + atomic_t queue_sends[NR_CPUS]; + /* Holds rndis device info */ void *extension; - /* The recive buffer for this device */ + + int ring_size; + + /* The primary channel callback buffer */ unsigned char cb_buffer[NETVSC_PACKET_SIZE]; + /* The sub channel callback buffer */ + unsigned char *sub_cb_buf; }; /* NdisInitialize message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index f7629ecefa8..bbee4463503 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device) int netvsc_device_remove(struct hv_device *device) { struct netvsc_device *net_device; - struct hv_netvsc_packet *netvsc_packet, *pos; unsigned long flags; net_device = hv_get_drvdata(device); @@ -416,11 +415,8 @@ int netvsc_device_remove(struct hv_device *device) vmbus_close(device->channel); /* Release all resources */ - list_for_each_entry_safe(netvsc_packet, pos, - &net_device->recv_pkt_list, list_ent) { - list_del(&netvsc_packet->list_ent); - kfree(netvsc_packet); - } + if (net_device->sub_cb_buf) + vfree(net_device->sub_cb_buf); kfree(net_device); return 0; @@ -461,7 +457,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, (nvsp_packet->hdr.msg_type == NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) { + NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || + (nvsp_packet->hdr.msg_type == + NVSP_MSG5_TYPE_SUBCHANNEL)) { /* Copy the response back */ memcpy(&net_device->channel_init_pkt, nvsp_packet, sizeof(struct nvsp_message)); @@ -469,28 +467,36 @@ static void netvsc_send_completion(struct netvsc_device *net_device, } else if (nvsp_packet->hdr.msg_type == NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { int num_outstanding_sends; + u16 q_idx = 0; + struct vmbus_channel *channel = device->channel; + int queue_sends; /* Get the send context */ nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) packet->trans_id; /* Notify the layer above us */ - if (nvsc_packet) - nvsc_packet->completion.send.send_completion( - nvsc_packet->completion.send. - send_completion_ctx); + if (nvsc_packet) { + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; + nvsc_packet->send_completion(nvsc_packet-> + send_completion_ctx); + } num_outstanding_sends = atomic_dec_return(&net_device->num_outstanding_sends); + queue_sends = atomic_dec_return(&net_device-> + queue_sends[q_idx]); if (net_device->destroy && num_outstanding_sends == 0) wake_up(&net_device->wait_drain); - if (netif_queue_stopped(ndev) && !net_device->start_remove && - (hv_ringbuf_avail_percent(&device->channel->outbound) - > RING_AVAIL_PERCENT_HIWATER || - num_outstanding_sends < 1)) - netif_wake_queue(ndev); + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && + !net_device->start_remove && + (hv_ringbuf_avail_percent(&channel->outbound) > + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, q_idx)); } else { netdev_err(ndev, "Unknown send completion packet type- " "%d received!!\n", nvsp_packet->hdr.msg_type); @@ -505,6 +511,7 @@ int netvsc_send(struct hv_device *device, int ret = 0; struct nvsp_message sendMessage; struct net_device *ndev; + struct vmbus_channel *out_channel = NULL; u64 req_id; net_device = get_outbound_net_device(device); @@ -526,20 +533,25 @@ int netvsc_send(struct hv_device *device, 0xFFFFFFFF; sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; - if (packet->completion.send.send_completion) + if (packet->send_completion) req_id = (ulong)packet; else req_id = 0; + out_channel = net_device->chn_table[packet->q_idx]; + if (out_channel == NULL) + out_channel = device->channel; + packet->channel = out_channel; + if (packet->page_buf_cnt) { - ret = vmbus_sendpacket_pagebuffer(device->channel, + ret = vmbus_sendpacket_pagebuffer(out_channel, packet->page_buf, packet->page_buf_cnt, &sendMessage, sizeof(struct nvsp_message), req_id); } else { - ret = vmbus_sendpacket(device->channel, &sendMessage, + ret = vmbus_sendpacket(out_channel, &sendMessage, sizeof(struct nvsp_message), req_id, VM_PKT_DATA_INBAND, @@ -548,17 +560,24 @@ int netvsc_send(struct hv_device *device, if (ret == 0) { atomic_inc(&net_device->num_outstanding_sends); - if (hv_ringbuf_avail_percent(&device->channel->outbound) < + atomic_inc(&net_device->queue_sends[packet->q_idx]); + + if (hv_ringbuf_avail_percent(&out_channel->outbound) < RING_AVAIL_PERCENT_LOWATER) { - netif_stop_queue(ndev); + netif_tx_stop_queue(netdev_get_tx_queue( + ndev, packet->q_idx)); + if (atomic_read(&net_device-> - num_outstanding_sends) < 1) - netif_wake_queue(ndev); + queue_sends[packet->q_idx]) < 1) + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, packet->q_idx)); } } else if (ret == -EAGAIN) { - netif_stop_queue(ndev); - if (atomic_read(&net_device->num_outstanding_sends) < 1) { - netif_wake_queue(ndev); + netif_tx_stop_queue(netdev_get_tx_queue( + ndev, packet->q_idx)); + if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, packet->q_idx)); ret = -ENOSPC; } } else { @@ -570,6 +589,7 @@ int netvsc_send(struct hv_device *device, } static void netvsc_send_recv_completion(struct hv_device *device, + struct vmbus_channel *channel, struct netvsc_device *net_device, u64 transaction_id, u32 status) { @@ -587,7 +607,7 @@ static void netvsc_send_recv_completion(struct hv_device *device, retry_send_cmplt: /* Send the completion */ - ret = vmbus_sendpacket(device->channel, &recvcompMessage, + ret = vmbus_sendpacket(channel, &recvcompMessage, sizeof(struct nvsp_message), transaction_id, VM_PKT_COMP, 0); if (ret == 0) { @@ -613,76 +633,20 @@ retry_send_cmplt: } } -/* Send a receive completion packet to RNDIS device (ie NetVsp) */ -static void netvsc_receive_completion(void *context) -{ - struct hv_netvsc_packet *packet = context; - struct hv_device *device = packet->device; - struct netvsc_device *net_device; - u64 transaction_id = 0; - bool fsend_receive_comp = false; - unsigned long flags; - struct net_device *ndev; - u32 status = NVSP_STAT_NONE; - - /* - * Even though it seems logical to do a GetOutboundNetDevice() here to - * send out receive completion, we are using GetInboundNetDevice() - * since we may have disable outbound traffic already. - */ - net_device = get_inbound_net_device(device); - if (!net_device) - return; - ndev = net_device->ndev; - - /* Overloading use of the lock. */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - - if (packet->status != NVSP_STAT_SUCCESS) - packet->xfer_page_pkt->status = NVSP_STAT_FAIL; - - packet->xfer_page_pkt->count--; - - /* - * Last one in the line that represent 1 xfer page packet. - * Return the xfer page packet itself to the freelist - */ - if (packet->xfer_page_pkt->count == 0) { - fsend_receive_comp = true; - transaction_id = packet->completion.recv.recv_completion_tid; - status = packet->xfer_page_pkt->status; - list_add_tail(&packet->xfer_page_pkt->list_ent, - &net_device->recv_pkt_list); - - } - - /* Put the packet back */ - list_add_tail(&packet->list_ent, &net_device->recv_pkt_list); - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); - - /* Send a receive completion for the xfer page packet */ - if (fsend_receive_comp) - netvsc_send_recv_completion(device, net_device, transaction_id, - status); - -} - static void netvsc_receive(struct netvsc_device *net_device, + struct vmbus_channel *channel, struct hv_device *device, struct vmpacket_descriptor *packet) { struct vmtransfer_page_packet_header *vmxferpage_packet; struct nvsp_message *nvsp_packet; - struct hv_netvsc_packet *netvsc_packet = NULL; - /* struct netvsc_driver *netvscDriver; */ - struct xferpage_packet *xferpage_packet = NULL; + struct hv_netvsc_packet nv_pkt; + struct hv_netvsc_packet *netvsc_packet = &nv_pkt; + u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; - unsigned long flags; struct net_device *ndev; - LIST_HEAD(listHead); - ndev = net_device->ndev; /* @@ -715,77 +679,14 @@ static void netvsc_receive(struct netvsc_device *net_device, return; } - /* - * Grab free packets (range count + 1) to represent this xfer - * page packet. +1 to represent the xfer page packet itself. - * We grab it here so that we know exactly how many we can - * fulfil - */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - while (!list_empty(&net_device->recv_pkt_list)) { - list_move_tail(net_device->recv_pkt_list.next, &listHead); - if (++count == vmxferpage_packet->range_cnt + 1) - break; - } - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); - - /* - * We need at least 2 netvsc pkts (1 to represent the xfer - * page and at least 1 for the range) i.e. we can handled - * some of the xfer page packet ranges... - */ - if (count < 2) { - netdev_err(ndev, "Got only %d netvsc pkt...needed " - "%d pkts. Dropping this xfer page packet completely!\n", - count, vmxferpage_packet->range_cnt + 1); - - /* Return it to the freelist */ - spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); - for (i = count; i != 0; i--) { - list_move_tail(listHead.next, - &net_device->recv_pkt_list); - } - spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, - flags); - - netvsc_send_recv_completion(device, net_device, - vmxferpage_packet->d.trans_id, - NVSP_STAT_FAIL); - - return; - } - - /* Remove the 1st packet to represent the xfer page packet itself */ - xferpage_packet = (struct xferpage_packet *)listHead.next; - list_del(&xferpage_packet->list_ent); - xferpage_packet->status = NVSP_STAT_SUCCESS; - - /* This is how much we can satisfy */ - xferpage_packet->count = count - 1; - - if (xferpage_packet->count != vmxferpage_packet->range_cnt) { - netdev_err(ndev, "Needed %d netvsc pkts to satisfy " - "this xfer page...got %d\n", - vmxferpage_packet->range_cnt, xferpage_packet->count); - } + count = vmxferpage_packet->range_cnt; + netvsc_packet->device = device; + netvsc_packet->channel = channel; /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ - for (i = 0; i < (count - 1); i++) { - netvsc_packet = (struct hv_netvsc_packet *)listHead.next; - list_del(&netvsc_packet->list_ent); - + for (i = 0; i < count; i++) { /* Initialize the netvsc packet */ netvsc_packet->status = NVSP_STAT_SUCCESS; - netvsc_packet->xfer_page_pkt = xferpage_packet; - netvsc_packet->completion.recv.recv_completion = - netvsc_receive_completion; - netvsc_packet->completion.recv.recv_completion_ctx = - netvsc_packet; - netvsc_packet->device = device; - /* Save this so that we can send it back */ - netvsc_packet->completion.recv.recv_completion_tid = - vmxferpage_packet->d.trans_id; - netvsc_packet->data = (void *)((unsigned long)net_device-> recv_buf + vmxferpage_packet->ranges[i].byte_offset); netvsc_packet->total_data_buflen = @@ -794,16 +695,53 @@ static void netvsc_receive(struct netvsc_device *net_device, /* Pass it to the upper layer */ rndis_filter_receive(device, netvsc_packet); - netvsc_receive_completion(netvsc_packet-> - completion.recv.recv_completion_ctx); + if (netvsc_packet->status != NVSP_STAT_SUCCESS) + status = NVSP_STAT_FAIL; } + netvsc_send_recv_completion(device, channel, net_device, + vmxferpage_packet->d.trans_id, status); } -static void netvsc_channel_cb(void *context) + +static void netvsc_send_table(struct hv_device *hdev, + struct vmpacket_descriptor *vmpkt) +{ + struct netvsc_device *nvscdev; + struct net_device *ndev; + struct nvsp_message *nvmsg; + int i; + u32 count, *tab; + + nvscdev = get_outbound_net_device(hdev); + if (!nvscdev) + return; + ndev = nvscdev->ndev; + + nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + + (vmpkt->offset8 << 3)); + + if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) + return; + + count = nvmsg->msg.v5_msg.send_table.count; + if (count != VRSS_SEND_TAB_SIZE) { + netdev_err(ndev, "Received wrong send-table size:%u\n", count); + return; + } + + tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + + nvmsg->msg.v5_msg.send_table.offset); + + for (i = 0; i < count; i++) + nvscdev->send_table[i] = tab[i]; +} + +void netvsc_channel_cb(void *context) { int ret; - struct hv_device *device = context; + struct vmbus_channel *channel = (struct vmbus_channel *)context; + struct hv_device *device; struct netvsc_device *net_device; u32 bytes_recvd; u64 request_id; @@ -812,14 +750,19 @@ static void netvsc_channel_cb(void *context) int bufferlen = NETVSC_PACKET_SIZE; struct net_device *ndev; + if (channel->primary_channel != NULL) + device = channel->primary_channel->device_obj; + else + device = channel->device_obj; + net_device = get_inbound_net_device(device); if (!net_device) return; ndev = net_device->ndev; - buffer = net_device->cb_buffer; + buffer = get_per_channel_state(channel); do { - ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen, + ret = vmbus_recvpacket_raw(channel, buffer, bufferlen, &bytes_recvd, &request_id); if (ret == 0) { if (bytes_recvd > 0) { @@ -831,8 +774,12 @@ static void netvsc_channel_cb(void *context) break; case VM_PKT_DATA_USING_XFER_PAGES: - netvsc_receive(net_device, - device, desc); + netvsc_receive(net_device, channel, + device, desc); + break; + + case VM_PKT_DATA_INBAND: + netvsc_send_table(device, desc); break; default: @@ -880,11 +827,9 @@ static void netvsc_channel_cb(void *context) int netvsc_device_add(struct hv_device *device, void *additional_info) { int ret = 0; - int i; int ring_size = ((struct netvsc_device_info *)additional_info)->ring_size; struct netvsc_device *net_device; - struct hv_netvsc_packet *packet, *pos; struct net_device *ndev; net_device = alloc_net_device(device); @@ -893,6 +838,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) goto cleanup; } + net_device->ring_size = ring_size; + /* * Coming into this function, struct net_device * is * registered as the driver private data. @@ -903,24 +850,14 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) ndev = net_device->ndev; /* Initialize the NetVSC channel extension */ - spin_lock_init(&net_device->recv_pkt_list_lock); - - INIT_LIST_HEAD(&net_device->recv_pkt_list); - - for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { - packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL); - if (!packet) - break; - - list_add_tail(&packet->list_ent, - &net_device->recv_pkt_list); - } init_completion(&net_device->channel_init_wait); + set_per_channel_state(device->channel, net_device->cb_buffer); + /* Open the channel */ ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, ring_size * PAGE_SIZE, NULL, 0, - netvsc_channel_cb, device); + netvsc_channel_cb, device->channel); if (ret != 0) { netdev_err(ndev, "unable to open channel: %d\n", ret); @@ -930,6 +867,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) /* Channel is opened */ pr_info("hv_netvsc channel opened successfully\n"); + net_device->chn_table[0] = device->channel; + /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device); if (ret != 0) { @@ -946,16 +885,8 @@ close: cleanup: - if (net_device) { - list_for_each_entry_safe(packet, pos, - &net_device->recv_pkt_list, - list_ent) { - list_del(&packet->list_ent); - kfree(packet); - } - + if (net_device) kfree(net_device); - } return ret; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 31e55fba7ca..c76b66515e9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net) return ret; } - netif_start_queue(net); + netif_tx_start_all_queues(net); nvdev = hv_get_drvdata(device_obj); rdev = nvdev->extension; @@ -149,11 +149,93 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, return ppi; } +union sub_key { + u64 k; + struct { + u8 pad[3]; + u8 kb; + u32 ka; + }; +}; + +/* Toeplitz hash function + * data: network byte order + * return: host byte order + */ +static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) +{ + union sub_key subk; + int k_next = 4; + u8 dt; + int i, j; + u32 ret = 0; + + subk.k = 0; + subk.ka = ntohl(*(u32 *)key); + + for (i = 0; i < dlen; i++) { + subk.kb = key[k_next]; + k_next = (k_next + 1) % klen; + dt = data[i]; + for (j = 0; j < 8; j++) { + if (dt & 0x80) + ret ^= subk.ka; + dt <<= 1; + subk.k <<= 1; + } + } + + return ret; +} + +static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) +{ + struct iphdr *iphdr; + int data_len; + bool ret = false; + + if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) + return false; + + iphdr = ip_hdr(skb); + + if (iphdr->version == 4) { + if (iphdr->protocol == IPPROTO_TCP) + data_len = 12; + else + data_len = 8; + *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, + (u8 *)&iphdr->saddr, data_len); + ret = true; + } + + return ret; +} + +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + struct net_device_context *net_device_ctx = netdev_priv(ndev); + struct hv_device *hdev = net_device_ctx->device_ctx; + struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); + u32 hash; + u16 q_idx = 0; + + if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) + return 0; + + if (netvsc_set_hash(&hash, skb)) + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % + ndev->real_num_tx_queues; + + return q_idx; +} + static void netvsc_xmit_completion(void *context) { struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; struct sk_buff *skb = (struct sk_buff *) - (unsigned long)packet->completion.send.send_completion_tid; + (unsigned long)packet->send_completion_tid; kfree(packet); @@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) packet->vlan_tci = skb->vlan_tci; + packet->q_idx = skb_get_queue_mapping(skb); + packet->is_data_pkt = true; packet->total_data_buflen = skb->len; @@ -341,9 +425,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) (num_data_pgs * sizeof(struct hv_page_buffer))); /* Set the completion routine */ - packet->completion.send.send_completion = netvsc_xmit_completion; - packet->completion.send.send_completion_ctx = packet; - packet->completion.send.send_completion_tid = (unsigned long)skb; + packet->send_completion = netvsc_xmit_completion; + packet->send_completion_ctx = packet; + packet->send_completion_tid = (unsigned long)skb; isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; @@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), packet->vlan_tci); + skb_record_rx_queue(skb, packet->channel-> + offermsg.offer.sub_channel_index % + net->real_num_rx_queues); + net->stats.rx_packets++; net->stats.rx_bytes += packet->total_data_buflen; @@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) hv_set_drvdata(hdev, ndev); device_info.ring_size = ring_size; rndis_filter_device_add(hdev, &device_info); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); return 0; } @@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = { .ndo_change_mtu = netvsc_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = netvsc_set_mac_addr, + .ndo_select_queue = netvsc_select_queue, }; /* @@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev, struct net_device *net = NULL; struct net_device_context *net_device_ctx; struct netvsc_device_info device_info; + struct netvsc_device *nvdev; int ret; - net = alloc_etherdev(sizeof(struct net_device_context)); + net = alloc_etherdev_mq(sizeof(struct net_device_context), + num_online_cpus()); if (!net) return -ENOMEM; @@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev, } memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + nvdev = hv_get_drvdata(dev); + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); + dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", + net->real_num_tx_queues, net->real_num_rx_queues); + ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 143a98caf61..99c527adae5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -31,7 +31,7 @@ #include "hyperv_net.h" -#define RNDIS_EXT_LEN 100 +#define RNDIS_EXT_LEN PAGE_SIZE struct rndis_request { struct list_head list_ent; struct completion wait_event; @@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev, rndis_msg->ndis_msg_type = msg_type; rndis_msg->msg_len = msg_len; + request->pkt.q_idx = 0; + /* * Set the request id. This field is always after the rndis header for * request/response packet types so we just used the SetRequest as a @@ -234,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->page_buf[0].len; } - packet->completion.send.send_completion = NULL; + packet->send_completion = NULL; ret = netvsc_send(dev->net_dev->dev, packet); return ret; @@ -399,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev, pkt->total_data_buflen = rndis_pkt->data_len; pkt->data = (void *)((unsigned long)pkt->data + data_offset); - pkt->is_data_pkt = true; - vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); if (vlan) { pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | @@ -509,6 +509,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, query->info_buflen = 0; query->dev_vc_handle = 0; + if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { + struct ndis_recv_scale_cap *cap; + + request->request_msg.msg_len += + sizeof(struct ndis_recv_scale_cap); + query->info_buflen = sizeof(struct ndis_recv_scale_cap); + cap = (struct ndis_recv_scale_cap *)((unsigned long)query + + query->info_buf_offset); + cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; + cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; + cap->hdr.size = sizeof(struct ndis_recv_scale_cap); + } + ret = rndis_filter_send_request(dev, request); if (ret != 0) goto cleanup; @@ -695,6 +708,89 @@ cleanup: return ret; } +u8 netvsc_hash_key[HASH_KEYLEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa +}; + +int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) +{ + struct net_device *ndev = rdev->net_dev->ndev; + struct rndis_request *request; + struct rndis_set_request *set; + struct rndis_set_complete *set_complete; + u32 extlen = sizeof(struct ndis_recv_scale_param) + + 4*ITAB_NUM + HASH_KEYLEN; + struct ndis_recv_scale_param *rssp; + u32 *itab; + u8 *keyp; + int i, t, ret; + + request = get_rndis_request( + rdev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); + if (!request) + return -ENOMEM; + + set = &request->request_msg.msg.set_req; + set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS; + set->info_buflen = extlen; + set->info_buf_offset = sizeof(struct rndis_set_request); + set->dev_vc_handle = 0; + + rssp = (struct ndis_recv_scale_param *)(set + 1); + rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; + rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; + rssp->hdr.size = sizeof(struct ndis_recv_scale_param); + rssp->flag = 0; + rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | + NDIS_HASH_TCP_IPV4; + rssp->indirect_tabsize = 4*ITAB_NUM; + rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); + rssp->hashkey_size = HASH_KEYLEN; + rssp->kashkey_offset = rssp->indirect_taboffset + + rssp->indirect_tabsize; + + /* Set indirection table entries */ + itab = (u32 *)(rssp + 1); + for (i = 0; i < ITAB_NUM; i++) + itab[i] = i % num_queue; + + /* Set hask key values */ + keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); + for (i = 0; i < HASH_KEYLEN; i++) + keyp[i] = netvsc_hash_key[i]; + + + ret = rndis_filter_send_request(rdev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + if (t == 0) { + netdev_err(ndev, "timeout before we got a set response...\n"); + /* can't put_rndis_request, since we may still receive a + * send-completion. + */ + return -ETIMEDOUT; + } else { + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status != RNDIS_STATUS_SUCCESS) { + netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", + set_complete->status); + ret = -EINVAL; + } + } + +cleanup: + put_rndis_request(rdev, request); + return ret; +} + + static int rndis_filter_query_device_link_status(struct rndis_device *dev) { u32 size = sizeof(u32); @@ -886,6 +982,28 @@ static int rndis_filter_close_device(struct rndis_device *dev) return ret; } +static void netvsc_sc_open(struct vmbus_channel *new_sc) +{ + struct netvsc_device *nvscdev; + u16 chn_index = new_sc->offermsg.offer.sub_channel_index; + int ret; + + nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); + + if (chn_index >= nvscdev->num_chn) + return; + + set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * + NETVSC_PACKET_SIZE); + + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, + nvscdev->ring_size * PAGE_SIZE, NULL, 0, + netvsc_channel_cb, new_sc); + + if (ret == 0) + nvscdev->chn_table[chn_index] = new_sc; +} + int rndis_filter_device_add(struct hv_device *dev, void *additional_info) { @@ -894,6 +1012,10 @@ int rndis_filter_device_add(struct hv_device *dev, struct rndis_device *rndis_device; struct netvsc_device_info *device_info = additional_info; struct ndis_offload_params offloads; + struct nvsp_message *init_packet; + int t; + struct ndis_recv_scale_cap rsscap; + u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); rndis_device = get_rndis_device(); if (!rndis_device) @@ -913,6 +1035,7 @@ int rndis_filter_device_add(struct hv_device *dev, /* Initialize the rndis device */ net_device = hv_get_drvdata(dev); + net_device->num_chn = 1; net_device->extension = rndis_device; rndis_device->net_dev = net_device; @@ -952,7 +1075,6 @@ int rndis_filter_device_add(struct hv_device *dev, if (ret) goto err_dev_remv; - rndis_filter_query_device_link_status(rndis_device); device_info->link_state = rndis_device->link_state; @@ -961,7 +1083,66 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device->hw_mac_adr, device_info->link_state ? "down" : "up"); - return ret; + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) + return 0; + + /* vRSS setup */ + memset(&rsscap, 0, rsscap_size); + ret = rndis_filter_query_device(rndis_device, + OID_GEN_RECEIVE_SCALE_CAPABILITIES, + &rsscap, &rsscap_size); + if (ret || rsscap.num_recv_que < 2) + goto out; + + net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ? + num_online_cpus() : rsscap.num_recv_que; + if (net_device->num_chn == 1) + goto out; + + net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) * + NETVSC_PACKET_SIZE); + if (!net_device->sub_cb_buf) { + net_device->num_chn = 1; + dev_info(&dev->device, "No memory for subchannels.\n"); + goto out; + } + + vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); + + init_packet = &net_device->channel_init_pkt; + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; + init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; + init_packet->msg.v5_msg.subchn_req.num_subchannels = + net_device->num_chn - 1; + ret = vmbus_sendpacket(dev->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + goto out; + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto out; + } + if (init_packet->msg.v5_msg.subchn_comp.status != + NVSP_STAT_SUCCESS) { + ret = -ENODEV; + goto out; + } + net_device->num_chn = 1 + + init_packet->msg.v5_msg.subchn_comp.num_subchannels; + + vmbus_are_subchannels_present(dev->channel); + + ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); + +out: + if (ret) + net_device->num_chn = 1; + return 0; /* return 0 because primary channel can be used alone */ err_dev_remv: rndis_filter_device_remove(dev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 753a8c23d15..cfb27c86541 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -30,6 +30,7 @@ #include <linux/if_link.h> #include <linux/if_macvlan.h> #include <linux/hash.h> +#include <linux/workqueue.h> #include <net/rtnetlink.h> #include <net/xfrm.h> @@ -40,10 +41,18 @@ struct macvlan_port { struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; struct list_head vlans; struct rcu_head rcu; + struct sk_buff_head bc_queue; + struct work_struct bc_work; bool passthru; int count; }; +struct macvlan_skb_cb { + const struct macvlan_dev *src; +}; + +#define MACVLAN_SKB_CB(__skb) ((struct macvlan_skb_cb *)&((__skb)->cb[0])) + static void macvlan_port_destroy(struct net_device *dev); static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev) @@ -120,7 +129,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev = vlan->dev; if (local) - return dev_forward_skb(dev, skb); + return __dev_forward_skb(dev, skb); skb->dev = dev; if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) @@ -128,7 +137,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, else skb->pkt_type = PACKET_MULTICAST; - return netif_rx(skb); + return 0; } static u32 macvlan_hash_mix(const struct macvlan_dev *vlan) @@ -175,32 +184,32 @@ static void macvlan_broadcast(struct sk_buff *skb, if (likely(nskb)) err = macvlan_broadcast_one( nskb, vlan, eth, - mode == MACVLAN_MODE_BRIDGE); + mode == MACVLAN_MODE_BRIDGE) ?: + netif_rx_ni(nskb); macvlan_count_rx(vlan, skb->len + ETH_HLEN, err == NET_RX_SUCCESS, 1); } } } -/* called under rcu_read_lock() from netif_receive_skb */ -static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) +static void macvlan_process_broadcast(struct work_struct *w) { - struct macvlan_port *port; - struct sk_buff *skb = *pskb; - const struct ethhdr *eth = eth_hdr(skb); - const struct macvlan_dev *vlan; - const struct macvlan_dev *src; - struct net_device *dev; - unsigned int len = 0; - int ret = NET_RX_DROP; + struct macvlan_port *port = container_of(w, struct macvlan_port, + bc_work); + struct sk_buff *skb; + struct sk_buff_head list; + + skb_queue_head_init(&list); + + spin_lock_bh(&port->bc_queue.lock); + skb_queue_splice_tail_init(&port->bc_queue, &list); + spin_unlock_bh(&port->bc_queue.lock); + + while ((skb = __skb_dequeue(&list))) { + const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; + + rcu_read_lock(); - port = macvlan_port_get_rcu(skb->dev); - if (is_multicast_ether_addr(eth->h_dest)) { - skb = ip_check_defrag(skb, IP_DEFRAG_MACVLAN); - if (!skb) - return RX_HANDLER_CONSUMED; - eth = eth_hdr(skb); - src = macvlan_hash_lookup(port, eth->h_source); if (!src) /* frame comes from an external address */ macvlan_broadcast(skb, port, NULL, @@ -213,20 +222,80 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) macvlan_broadcast(skb, port, src->dev, MACVLAN_MODE_VEPA | MACVLAN_MODE_BRIDGE); - else if (src->mode == MACVLAN_MODE_BRIDGE) + else /* * flood only to VEPA ports, bridge ports * already saw the frame on the way out. */ macvlan_broadcast(skb, port, src->dev, MACVLAN_MODE_VEPA); - else { + + rcu_read_unlock(); + + kfree_skb(skb); + } +} + +static void macvlan_broadcast_enqueue(struct macvlan_port *port, + struct sk_buff *skb) +{ + struct sk_buff *nskb; + int err = -ENOMEM; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + goto err; + + spin_lock(&port->bc_queue.lock); + if (skb_queue_len(&port->bc_queue) < skb->dev->tx_queue_len) { + __skb_queue_tail(&port->bc_queue, nskb); + err = 0; + } + spin_unlock(&port->bc_queue.lock); + + if (err) + goto free_nskb; + + schedule_work(&port->bc_work); + return; + +free_nskb: + kfree_skb(nskb); +err: + atomic_long_inc(&skb->dev->rx_dropped); +} + +/* called under rcu_read_lock() from netif_receive_skb */ +static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) +{ + struct macvlan_port *port; + struct sk_buff *skb = *pskb; + const struct ethhdr *eth = eth_hdr(skb); + const struct macvlan_dev *vlan; + const struct macvlan_dev *src; + struct net_device *dev; + unsigned int len = 0; + int ret = NET_RX_DROP; + + port = macvlan_port_get_rcu(skb->dev); + if (is_multicast_ether_addr(eth->h_dest)) { + skb = ip_check_defrag(skb, IP_DEFRAG_MACVLAN); + if (!skb) + return RX_HANDLER_CONSUMED; + eth = eth_hdr(skb); + src = macvlan_hash_lookup(port, eth->h_source); + if (src && src->mode != MACVLAN_MODE_VEPA && + src->mode != MACVLAN_MODE_BRIDGE) { /* forward to original port. */ vlan = src; - ret = macvlan_broadcast_one(skb, vlan, eth, 0); + ret = macvlan_broadcast_one(skb, vlan, eth, 0) ?: + netif_rx(skb); goto out; } + MACVLAN_SKB_CB(skb)->src = src; + macvlan_broadcast_enqueue(port, skb); + return RX_HANDLER_PASS; } @@ -764,6 +833,9 @@ static int macvlan_port_create(struct net_device *dev) for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_hash[i]); + skb_queue_head_init(&port->bc_queue); + INIT_WORK(&port->bc_work, macvlan_process_broadcast); + err = netdev_rx_handler_register(dev, macvlan_handle_frame, port); if (err) kfree(port); @@ -776,6 +848,7 @@ static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); + cancel_work_sync(&port->bc_work); dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); kfree_rcu(port, rcu); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 643464d5a72..6c622aedbae 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -144,41 +144,11 @@ static int at803x_resume(struct phy_device *phydev) static int at803x_config_init(struct phy_device *phydev) { - int val; int ret; - u32 features; - - features = SUPPORTED_TP | SUPPORTED_MII | SUPPORTED_AUI | - SUPPORTED_FIBRE | SUPPORTED_BNC; - - val = phy_read(phydev, MII_BMSR); - if (val < 0) - return val; - - if (val & BMSR_ANEGCAPABLE) - features |= SUPPORTED_Autoneg; - if (val & BMSR_100FULL) - features |= SUPPORTED_100baseT_Full; - if (val & BMSR_100HALF) - features |= SUPPORTED_100baseT_Half; - if (val & BMSR_10FULL) - features |= SUPPORTED_10baseT_Full; - if (val & BMSR_10HALF) - features |= SUPPORTED_10baseT_Half; - - if (val & BMSR_ESTATEN) { - val = phy_read(phydev, MII_ESTATUS); - if (val < 0) - return val; - - if (val & ESTATUS_1000_TFULL) - features |= SUPPORTED_1000baseT_Full; - if (val & ESTATUS_1000_THALF) - features |= SUPPORTED_1000baseT_Half; - } - phydev->supported = features; - phydev->advertising = features; + ret = genphy_config_init(phydev); + if (ret < 0) + return ret; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { ret = phy_write(phydev, AT803X_DEBUG_ADDR, @@ -283,8 +253,7 @@ static int __init atheros_init(void) static void __exit atheros_exit(void) { - return phy_drivers_unregister(at803x_driver, - ARRAY_SIZE(at803x_driver)); + phy_drivers_unregister(at803x_driver, ARRAY_SIZE(at803x_driver)); } module_init(atheros_init); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0ce60662429..466ae3e0632 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1067,7 +1067,7 @@ int genphy_soft_reset(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_soft_reset); -static int genphy_config_init(struct phy_device *phydev) +int genphy_config_init(struct phy_device *phydev) { int val; u32 features; @@ -1118,6 +1118,7 @@ static int gen10g_soft_reset(struct phy_device *phydev) /* Do nothing for now */ return 0; } +EXPORT_SYMBOL(genphy_config_init); static int gen10g_config_init(struct phy_device *phydev) { diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 11f34813e23..180c49479c4 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -249,8 +249,7 @@ static int __init smsc_init(void) static void __exit smsc_exit(void) { - return phy_drivers_unregister(smsc_phy_driver, - ARRAY_SIZE(smsc_phy_driver)); + phy_drivers_unregister(smsc_phy_driver, ARRAY_SIZE(smsc_phy_driver)); } MODULE_DESCRIPTION("SMSC PHY driver"); diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 14372c65a7e..5dc0935da99 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -319,8 +319,7 @@ static int __init vsc82xx_init(void) static void __exit vsc82xx_exit(void) { - return phy_drivers_unregister(vsc82xx_driver, - ARRAY_SIZE(vsc82xx_driver)); + phy_drivers_unregister(vsc82xx_driver, ARRAY_SIZE(vsc82xx_driver)); } module_init(vsc82xx_init); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0a114d05f68..212f537fc68 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. + * @get_rxfh_key_size: Get the size of the RX flow hash key. + * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table and hash + * key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table and + * hash key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -232,7 +242,10 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); + u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); + int (*get_rxfh)(struct net_device *, u32 *, u8 *); + int (*set_rxfh)(struct net_device *, u32 *, u8 *); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); diff --git a/include/linux/filter.h b/include/linux/filter.h index 024fd03e5d1..759abf78dd6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -223,6 +223,7 @@ enum { BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, + BPF_S_ANC_RANDOM, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa660..a803d792df1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2633,6 +2633,7 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index aad8eeaf416..5146ce06649 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -45,7 +45,8 @@ struct netlink_kernel_cfg { unsigned int flags; void (*input)(struct sk_buff *skb); struct mutex *cb_mutex; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sk); }; diff --git a/include/linux/phy.h b/include/linux/phy.h index 4d0221fd068..51d15f684e7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -666,6 +666,7 @@ static inline int phy_read_status(struct phy_device *phydev) return phydev->drv->read_status(phydev); } +int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index f7d372b7d4f..79b530fb2c4 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -54,6 +54,7 @@ #define __6LOWPAN_H__ #include <net/ipv6.h> +#include <net/net_namespace.h> #define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ #define UIP_IPH_LEN 40 /* ipv6 fixed header size */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5f9eb260990..361d2607719 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -373,6 +373,14 @@ static inline void rt_genid_bump_ipv6(struct net *net) } #endif +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) +static inline struct netns_ieee802154_lowpan * +net_ieee802154_lowpan(struct net *net) +{ + return &net->ieee802154_lowpan; +} +#endif + /* For callers who don't really care about whether it's IPv4 or IPv6 */ static inline void rt_genid_bump_all(struct net *net) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 87d87740818..163d2b467d7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -558,7 +558,6 @@ void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ -void tcp_cwnd_application_limited(struct sock *sk); void tcp_resume_early_retransmit(struct sock *sk); void tcp_rearm_rto(struct sock *sk); void tcp_reset(struct sock *sk); diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 11917f747cb..dfa4c860cce 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -373,6 +373,14 @@ enum { */ #define AUDIT_MESSAGE_TEXT_MAX 8560 +/* Multicast Netlink socket groups (default up to 32) */ +enum audit_nlgrps { + AUDIT_NLGRP_NONE, /* Group 0 not used */ + AUDIT_NLGRP_READLOG, /* "best effort" read only socket */ + __AUDIT_NLGRP_MAX +}; +#define AUDIT_NLGRP_MAX (__AUDIT_NLGRP_MAX - 1) + struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 154dd6d3c8f..12c37a197d2 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -347,7 +347,12 @@ struct vfs_cap_data { #define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_BLOCK_SUSPEND +/* Allow reading the audit log via multicast netlink socket */ + +#define CAP_AUDIT_READ 37 + + +#define CAP_LAST_CAP CAP_AUDIT_READ #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fd161e91b6d..d47d31d6fa0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -847,6 +847,35 @@ struct ethtool_rxfh_indir { }; /** + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH + * @rss_context: RSS context identifier. + * @indir_size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRSSH, the array size of the hardware + * indirection table. + * @key_size: On entry, the array size of the user buffer in bytes, + * which may be zero. + * On return from %ETHTOOL_GRSSH, the size of the RSS hash key. + * @rsvd: Reserved for future extensions. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of size @indir_size followed by hash key of size @key_size. + * + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the + * size should be returned. For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF + * means that indir table setting is not requested and a @indir_size of zero + * means the indir table should be reset to default values. This last feature + * is not supported by the original implementations. + */ +struct ethtool_rxfh { + __u32 cmd; + __u32 rss_context; + __u32 indir_size; + __u32 key_size; + __u32 rsvd[2]; + __u32 rss_config[0]; +}; + +/** * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW * @h_u: Flow field values to match (dependent on @flow_type) @@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ #define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ +#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ +#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 8eb9ccaa5b4..253b4d42cf2 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 -#define SKF_AD_MAX 56 +#define SKF_AD_RANDOM 56 +#define SKF_AD_MAX 60 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/kernel/audit.c b/kernel/audit.c index 7c2893602d0..33531d72e4a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -424,6 +424,38 @@ static void kauditd_send_skb(struct sk_buff *skb) } /* + * kauditd_send_multicast_skb - send the skb to multicast userspace listeners + * + * This function doesn't consume an skb as might be expected since it has to + * copy it anyways. + */ +static void kauditd_send_multicast_skb(struct sk_buff *skb) +{ + struct sk_buff *copy; + struct audit_net *aunet = net_generic(&init_net, audit_net_id); + struct sock *sock = aunet->nlsk; + + if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) + return; + + /* + * The seemingly wasteful skb_copy() rather than bumping the refcount + * using skb_get() is necessary because non-standard mods are made to + * the skb by the original kaudit unicast socket send routine. The + * existing auditd daemon assumes this breakage. Fixing this would + * require co-ordinating a change in the established protocol between + * the kaudit kernel subsystem and the auditd userspace code. There is + * no reason for new multicast clients to continue with this + * non-compliance. + */ + copy = skb_copy(skb, GFP_KERNEL); + if (!copy) + return; + + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); +} + +/* * flush_hold_queue - empty the hold queue if auditd appears * * If auditd just started, drain the queue of messages already @@ -1076,10 +1108,22 @@ static void audit_receive(struct sk_buff *skb) mutex_unlock(&audit_cmd_mutex); } +/* Run custom bind function on netlink socket group connect or bind requests. */ +static int audit_bind(int group) +{ + if (!capable(CAP_AUDIT_READ)) + return -EPERM; + + return 0; +} + static int __net_init audit_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = audit_receive, + .bind = audit_bind, + .flags = NL_CFG_F_NONROOT_RECV, + .groups = AUDIT_NLGRP_MAX, }; struct audit_net *aunet = net_generic(net, audit_net_id); @@ -1901,10 +1945,10 @@ out: * audit_log_end - end one audit record * @ab: the audit_buffer * - * The netlink_* functions cannot be called inside an irq context, so - * the audit buffer is placed on a queue and a tasklet is scheduled to - * remove them from the queue outside the irq context. May be called in - * any context. + * netlink_unicast() cannot be called inside an irq context because it blocks + * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed + * on a queue and a tasklet is scheduled to remove them from the queue outside + * the irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { @@ -1914,6 +1958,18 @@ void audit_log_end(struct audit_buffer *ab) audit_log_lost("rate limit exceeded"); } else { struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); + + kauditd_send_multicast_skb(ab->skb); + + /* + * The original kaudit unicast socket sends up messages with + * nlmsg_len set to the payload length rather than the entire + * message length. This breaks the standard set by netlink. + * The existing auditd daemon assumes this breakage. Fixing + * this would require co-ordinating a change in the established + * protocol between the kaudit kernel subsystem and the auditd + * userspace code. + */ nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN; if (audit_pid) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 733ec283ed1..8f025afa29f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -706,38 +706,36 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { + struct vlan_pcpu_stats *p; + u32 rx_errors = 0, tx_dropped = 0; + int i; - if (vlan_dev_priv(dev)->vlan_pcpu_stats) { - struct vlan_pcpu_stats *p; - u32 rx_errors = 0, tx_dropped = 0; - int i; - - for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; - unsigned int start; - - p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - do { - start = u64_stats_fetch_begin_irq(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - rxmulticast = p->rx_multicast; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); - - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->multicast += rxmulticast; - stats->tx_packets += txpackets; - stats->tx_bytes += txbytes; - /* rx_errors & tx_dropped are u32 */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; - } - stats->rx_errors = rx_errors; - stats->tx_dropped = tx_dropped; + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; + unsigned int start; + + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + rxmulticast = p->rx_multicast; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->multicast += rxmulticast; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + /* rx_errors & tx_dropped are u32 */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } + stats->rx_errors = rx_errors; + stats->tx_dropped = tx_dropped; + return stats; } diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a9..11d70e3afef 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + + if (unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->protocol = eth_type_trans(skb, dev); + + return 0; +} +EXPORT_SYMBOL_GPL(__dev_forward_skb); + /** * dev_forward_skb - loopback an skb to another netif * @@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - - return netif_rx_internal(skb); + return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 640ba0e5831..1d72786ef86 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -557,6 +557,23 @@ err_out: return ret; } +static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, + struct ethtool_rxnfc *rx_rings, + u32 size) +{ + int ret = 0, i; + + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + ret = -EFAULT; + + /* Validate ring indices */ + for (i = 0; i < size; i++) { + if (indir[i] >= rx_rings->data) + ret = -EINVAL; + } + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; int ret; + u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || !ops->get_rxnfc) @@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - if (copy_from_user(indir, - useraddr + - offsetof(struct ethtool_rxfh_indir, - ring_index[0]), - dev_size * sizeof(indir[0]))) { + ret = ethtool_copy_validate_indir(indir, + useraddr + ringidx_offset, + &rx_rings, + dev_size); + if (ret) + goto out; + } + + ret = ops->set_rxfh_indir(dev, indir); + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + u32 user_indir_size = 0, user_key_size = 0; + u32 dev_indir_size = 0, dev_key_size = 0; + u32 total_size; + u32 indir_offset, indir_bytes; + u32 key_offset; + u32 *indir = NULL; + u8 *hkey = NULL; + u8 *rss_config; + + if (!(dev->ethtool_ops->get_rxfh_indir_size || + dev->ethtool_ops->get_rxfh_key_size) || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (copy_to_user(useraddr + indir_offset, + &dev_indir_size, sizeof(dev_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + if (copy_to_user(useraddr + key_offset, + &dev_key_size, sizeof(dev_key_size))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size and key size. Otherwise, if the User size is + * not equal to device table size or key size it's an error. + */ + if (!user_indir_size && !user_key_size) + return 0; + + if ((user_indir_size && (user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + indir_bytes = user_indir_size * sizeof(indir[0]); + total_size = indir_bytes + user_key_size; + rss_config = kzalloc(total_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + if (user_indir_size) + indir = (u32 *)rss_config; + + if (user_key_size) + hkey = rss_config + indir_bytes; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); + if (!ret) { + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) ret = -EFAULT; + } + + kfree(rss_config); + + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc rx_rings; + u32 user_indir_size = 0, dev_indir_size = 0, i; + u32 user_key_size = 0, dev_key_size = 0; + u32 *indir = NULL, indir_bytes = 0; + u8 *hkey = NULL; + u8 *rss_config; + u32 indir_offset, key_offset; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + + if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || + !ops->get_rxnfc || !ops->set_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + /* If either indir or hash key is valid, proceed further. + */ + if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) && + user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + if (user_indir_size != 0xDEADBEEF) + indir_bytes = dev_indir_size * sizeof(indir[0]); + + rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) + goto out; + + /* user_indir_size == 0 means reset the indir table to default. + * user_indir_size == 0xDEADBEEF means indir setting is not requested. + */ + if (user_indir_size && user_indir_size != 0xDEADBEEF) { + indir = (u32 *)rss_config; + ret = ethtool_copy_validate_indir(indir, + useraddr + rss_cfg_offset, + &rx_rings, + user_indir_size); + if (ret) goto out; - } + } else if (user_indir_size == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } - /* Validate ring indices */ - for (i = 0; i < dev_size; i++) { - if (indir[i] >= rx_rings.data) { - ret = -EINVAL; - goto out; - } + if (user_key_size) { + hkey = rss_config + indir_bytes; + if (copy_from_user(hkey, + useraddr + rss_cfg_offset + indir_bytes, + user_key_size)) { + ret = -EFAULT; + goto out; } } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, hkey); out: - kfree(indir); + kfree(rss_config); return ret; } @@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GRXFHINDIR: + case ETHTOOL_GRSSH: case ETHTOOL_GFEATURES: case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: @@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GRSSH: + rc = ethtool_get_rxfh(dev, useraddr); + break; + case ETHTOOL_SRSSH: + rc = ethtool_set_rxfh(dev, useraddr); + break; case ETHTOOL_GFEATURES: rc = ethtool_get_features(dev, useraddr); break; diff --git a/net/core/filter.c b/net/core/filter.c index 9d79ca0a6e8..7c4db3dd3d1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -652,6 +652,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return (u64)prandom_u32(); +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct sock_filter_int **insnp) { @@ -781,6 +787,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: + case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; insn->a_reg = ARG1_REG; @@ -814,6 +821,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: insn->imm = __get_raw_cpu_id - __bpf_call_base; break; + case SKF_AD_OFF + SKF_AD_RANDOM: + insn->imm = __get_random_u32 - __bpf_call_base; + break; } break; @@ -1364,6 +1374,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); ANCILLARY(PAY_OFFSET); + ANCILLARY(RANDOM); } /* ancillary operation unknown or unsupported */ @@ -1748,6 +1759,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index ef2d54372b1..6f1428c4870 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -36,7 +36,7 @@ struct lowpan_frag_info { u8 d_offset; }; -struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) +static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) { return (struct lowpan_frag_info *)skb->cb; } @@ -120,6 +120,8 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct inet_frag_queue *q; struct lowpan_create_arg arg; unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); arg.tag = frag_info->d_tag; arg.d_size = frag_info->d_size; @@ -129,7 +131,7 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, read_lock(&lowpan_frags.lock); hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); - q = inet_frag_find(&net->ieee802154_lowpan.frags, + q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -357,6 +359,8 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) struct net *net = dev_net(skb->dev); struct lowpan_frag_info *frag_info = lowpan_cb(skb); struct ieee802154_addr source, dest; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); int err; source = mac_cb(skb)->source; @@ -366,10 +370,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (err < 0) goto err; - if (frag_info->d_size > net->ieee802154_lowpan.max_dsize) + if (frag_info->d_size > ieee802154_lowpan->max_dsize) goto err; - inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false); + inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false); fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { @@ -436,6 +440,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); table = lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { @@ -444,10 +450,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ieee802154_lowpan.frags.high_thresh; - table[1].data = &net->ieee802154_lowpan.frags.low_thresh; - table[2].data = &net->ieee802154_lowpan.frags.timeout; - table[3].data = &net->ieee802154_lowpan.max_dsize; + table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[2].data = &ieee802154_lowpan->frags.timeout; + table[3].data = &ieee802154_lowpan->max_dsize; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -458,7 +464,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ieee802154_lowpan.sysctl.frags_hdr = hdr; + ieee802154_lowpan->sysctl.frags_hdr = hdr; return 0; err_reg: @@ -471,9 +477,11 @@ err_alloc: static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr); + table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } @@ -514,20 +522,26 @@ static inline void lowpan_frags_sysctl_unregister(void) static int __net_init lowpan_frags_init_net(struct net *net) { - net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ieee802154_lowpan.max_dsize = 0xFFFF; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - inet_frags_init_net(&net->ieee802154_lowpan.frags); + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->max_dsize = 0xFFFF; + + inet_frags_init_net(&ieee802154_lowpan->frags); return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); } static struct pernet_operations lowpan_frags_ops = { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 94213c89156..c5a557a06a3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_gre(dev_net(dev), &fl4, + rt = ip_route_output_gre(t->net, &fl4, t->parms.iph.daddr, t->parms.iph.saddr, t->parms.o_key, @@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; - in_dev = inetdev_by_index(dev_net(dev), t->mlink); + in_dev = inetdev_by_index(t->net, t->mlink); if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); } @@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev) dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; + dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bd6d52eeff..eb1dde37e67 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2916,6 +2916,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + + case TCP_FASTOPEN: + if (icsk->icsk_accept_queue.fastopenq != NULL) + val = icsk->icsk_accept_queue.fastopenq->max_qlen; + else + val = 0; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d6b46eb2f94..6efed134ab6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4703,28 +4703,6 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, - * and if application hit its sndbuf limit recently. - */ -void tcp_cwnd_application_limited(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && - sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - /* Limited by application or receiver window. */ - u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; - } - tp->snd_cwnd_used = 0; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_should_expand_sndbuf(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 025e2509398..20847de991e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); if (clone_it) { - const struct sk_buff *fclone = skb + 1; - skb_mstamp_get(&skb->skb_mstamp); - if (unlikely(skb->fclone == SKB_FCLONE_ORIG && - fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); - if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -1387,6 +1380,28 @@ unsigned int tcp_current_mss(struct sock *sk) return mss_now; } +/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. + * As additional protections, we do not touch cwnd in retransmission phases, + * and if application hit its sndbuf limit recently. + */ +static void tcp_cwnd_application_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && + sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + /* Limited by application or receiver window. */ + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); + if (win_used < tp->snd_cwnd) { + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + } + tp->snd_cwnd_used = 0; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +} + /* Congestion window validation. (RFC2861) */ static void tcp_cwnd_validate(struct sock *sk) { @@ -2039,6 +2054,25 @@ bool tcp_schedule_loss_probe(struct sock *sk) return true; } +/* Thanks to skb fast clones, we can detect if a prior transmit of + * a packet is still in a qdisc or driver queue. + * In this case, there is very little point doing a retransmit ! + * Note: This is called from BH context only. + */ +static bool skb_still_in_host_queue(const struct sock *sk, + const struct sk_buff *skb) +{ + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + return true; + } + return false; +} + /* When probe timeout (PTO) fires, send a new segment if one exists, else * retransmit the last segment. */ @@ -2064,6 +2098,9 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb)) goto rearm_timer; + if (skb_still_in_host_queue(sk, skb)) + goto rearm_timer; + pcount = tcp_skb_pcount(skb); if (WARN_ON(!pcount)) goto rearm_timer; @@ -2385,6 +2422,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; + if (skb_still_in_host_queue(sk, skb)) + return -EBUSY; + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); @@ -2478,7 +2518,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; - } else { + } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } return err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c7fa0853fc..fc203db2211 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2504,8 +2504,8 @@ static int inet6_addr_add(struct net *net, int ifindex, return PTR_ERR(ifp); } -static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx, - unsigned int plen) +static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, + const struct in6_addr *pfx, unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2528,7 +2528,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifa_flags & IFA_F_MANAGETEMPADDR)) + manage_tempaddrs(idev, ifp, 0, 0, false, + jiffies); ipv6_del_addr(ifp); + addrconf_verify_rtnl(); return 0; } } @@ -2568,7 +2573,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen); rtnl_unlock(); return err; @@ -3743,6 +3748,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx, *peer_pfx; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3754,7 +3760,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (pfx == NULL) return -EINVAL; - return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + + /* We ignore other flags so far. */ + ifa_flags &= IFA_F_MANAGETEMPADDR; + + return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, + ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9d921462b57..75277b739b0 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -72,6 +72,7 @@ struct ip6gre_net { }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -353,10 +354,10 @@ failed_free: static void ip6gre_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); - ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + ip6gre_tunnel_unlink(ign, t); dev_put(dev); } @@ -611,8 +612,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ unsigned int max_headroom = 0; /* The extra header space needed */ @@ -979,7 +980,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); @@ -1063,13 +1064,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; if (dev == ign->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; @@ -1077,9 +1077,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -1242,7 +1242,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -1297,11 +1296,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, - struct list_head *head) +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) { + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6gre_link_ops || + dev->rtnl_link_ops == &ip6gre_tap_ops) + unregister_netdevice_queue(dev, head); + for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1310,7 +1315,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, t = rtnl_dereference(ign->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1329,6 +1339,11 @@ static int __net_init ip6gre_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1349,12 +1364,10 @@ err_alloc_dev: static void __net_exit ip6gre_exit_net(struct net *net) { - struct ip6gre_net *ign; LIST_HEAD(list); - ign = net_generic(net, ip6gre_net_id); rtnl_lock(); - ip6gre_destroy_tunnels(ign, &list); + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -1531,15 +1544,14 @@ out: static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t, *nt; - struct net *net = dev_net(dev); + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; if (dev == ign->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e8138da4c14..6e42dcfad40 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -400,19 +400,17 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static void nfnetlink_bind(int group) +static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; int type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type); - if (!ss) { - rcu_read_unlock(); - request_module("nfnetlink-subsys-%d", type); - return; - } rcu_read_unlock(); + if (!ss) + request_module("nfnetlink-subsys-%d", type); + return 0; } #endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206b..92f4b6915e8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if (!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(group, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { + if (groups) { if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); @@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i = 0; i < nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -2103,13 +2129,17 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); - - if (nlk->netlink_bind) - nlk->netlink_bind(val); + if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) + nlk->netlink_unbind(val); err = 0; break; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed13a790b00..0b59d441f5b 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -38,7 +38,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -74,7 +75,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fee06b99a4d..e37b2cbbf17 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -71,6 +71,7 @@ #include <net/route.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/busy_poll.h> #include <linux/socket.h> /* for sa_family_t */ #include <linux/export.h> @@ -6557,6 +6558,10 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, noblock)) + continue; + /* User doesn't want to wait. */ error = -EAGAIN; if (!timeo) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 7144eb6a1b9..d49dc2ed30a 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -38,6 +38,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/busy_poll.h> #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> @@ -204,6 +205,9 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto out_free; + if (!sctp_ulpevent_is_notification(event)) + sk_mark_napi_id(sk, skb); + /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) goto out_free; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 95ab5ef9292..119a59b4bec 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -112,6 +112,8 @@ const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); static u32 bcbuf_acks(struct sk_buff *buf) { @@ -273,7 +275,7 @@ exit: /** * tipc_bclink_update_link_state - update broadcast link state * - * tipc_net_lock and node lock set + * RCU and node lock set */ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { @@ -321,7 +323,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) : n_ptr->bclink.last_sent); spin_lock_bh(&bc_lock); - tipc_bearer_send(&bcbearer->bearer, buf, NULL); + tipc_bearer_send(MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; spin_unlock_bh(&bc_lock); kfree_skb(buf); @@ -335,8 +337,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. - * - * Only tipc_net_lock set. */ static void bclink_peek_nack(struct tipc_msg *msg) { @@ -408,7 +408,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) /** * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards * - * tipc_net_lock is read_locked, no other locks set + * RCU is locked, no other locks set */ void tipc_bclink_rcv(struct sk_buff *buf) { @@ -627,13 +627,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b, buf, &b->bcast_addr); + tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b, tbuf, &b->bcast_addr); + tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } @@ -655,20 +655,27 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(void) +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; + struct tipc_bearer *b; int b_index; int pri; spin_lock_bh(&bc_lock); + if (action) + tipc_nmap_add(nm_ptr, node); + else + tipc_nmap_remove(nm_ptr, node); + /* Group bearers by priority (can assume max of two per priority) */ memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = bearer_list[b_index]; + b = rcu_dereference_rtnl(bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -677,6 +684,7 @@ void tipc_bcbearer_sort(void) else bp_temp[b->priority].secondary = b; } + rcu_read_unlock(); /* Create array of bearer pairs for broadcasting */ bp_curr = bcbearer->bpairs; @@ -783,8 +791,8 @@ void tipc_bclink_init(void) bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->b_ptr = &bcbearer->bearer; - bearer_list[BCBEARER] = &bcbearer->bearer; + bcl->bearer_id = MAX_BEARERS; + rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); } @@ -795,16 +803,15 @@ void tipc_bclink_stop(void) tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); - bearer_list[BCBEARER] = NULL; + RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); memset(bclink, 0, sizeof(*bclink)); memset(bcbearer, 0, sizeof(*bcbearer)); } - /** * tipc_nmap_add - add a node to a node map */ -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -819,7 +826,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) /** * tipc_nmap_remove - remove a node from a node map */ -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a80ef54b818..7c1ef1b3d7b 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -69,9 +69,6 @@ struct tipc_node; extern const char tipc_bclink_name[]; -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - /** * tipc_nmap_equal - test for equality of node maps */ @@ -98,6 +95,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(void); +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3fef7eb776d..f3259d4133b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,7 +49,7 @@ static struct tipc_media * const media_info_array[] = { NULL }; -struct tipc_bearer *bearer_list[MAX_BEARERS + 1]; +struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -178,7 +178,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -198,10 +198,9 @@ struct sk_buff *tipc_bearer_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = bearer_list[j]; + b = rtnl_dereference(bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -211,22 +210,33 @@ struct sk_buff *tipc_bearer_get_names(void) } } } - read_unlock_bh(&tipc_net_lock); return buf; } -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(u32 bearer_id, u32 dest) { - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_add_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_disc_add_dest(b_ptr->link_req); + } + rcu_read_unlock(); } -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_remove_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_disc_remove_dest(b_ptr->link_req); + } + rcu_read_unlock(); } /** @@ -271,13 +281,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } - write_lock_bh(&tipc_net_lock); - m_ptr = tipc_media_find(b_names.media_name); if (!m_ptr) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); - goto exit; + return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) @@ -287,7 +295,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -295,14 +303,14 @@ restart: if (!strcmp(name, b_ptr->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); - goto exit; + return -EINVAL; } if ((b_ptr->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", name); - goto exit; + return -EINVAL; } pr_warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); @@ -312,21 +320,20 @@ restart: if (bearer_id >= MAX_BEARERS) { pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", name, MAX_BEARERS); - goto exit; + return -EINVAL; } b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) { - res = -ENOMEM; - goto exit; - } + if (!b_ptr) + return -ENOMEM; + strcpy(b_ptr->name, name); b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); - goto exit; + return -EINVAL; } b_ptr->identity = bearer_id; @@ -341,16 +348,14 @@ restart: bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); - goto exit; + return -EINVAL; } - bearer_list[bearer_id] = b_ptr; + rcu_assign_pointer(bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); -exit: - write_unlock_bh(&tipc_net_lock); return res; } @@ -359,19 +364,16 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_disc_delete(b_ptr->link_req); tipc_link_reset_list(b_ptr->identity); - tipc_disc_create(b_ptr, &b_ptr->bcast_addr); - read_unlock_bh(&tipc_net_lock); + tipc_disc_reset(b_ptr); return 0; } /** * bearer_disable * - * Note: This routine assumes caller holds tipc_net_lock. + * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { @@ -385,12 +387,12 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == bearer_list[i]) { - bearer_list[i] = NULL; + if (b_ptr == rtnl_dereference(bearer_list[i])) { + RCU_INIT_POINTER(bearer_list[i], NULL); break; } } - kfree(b_ptr); + kfree_rcu(b_ptr, rcu); } int tipc_disable_bearer(const char *name) @@ -398,7 +400,6 @@ int tipc_disable_bearer(const char *name) struct tipc_bearer *b_ptr; int res; - write_lock_bh(&tipc_net_lock); b_ptr = tipc_bearer_find(name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); @@ -407,7 +408,6 @@ int tipc_disable_bearer(const char *name) bearer_disable(b_ptr, false); res = 0; } - write_unlock_bh(&tipc_net_lock); return res; } @@ -444,7 +444,7 @@ int tipc_enable_l2_media(struct tipc_bearer *b) return -ENODEV; /* Associate TIPC bearer with Ethernet bearer */ - b->media_ptr = dev; + rcu_assign_pointer(b->media_ptr, dev); memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -463,8 +463,12 @@ int tipc_enable_l2_media(struct tipc_bearer *b) */ void tipc_disable_l2_media(struct tipc_bearer *b) { - struct net_device *dev = (struct net_device *)b->media_ptr; + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); dev_put(dev); } @@ -478,8 +482,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; + struct net_device *dev; int delta; - struct net_device *dev = (struct net_device *)b->media_ptr; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; clone = skb_clone(buf, GFP_ATOMIC); if (!clone) @@ -507,10 +515,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! */ -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { - b->media->send_msg(buf, b, dest); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (likely(b_ptr)) + b_ptr->media->send_msg(buf, b_ptr, dest); + rcu_read_unlock(); } /** @@ -535,7 +549,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, } rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); + b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; @@ -568,12 +582,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); - if (!b_ptr) { - rcu_read_unlock(); + b_ptr = rtnl_dereference(dev->tipc_ptr); + if (!b_ptr) return NOTIFY_DONE; - } b_ptr->mtu = dev->mtu; @@ -592,11 +603,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - tipc_disable_bearer(b_ptr->name); + bearer_disable(b_ptr, false); break; } - rcu_read_unlock(); - return NOTIFY_OK; } @@ -633,7 +642,7 @@ void tipc_bearer_stop(void) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr) { bearer_disable(b_ptr, true); bearer_list[i] = NULL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ba48145e871..a983b3005e7 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -113,6 +113,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -127,12 +128,13 @@ struct tipc_media { * care of initializing all other fields. */ struct tipc_bearer { - void *media_ptr; /* initalized by media */ + void __rcu *media_ptr; /* initalized by media */ u32 mtu; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct rcu_head rcu; u32 priority; u32 window; u32 tolerance; @@ -150,7 +152,7 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer *bearer_list[]; +extern struct tipc_bearer __rcu *bearer_list[]; /* * TIPC routines available to supported media types @@ -181,14 +183,14 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_add_dest(u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index 4b981c05382..251f5a2028e 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,8 +42,6 @@ #define REPLY_TRUNCATED "<truncated>\n" -static DEFINE_MUTEX(config_mutex); - static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ @@ -223,7 +221,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - mutex_lock(&config_mutex); + rtnl_lock(); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -337,6 +335,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - mutex_unlock(&config_mutex); + rtnl_unlock(); return rep_tlv_buf; } diff --git a/net/tipc/core.h b/net/tipc/core.h index 8985bbcb942..36cbf158845 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -56,7 +56,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/vmalloc.h> - +#include <linux/rtnetlink.h> #define TIPC_MOD_VER "2.0.0" diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 542fe3413dc..ada42e436f5 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -46,8 +46,9 @@ /** * struct tipc_link_req - information about an ongoing link setup request - * @bearer: bearer issuing requests + * @bearer_id: identity of bearer issuing requests * @dest: destination address for request messages + * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent @@ -55,8 +56,9 @@ * @timer_intv: current interval between requests (in ms) */ struct tipc_link_req { - struct tipc_bearer *bearer; + u32 bearer_id; struct tipc_media_addr dest; + u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; @@ -69,22 +71,19 @@ struct tipc_link_req { * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr) +static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, + struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); - } - return buf; + msg = buf_msg(buf); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); + msg_set_node_sig(msg, tipc_random); + msg_set_dest_domain(msg, dest_domain); + msg_set_bc_netid(msg, tipc_net_id); + b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); } /** @@ -239,9 +238,10 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) link_fully_up = link_working_working(link); if ((type == DSC_REQ_MSG) && !link_fully_up) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); + rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_bearer_send(b_ptr, rbuf, &media_addr); + tipc_disc_init_msg(rbuf, DSC_RESP_MSG, b_ptr); + tipc_bearer_send(b_ptr->identity, rbuf, &media_addr); kfree_skb(rbuf); } } @@ -303,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req) spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->bearer->domain) && req->num_nodes) { + if (tipc_node(req->domain) && req->num_nodes) { req->timer_intv = TIPC_LINK_REQ_INACTIVE; goto exit; } @@ -315,7 +315,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -347,21 +347,21 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) if (!req) return -ENOMEM; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr); - if (!req->buf) { - kfree(req); - return -ENOMSG; - } + req->buf = tipc_buf_acquire(INT_H_SIZE); + if (!req->buf) + return -ENOMEM; + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); - req->bearer = b_ptr; + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,3 +376,23 @@ void tipc_disc_delete(struct tipc_link_req *req) kfree_skb(req->buf); kfree(req); } + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @b_ptr: ptr to bearer issuing requests + * @dest_domain: network domain to which links can be established + */ +void tipc_disc_reset(struct tipc_bearer *b_ptr) +{ + struct tipc_link_req *req = b_ptr->link_req; + + spin_lock_bh(&req->lock); + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + k_start_timer(&req->timer, req->timer_intv); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + spin_unlock_bh(&req->lock); +} diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 07f34729459..515b57392f4 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -41,6 +41,7 @@ struct tipc_link_req; int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_reset(struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); diff --git a/net/tipc/link.c b/net/tipc/link.c index c5190ab7529..c723ee90219 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -101,9 +101,18 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_bearer *b_ptr; u32 max_pkt; - max_pkt = (l_ptr->b_ptr->mtu & ~3); + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (!b_ptr) { + rcu_read_unlock(); + return; + } + max_pkt = (b_ptr->mtu & ~3); + rcu_read_unlock(); + if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -248,7 +257,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->peer_session = INVALID_SESSION; - l_ptr->b_ptr = b_ptr; + l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -263,6 +272,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); + l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; @@ -426,7 +436,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -477,7 +487,7 @@ static void link_activate(struct tipc_link *l_ptr) { l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); } /** @@ -777,7 +787,7 @@ int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) if (likely(!link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return dsz; } @@ -825,7 +835,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) struct tipc_node *n_ptr; int res = -ELINKCONG; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -838,7 +847,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) } else { kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); return res; } @@ -902,7 +910,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) if (list_empty(message_list)) return; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -917,7 +924,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); /* discard the messages if they couldn't be sent */ list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { @@ -941,7 +947,7 @@ static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, if (likely(!link_congested(l_ptr))) { if (likely(msg_size(msg) <= l_ptr->max_pkt)) { link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; return res; @@ -979,7 +985,6 @@ again: if (unlikely(res < 0)) return res; - read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); @@ -990,7 +995,6 @@ again: &sender->max_pkt); exit: tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return res; } @@ -1007,7 +1011,6 @@ exit: */ sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) @@ -1018,7 +1021,6 @@ exit: } tipc_node_unlock(node); } - read_unlock_bh(&tipc_net_lock); /* Couldn't find a link to the destination node */ kfree_skb(buf); @@ -1204,7 +1206,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; @@ -1216,7 +1218,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); l_ptr->proto_msg_queue = NULL; @@ -1233,7 +1235,8 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (mod(next - first) < l_ptr->queue_limit[0]) { msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, + &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) msg_set_type(msg, CLOSED_MSG); l_ptr->next_out = buf->next; @@ -1262,12 +1265,9 @@ static void link_reset_all(unsigned long addr) char addr_string[16]; u32 i; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) { - read_unlock_bh(&tipc_net_lock); + if (!n_ptr) return; /* node no longer exists */ - } tipc_node_lock(n_ptr); @@ -1282,7 +1282,6 @@ static void link_reset_all(unsigned long addr) } tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); } static void link_retransmit_failure(struct tipc_link *l_ptr, @@ -1352,7 +1351,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); buf = buf->next; retransmits--; l_ptr->stats.retransmitted++; @@ -1440,14 +1439,13 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain - * @tb_ptr: pointer to bearer message arrived on + * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); while (head) { struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1635,7 +1633,6 @@ unlock_discard: discard: kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); } /** @@ -1752,7 +1749,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, /* Create protocol message with "out-of-sequence" sequence number */ msg_set_type(msg, msg_typ); - msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); + msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); @@ -1818,7 +1815,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1843,9 +1840,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) /* record unnumbered packet arrival (force mismatch on next timeout) */ l_ptr->checkpoint--; - if (l_ptr->b_ptr->net_plane != msg_net_plane(msg)) + if (l_ptr->net_plane != msg_net_plane(msg)) if (tipc_own_addr > msg_prevnode(msg)) - l_ptr->b_ptr->net_plane = msg_net_plane(msg); + l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -2397,8 +2394,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) /* tipc_link_find_owner - locate owner node of link by link's name * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. - * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; - * this also prevents link deletion. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ @@ -2460,7 +2455,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) * - * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. + * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. * * Returns 0 if value updated and negative value on error. */ @@ -2566,9 +2561,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space " (cannot change setting on broadcast link)"); } - read_lock_bh(&tipc_net_lock); res = link_cmd_set_value(args->name, new_value, cmd); - read_unlock_bh(&tipc_net_lock); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2602,22 +2595,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return tipc_cfg_reply_error_string("link not found"); - } + tipc_node_lock(node); l_ptr = node->links[bearer_id]; if (!l_ptr) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } link_reset_statistics(l_ptr); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } @@ -2650,18 +2639,15 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return 0; - } + tipc_node_lock(node); l = node->links[bearer_id]; if (!l) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return 0; } @@ -2727,7 +2713,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) (s->accu_queue_sz / s->queue_sz_counts) : 0); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return ret; } @@ -2778,7 +2763,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) if (dest == tipc_own_addr) return MAX_MSG_SIZE; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -2787,13 +2771,18 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) res = l_ptr->max_pkt; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); return res; } static void link_print(struct tipc_link *l_ptr, const char *str) { - pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (b_ptr) + pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); + rcu_read_unlock(); if (link_working_unknown(l_ptr)) pr_cont(":WU\n"); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8c0b49b5b2e..4b556c181ba 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -107,7 +107,7 @@ struct tipc_stats { * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint - * @b_ptr: pointer to bearer used by link + * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link @@ -116,6 +116,7 @@ struct tipc_stats { * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') * @queue_limit: outbound message queue congestion thresholds (indexed by user) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset @@ -155,7 +156,7 @@ struct tipc_link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct tipc_bearer *b_ptr; + u32 bearer_id; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -167,6 +168,7 @@ struct tipc_link { } proto_msg; struct tipc_msg *pmsg; u32 priority; + char net_plane; u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index aff8041dc15..36a72822601 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -248,7 +248,6 @@ void tipc_named_node_up(unsigned long nodearg) u32 max_item_buf = 0; /* compute maximum amount of publication data to send per message */ - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(node); if (n_ptr) { tipc_node_lock(n_ptr); @@ -258,7 +257,6 @@ void tipc_named_node_up(unsigned long nodearg) ITEM_SIZE) * ITEM_SIZE; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); if (!max_item_buf) return; diff --git a/net/tipc/net.c b/net/tipc/net.c index 4c564eb69e1..75bb39025d5 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -45,39 +45,34 @@ /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual - * port and node/link instances. The code consists of three major + * port and node/link instances. The code consists of four major * locking domains, each protected with their own disjunct set of locks. * - * 1: The routing hierarchy. - * Comprises the structures 'zone', 'cluster', 'node', 'link' - * and 'bearer'. The whole hierarchy is protected by a big - * read/write lock, tipc_net_lock, to enssure that nothing is added - * or removed while code is accessing any of these structures. - * This layer must not be called from the two others while they - * hold any of their own locks. - * Neither must it itself do any upcalls to the other two before - * it has released tipc_net_lock and other protective locks. + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. * - * Within the tipc_net_lock domain there are two sub-domains;'node' and - * 'bearer', where local write operations are permitted, - * provided that those are protected by individual spin_locks - * per instance. Code holding tipc_net_lock(read) and a node spin_lock - * is permitted to poke around in both the node itself and its - * subordinate links. I.e, it can update link counters and queues, - * change link state, send protocol messages, and alter the - * "active_links" array in the node; but it can _not_ remove a link - * or a node from the overall structure. - * Correspondingly, individual bearers may change status within a - * tipc_net_lock(read), protected by an individual spin_lock ber bearer - * instance, but it needs tipc_net_lock(write) to remove/add any bearers. + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. Especially node + * instance is destroyed only when TIPC module is removed, and we can + * confirm that there has no any user who is accessing the node at the + * moment. Therefore, Except for iterating the two lists within RCU + * protection, it's no needed to hold RCU that we access node instance + * in other places. * + * In addition, all members in node structure including link instances + * are protected by node spin lock. * - * 2: The transport level of the protocol. - * This consists of the structures port, (and its user level - * representations, such as user_port and tipc_sock), reference and - * tipc_user (port.c, reg.c, socket.c). + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). * - * This layer has four different locks: + * This layer has four different locks: * - The tipc_port spin_lock. This is protecting each port instance * from parallel data access and removal. Since we can not place * this lock in the port itself, it has been placed in the @@ -96,7 +91,7 @@ * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * - * 3: The name table (name_table.c, name_distr.c, subscription.c) + * 4: The name table (name_table.c, name_distr.c, subscription.c) * - There is one big read/write-lock (tipc_nametbl_lock) protecting the * overall name table structure. Nothing must be added/removed to * this structure without holding write access to it. @@ -108,8 +103,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -DEFINE_RWLOCK(tipc_net_lock); - static void net_route_named_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -175,15 +168,13 @@ void tipc_net_start(u32 addr) { char addr_string[16]; - write_lock_bh(&tipc_net_lock); tipc_own_addr = addr; tipc_named_reinit(); tipc_port_reinit(); tipc_bclink_init(); - write_unlock_bh(&tipc_net_lock); - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); + pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); @@ -195,11 +186,11 @@ void tipc_net_stop(void) return; tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); - write_lock_bh(&tipc_net_lock); + rtnl_lock(); tipc_bearer_stop(); tipc_bclink_stop(); tipc_node_stop(); - write_unlock_bh(&tipc_net_lock); + rtnl_unlock(); pr_info("Left network mode\n"); } diff --git a/net/tipc/net.h b/net/tipc/net.h index 079daadb3f7..f781cae8df4 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern rwlock_t tipc_net_lock; - void tipc_net_route_msg(struct sk_buff *buf); void tipc_net_start(u32 addr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 1d3a4999a70..be90115cda1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -148,7 +148,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->working_links++; pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -208,11 +208,11 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); return; } pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -239,7 +239,7 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + n_ptr->links[l_ptr->bearer_id] = l_ptr; spin_lock_bh(&node_list_lock); tipc_num_links++; spin_unlock_bh(&node_list_lock); @@ -273,14 +273,12 @@ static void node_name_purge_complete(unsigned long node_addr) { struct tipc_node *n_ptr; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(node_addr); if (n_ptr) { tipc_node_lock(n_ptr); n_ptr->block_setup &= ~WAIT_NAMES_GONE; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); } static void node_lost_contact(struct tipc_node *n_ptr) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 14d04e63b1f..be491a74c1e 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -147,7 +147,7 @@ struct security_class_mapping secclass_map[] = { { "peer", { "recv", NULL } }, { "capability2", { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", - NULL } }, + "audit_read", NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, "attach_queue", NULL } }, diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index bf7be77ddd6..833a96611da 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,7 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("rand") { return K_RAND; } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index d15efc989ef..e6306c51c26 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -164,6 +164,9 @@ ldb | OP_LDB K_POFF { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDB K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldh @@ -212,6 +215,9 @@ ldh | OP_LDH K_POFF { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDH K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldi @@ -265,6 +271,9 @@ ld | OP_LD K_POFF { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LD K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { |