diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 13:38:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 13:38:27 -0700 |
commit | aecdc33e111b2c447b622e287c6003726daa1426 (patch) | |
tree | 3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /drivers/net/ethernet/intel | |
parent | a20acf99f75e49271381d65db097c9763060a1e8 (diff) | |
parent | a3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:
1) GRE now works over ipv6, from Dmitry Kozlov.
2) Make SCTP more network namespace aware, from Eric Biederman.
3) TEAM driver now works with non-ethernet devices, from Jiri Pirko.
4) Make openvswitch network namespace aware, from Pravin B Shelar.
5) IPV6 NAT implementation, from Patrick McHardy.
6) Server side support for TCP Fast Open, from Jerry Chu and others.
7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel
Borkmann.
8) Increate the loopback default MTU to 64K, from Eric Dumazet.
9) Use a per-task rather than per-socket page fragment allocator for
outgoing networking traffic. This benefits processes that have very
many mostly idle sockets, which is quite common.
From Eric Dumazet.
10) Use up to 32K for page fragment allocations, with fallbacks to
smaller sizes when higher order page allocations fail. Benefits are
a) less segments for driver to process b) less calls to page
allocator c) less waste of space.
From Eric Dumazet.
11) Allow GRO to be used on GRE tunnels, from Eric Dumazet.
12) VXLAN device driver, one way to handle VLAN issues such as the
limitation of 4096 VLAN IDs yet still have some level of isolation.
From Stephen Hemminger.
13) As usual there is a large boatload of driver changes, with the scale
perhaps tilted towards the wireless side this time around.
Fix up various fairly trivial conflicts, mostly caused by the user
namespace changes.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits)
hyperv: Add buffer for extended info after the RNDIS response message.
hyperv: Report actual status in receive completion packet
hyperv: Remove extra allocated space for recv_pkt_list elements
hyperv: Fix page buffer handling in rndis_filter_send_request()
hyperv: Fix the missing return value in rndis_filter_set_packet_filter()
hyperv: Fix the max_xfer_size in RNDIS initialization
vxlan: put UDP socket in correct namespace
vxlan: Depend on CONFIG_INET
sfc: Fix the reported priorities of different filter types
sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP
sfc: Fix loopback self-test with separate_tx_channels=1
sfc: Fix MCDI structure field lookup
sfc: Add parentheses around use of bitfield macro arguments
sfc: Fix null function pointer in efx_sriov_channel_type
vxlan: virtual extensible lan
igmp: export symbol ip_mc_leave_group
netlink: add attributes to fdb interface
tg3: unconditionally select HWMON support when tg3 is enabled.
Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT"
gre: fix sparse warning
...
Diffstat (limited to 'drivers/net/ethernet/intel')
28 files changed, 2152 insertions, 1153 deletions
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 736a7d987db..9089d00f142 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -174,6 +174,20 @@ static int e1000_get_settings(struct net_device *netdev, ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) || hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 1; MDI => 0 */ + if ((hw->media_type == e1000_media_type_copper) && + netif_carrier_ok(netdev)) + ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ? + ETH_TP_MDI_X : + ETH_TP_MDI); + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->mdix; return 0; } @@ -183,6 +197,22 @@ static int e1000_set_settings(struct net_device *netdev, struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -199,12 +229,21 @@ static int e1000_set_settings(struct net_device *netdev, ecmd->advertising = hw->autoneg_advertised; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->flags); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->mdix = AUTO_ALL_MODES; + else + hw->mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index f3f9aeb7d1e..222bfaff462 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2014,6 +2014,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, e1000_unmap_and_free_tx_resource(adapter, buffer_info); } + netdev_reset_queue(adapter->netdev); size = sizeof(struct e1000_buffer) * tx_ring->count; memset(tx_ring->buffer_info, 0, size); @@ -3273,6 +3274,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags, mss); if (count) { + netdev_sent_queue(netdev, skb->len); skb_tx_timestamp(skb); e1000_tx_queue(adapter, tx_ring, tx_flags, count); @@ -3860,6 +3862,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, unsigned int i, eop; unsigned int count = 0; unsigned int total_tx_bytes=0, total_tx_packets=0; + unsigned int bytes_compl = 0, pkts_compl = 0; i = tx_ring->next_to_clean; eop = tx_ring->buffer_info[i].next_to_watch; @@ -3877,6 +3880,11 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, if (cleaned) { total_tx_packets += buffer_info->segs; total_tx_bytes += buffer_info->bytecount; + if (buffer_info->skb) { + bytes_compl += buffer_info->skb->len; + pkts_compl++; + } + } e1000_unmap_and_free_tx_resource(adapter, buffer_info); tx_desc->upper.data = 0; @@ -3890,6 +3898,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, tx_ring->next_to_clean = i; + netdev_completed_queue(netdev, pkts_compl, bytes_compl); + #define TX_WAKE_THRESHOLD 32 if (unlikely(count && netif_carrier_ok(netdev) && E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) { @@ -4950,6 +4960,10 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + hw->mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 080c89093fe..c9858640800 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -653,7 +653,7 @@ static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) **/ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) { - u16 data = er32(POEMB); + u32 data = er32(POEMB); if (active) data |= E1000_PHY_CTRL_D0A_LPLU; @@ -677,7 +677,7 @@ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) **/ static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active) { - u16 data = er32(POEMB); + u32 data = er32(POEMB); if (!active) { data &= ~E1000_PHY_CTRL_NOND0A_LPLU; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 0349e2478df..c11ac275666 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -199,6 +199,11 @@ static int e1000_get_settings(struct net_device *netdev, else ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -241,6 +246,10 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: @@ -264,6 +273,22 @@ static int e1000_set_settings(struct net_device *netdev, return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) usleep_range(1000, 2000); @@ -282,20 +307,32 @@ static int e1000_set_settings(struct net_device *netdev, hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { e1000e_down(adapter); e1000e_up(adapter); - } else { + } else e1000e_reset(adapter); - } clear_bit(__E1000_RESETTING, &adapter->state); return 0; @@ -1905,7 +1942,8 @@ static int e1000_set_coalesce(struct net_device *netdev, return -EINVAL; if (ec->rx_coalesce_usecs == 4) { - adapter->itr = adapter->itr_setting = 4; + adapter->itr_setting = 4; + adapter->itr = adapter->itr_setting; } else if (ec->rx_coalesce_usecs <= 3) { adapter->itr = 20000; adapter->itr_setting = ec->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 3f0223ac4c7..fb659dd8db0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -56,7 +56,7 @@ #define DRV_EXTRAVERSION "-k" -#define DRV_VERSION "2.0.0" DRV_EXTRAVERSION +#define DRV_VERSION "2.1.4" DRV_EXTRAVERSION char e1000e_driver_name[] = "e1000e"; const char e1000e_driver_version[] = DRV_VERSION; @@ -3446,7 +3446,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* * if short on Rx space, Rx wins and must trump Tx - * adjustment or use Early Receive if available + * adjustment */ if (pba < min_rx_space) pba = min_rx_space; @@ -3755,6 +3755,10 @@ static irqreturn_t e1000_intr_msi_test(int irq, void *data) e_dbg("icr is %08X\n", icr); if (icr & E1000_ICR_RXSEQ) { adapter->flags &= ~FLAG_MSI_TEST_FAILED; + /* + * Force memory writes to complete before acknowledging the + * interrupt is handled. + */ wmb(); } @@ -3796,6 +3800,10 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) goto msi_test_failed; } + /* + * Force memory writes to complete before enabling and firing an + * interrupt. + */ wmb(); e1000_irq_enable(adapter); @@ -3807,7 +3815,7 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) e1000_irq_disable(adapter); - rmb(); + rmb(); /* read flags after interrupt has been fired */ if (adapter->flags & FLAG_MSI_TEST_FAILED) { adapter->int_mode = E1000E_INT_MODE_LEGACY; @@ -4670,7 +4678,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) struct e1000_buffer *buffer_info; unsigned int i; u32 cmd_length = 0; - u16 ipcse = 0, tucse, mss; + u16 ipcse = 0, mss; u8 ipcss, ipcso, tucss, tucso, hdr_len; if (!skb_is_gso(skb)) @@ -4704,7 +4712,6 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; tucss = skb_transport_offset(skb); tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; - tucse = 0; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); @@ -4718,7 +4725,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); context_desc->upper_setup.tcp_fields.tucss = tucss; context_desc->upper_setup.tcp_fields.tucso = tucso; - context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); + context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; context_desc->cmd_and_length = cpu_to_le32(cmd_length); diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index b860d4f7ea2..fc62a3f3a5b 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -84,8 +84,9 @@ static const u16 e1000_igp_2_cable_length_table[] = { #define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 /* I82577 PHY Control 2 */ -#define I82577_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82577_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82577 PHY Diagnostics Status */ #define I82577_DSTATUS_CABLE_LENGTH 0x03FC @@ -702,6 +703,32 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) if (ret_val) return ret_val; + /* Set MDI/MDIX mode */ + ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data); + if (ret_val) + return ret_val; + return e1000_set_master_slave_mode(hw); } diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index ba994fb4cec..ca4641e2f74 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -2223,11 +2223,10 @@ out: s32 igb_set_eee_i350(struct e1000_hw *hw) { s32 ret_val = 0; - u32 ipcnfg, eeer, ctrl_ext; + u32 ipcnfg, eeer; - ctrl_ext = rd32(E1000_CTRL_EXT); - if ((hw->mac.type != e1000_i350) || - (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK)) + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) goto out; ipcnfg = rd32(E1000_IPCNFG); eeer = rd32(E1000_EEER); @@ -2240,6 +2239,14 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); + /* keep the LPI clock running before EEE is enabled */ + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + u32 eee_su; + eee_su = rd32(E1000_EEE_SU); + eee_su &= ~E1000_EEE_SU_LPI_CLK_STP; + wr32(E1000_EEE_SU, eee_su); + } + } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); @@ -2249,6 +2256,8 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) } wr32(E1000_IPCNFG, ipcnfg); wr32(E1000_EEER, eeer); + rd32(E1000_IPCNFG); + rd32(E1000_EEER); out: return ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index ec7e4fe3e3e..de4b41ec3c4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -322,6 +322,9 @@ #define E1000_FCRTC_RTH_COAL_SHIFT 4 #define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power decision */ +/* Timestamp in Rx buffer */ +#define E1000_RXPBS_CFG_TS_EN 0x80000000 + /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 @@ -360,6 +363,7 @@ #define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ #define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ #define E1000_ICR_VMMB 0x00000100 /* VM MB event */ +#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ #define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 @@ -399,6 +403,7 @@ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ +#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ @@ -510,6 +515,9 @@ #define E1000_TIMINCA_16NS_SHIFT 24 +#define E1000_TSICR_TXTS 0x00000002 +#define E1000_TSIM_TXTS 0x00000002 + #define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ #define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ #define E1000_MDICNFG_PHY_MASK 0x03E00000 @@ -849,8 +857,9 @@ #define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* EEE Enable 100M AN */ #define E1000_EEER_TX_LPI_EN 0x00010000 /* EEE Tx LPI Enable */ #define E1000_EEER_RX_LPI_EN 0x00020000 /* EEE Rx LPI Enable */ -#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ +#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ #define E1000_EEER_LPI_FC 0x00040000 /* EEE Enable on FC */ +#define E1000_EEE_SU_LPI_CLK_STP 0X00800000 /* EEE LPI Clock Stop */ /* SerDes Control */ #define E1000_GEN_CTL_READY 0x80000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 7be98b6f105..3404bc79f4c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -464,6 +464,32 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw) phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); + if (ret_val) + goto out; + + /* Set MDI/MDIX mode */ + ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); + if (ret_val) + goto out; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); out: return ret_val; @@ -2246,8 +2272,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) if (ret_val) goto out; - phy_data &= ~I82580_PHY_CTRL2_AUTO_MDIX; - phy_data &= ~I82580_PHY_CTRL2_FORCE_MDI_MDIX; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); if (ret_val) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 34e40619f16..6ac3299bfcb 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -111,8 +111,9 @@ s32 igb_check_polarity_m88(struct e1000_hw *hw); #define I82580_PHY_STATUS2_SPEED_100MBPS 0x0100 /* I82580 PHY Control 2 */ -#define I82580_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82580_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82580_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82580_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82580_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82580 PHY Diagnostics Status */ #define I82580_DSTATUS_CABLE_LENGTH 0x03FC diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 28394bea525..e5db48594e8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -91,6 +91,8 @@ #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ #define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ #define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ +#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ /* Filtering Registers */ #define E1000_SAQF(_n) (0x5980 + 4 * (_n)) @@ -347,6 +349,7 @@ /* Energy Efficient Ethernet "EEE" register */ #define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ #define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ +#define E1000_EEE_SU 0X0E34 /* EEE Setup */ /* Thermal Sensor Register */ #define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 9e572dd29ab..8aad230c059 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -34,9 +34,11 @@ #include "e1000_mac.h" #include "e1000_82575.h" +#ifdef CONFIG_IGB_PTP #include <linux/clocksource.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#endif /* CONFIG_IGB_PTP */ #include <linux/bitops.h> #include <linux/if_vlan.h> @@ -99,7 +101,6 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; - struct pci_dev *vfdev; }; #define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ @@ -131,9 +132,9 @@ struct vf_data_storage { #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 /* Supported Rx Buffer Sizes */ -#define IGB_RXBUFFER_512 512 +#define IGB_RXBUFFER_256 256 #define IGB_RXBUFFER_16384 16384 -#define IGB_RX_HDR_LEN IGB_RXBUFFER_512 +#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 /* How many Tx Descriptors do we need to call netif_wake_queue ? */ #define IGB_TX_QUEUE_WAKE 16 @@ -167,8 +168,8 @@ struct igb_tx_buffer { unsigned int bytecount; u16 gso_segs; __be16 protocol; - dma_addr_t dma; - u32 length; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; @@ -212,7 +213,6 @@ struct igb_q_vector { struct igb_ring_container rx, tx; struct napi_struct napi; - int numa_node; u16 itr_val; u8 set_itr; @@ -257,7 +257,6 @@ struct igb_ring { }; /* Items past this point are only used during ring alloc / free */ dma_addr_t dma; /* phys address of the ring */ - int numa_node; /* node to alloc ring memory on */ }; enum e1000_ring_flags_t { @@ -342,7 +341,6 @@ struct igb_adapter { /* OS defined structs */ struct pci_dev *pdev; - struct hwtstamp_config hwtstamp_config; spinlock_t stats64_lock; struct rtnl_link_stats64 stats64; @@ -373,15 +371,19 @@ struct igb_adapter { int vf_rate_link_speed; u32 rss_queues; u32 wvbr; - int node; u32 *shadow_vfta; +#ifdef CONFIG_IGB_PTP struct ptp_clock *ptp_clock; - struct ptp_clock_info caps; - struct delayed_work overflow_work; + struct ptp_clock_info ptp_caps; + struct delayed_work ptp_overflow_work; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; +#endif /* CONFIG_IGB_PTP */ + char fw_version[32]; }; @@ -390,6 +392,7 @@ struct igb_adapter { #define IGB_FLAG_QUAD_PORT_A (1 << 2) #define IGB_FLAG_QUEUE_PAIRS (1 << 3) #define IGB_FLAG_DMAC (1 << 4) +#define IGB_FLAG_PTP (1 << 5) /* DMA Coalescing defines */ #define IGB_MIN_TXPBSIZE 20408 @@ -435,13 +438,17 @@ extern void igb_power_up_link(struct igb_adapter *); extern void igb_set_fw_version(struct igb_adapter *); #ifdef CONFIG_IGB_PTP extern void igb_ptp_init(struct igb_adapter *adapter); -extern void igb_ptp_remove(struct igb_adapter *adapter); - -extern void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim); +extern void igb_ptp_stop(struct igb_adapter *adapter); +extern void igb_ptp_reset(struct igb_adapter *adapter); +extern void igb_ptp_tx_work(struct work_struct *work); +extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); +extern void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb); +extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd); +#endif /* CONFIG_IGB_PTP */ -#endif static inline s32 igb_reset_phy(struct e1000_hw *hw) { if (hw->phy.ops.reset) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 70591117051..2ea01284982 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -148,9 +148,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full| SUPPORTED_Autoneg | - SUPPORTED_TP); - ecmd->advertising = (ADVERTISED_TP | - ADVERTISED_Pause); + SUPPORTED_TP | + SUPPORTED_Pause); + ecmd->advertising = ADVERTISED_TP; if (hw->mac.autoneg == 1) { ecmd->advertising |= ADVERTISED_Autoneg; @@ -158,6 +158,21 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) ecmd->advertising |= hw->phy.autoneg_advertised; } + if (hw->mac.autoneg != 1) + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + if (hw->fc.requested_mode == e1000_fc_full) + ecmd->advertising |= ADVERTISED_Pause; + else if (hw->fc.requested_mode == e1000_fc_rx_pause) + ecmd->advertising |= (ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + else if (hw->fc.requested_mode == e1000_fc_tx_pause) + ecmd->advertising |= ADVERTISED_Asym_Pause; + else + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + ecmd->port = PORT_TP; ecmd->phy_address = hw->phy.addr; } else { @@ -198,6 +213,19 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) } ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == e1000_media_type_copper) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -214,6 +242,22 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) msleep(1); @@ -227,12 +271,25 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__IGB_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { igb_down(adapter); @@ -1469,33 +1526,22 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ctrl_reg = 0; - u16 phy_reg = 0; hw->mac.autoneg = false; - switch (hw->phy.type) { - case e1000_phy_m88: - /* Auto-MDI/MDIX Off */ - igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); - /* reset to update Auto-MDI/MDIX */ - igb_write_phy_reg(hw, PHY_CONTROL, 0x9140); - /* autoneg off */ - igb_write_phy_reg(hw, PHY_CONTROL, 0x8140); - break; - case e1000_phy_82580: - /* enable MII loopback */ - igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041); - break; - case e1000_phy_i210: - /* set loopback speed in PHY */ - igb_read_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2), - &phy_reg); - phy_reg |= GS40G_MAC_SPEED_1G; - igb_write_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2), - phy_reg); - ctrl_reg = rd32(E1000_CTRL_EXT); - default: - break; + if (hw->phy.type == e1000_phy_m88) { + if (hw->phy.id != I210_I_PHY_ID) { + /* Auto-MDI/MDIX Off */ + igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); + /* reset to update Auto-MDI/MDIX */ + igb_write_phy_reg(hw, PHY_CONTROL, 0x9140); + /* autoneg off */ + igb_write_phy_reg(hw, PHY_CONTROL, 0x8140); + } else { + /* force 1000, set loopback */ + igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + igb_write_phy_reg(hw, PHY_CONTROL, 0x4140); + } } /* add small delay to avoid loopback test failure */ @@ -1513,7 +1559,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) E1000_CTRL_FD | /* Force Duplex to FULL */ E1000_CTRL_SLU); /* Set link up enable bit */ - if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210)) + if (hw->phy.type == e1000_phy_m88) ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ wr32(E1000_CTRL, ctrl_reg); @@ -1521,11 +1567,10 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) /* Disable the receiver on the PHY so when a cable is plugged in, the * PHY does not begin to autoneg when a cable is reconnected to the NIC. */ - if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210)) + if (hw->phy.type == e1000_phy_m88) igb_phy_disable_receiver(adapter); - udelay(500); - + mdelay(500); return 0; } @@ -1785,13 +1830,6 @@ static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) *data = 0; goto out; } - if ((adapter->hw.mac.type == e1000_i210) - || (adapter->hw.mac.type == e1000_i211)) { - dev_err(&adapter->pdev->dev, - "Loopback test not supported on this part at this time.\n"); - *data = 0; - goto out; - } *data = igb_setup_desc_rings(adapter); if (*data) goto out; @@ -2257,6 +2295,54 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static int igb_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (adapter->hw.mac.type) { +#ifdef CONFIG_IGB_PTP + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i210: + case e1000_i211: + info->so_timestamping = + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = 1 << HWTSTAMP_FILTER_NONE; + + /* 82576 does not support timestamping all packets. */ + if (adapter->hw.mac.type >= e1000_82580) + info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL; + else + info->rx_filters |= + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +#endif /* CONFIG_IGB_PTP */ + default: + return -EOPNOTSUPP; + } +} + static int igb_ethtool_begin(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -2270,38 +2356,6 @@ static void igb_ethtool_complete(struct net_device *netdev) pm_runtime_put(&adapter->pdev->dev); } -#ifdef CONFIG_IGB_PTP -static int igb_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) -{ - struct igb_adapter *adapter = netdev_priv(dev); - - info->so_timestamping = - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; - - if (adapter->ptp_clock) - info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; - - info->tx_types = - (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); - - info->rx_filters = - (1 << HWTSTAMP_FILTER_NONE) | - (1 << HWTSTAMP_FILTER_ALL) | - (1 << HWTSTAMP_FILTER_SOME) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); - - return 0; -} - -#endif static const struct ethtool_ops igb_ethtool_ops = { .get_settings = igb_get_settings, .set_settings = igb_set_settings, @@ -2328,11 +2382,9 @@ static const struct ethtool_ops igb_ethtool_ops = { .get_ethtool_stats = igb_get_ethtool_stats, .get_coalesce = igb_get_coalesce, .set_coalesce = igb_set_coalesce, + .get_ts_info = igb_get_ts_info, .begin = igb_ethtool_begin, .complete = igb_ethtool_complete, -#ifdef CONFIG_IGB_PTP - .get_ts_info = igb_ethtool_get_ts_info, -#endif }; void igb_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f88c822e57a..e1ceb37ef12 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -172,8 +172,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_find_enabled_vfs(struct igb_adapter *adapter); -static int igb_check_vf_assignment(struct igb_adapter *adapter); +static bool igb_vfs_are_assigned(struct igb_adapter *adapter); #endif #ifdef CONFIG_PM @@ -404,8 +403,8 @@ static void igb_dump(struct igb_adapter *adapter) buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp); } @@ -456,8 +455,8 @@ static void igb_dump(struct igb_adapter *adapter) " %04X %p %016llX %p%s\n", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp, buffer_info->skb, next_desc); @@ -466,7 +465,8 @@ static void igb_dump(struct igb_adapter *adapter) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, buffer_info->skb->data, - buffer_info->length, true); + dma_unmap_len(buffer_info, len), + true); } } @@ -683,52 +683,29 @@ static int igb_alloc_queues(struct igb_adapter *adapter) { struct igb_ring *ring; int i; - int orig_node = adapter->node; for (i = 0; i < adapter->num_tx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->tx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* For 82575, context index must be unique per ring. */ if (adapter->hw.mac.type == e1000_82575) set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); adapter->tx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; for (i = 0; i < adapter->num_rx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->rx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* set flag indicating ring supports SCTP checksum offload */ if (adapter->hw.mac.type >= e1000_82576) set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); @@ -742,16 +719,12 @@ static int igb_alloc_queues(struct igb_adapter *adapter) adapter->rx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_cache_ring_register(adapter); return 0; err: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_queues(adapter); return -ENOMEM; @@ -1117,24 +1090,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) struct igb_q_vector *q_vector; struct e1000_hw *hw = &adapter->hw; int v_idx; - int orig_node = adapter->node; for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - if ((adapter->num_q_vectors == (adapter->num_rx_queues + - adapter->num_tx_queues)) && - (adapter->num_rx_queues == v_idx)) - adapter->node = orig_node; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, - adapter->node); - if (!q_vector) - q_vector = kzalloc(sizeof(struct igb_q_vector), - GFP_KERNEL); + q_vector = kzalloc(sizeof(struct igb_q_vector), + GFP_KERNEL); if (!q_vector) goto err_out; q_vector->adapter = adapter; @@ -1143,14 +1102,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); adapter->q_vector[v_idx] = q_vector; } - /* Restore the adapter's original node */ - adapter->node = orig_node; return 0; err_out: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_q_vectors(adapter); return -ENOMEM; } @@ -1751,6 +1706,11 @@ void igb_reset(struct igb_adapter *adapter) /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); +#ifdef CONFIG_IGB_PTP + /* Re-enable PTP, where applicable. */ + igb_ptp_reset(adapter); +#endif /* CONFIG_IGB_PTP */ + igb_get_phy_info(hw); } @@ -2180,11 +2140,12 @@ static int __devinit igb_probe(struct pci_dev *pdev, } #endif + #ifdef CONFIG_IGB_PTP /* do hw tstamp init after resetting */ igb_ptp_init(adapter); +#endif /* CONFIG_IGB_PTP */ -#endif dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", @@ -2259,9 +2220,9 @@ static void __devexit igb_remove(struct pci_dev *pdev) pm_runtime_get_noresume(&pdev->dev); #ifdef CONFIG_IGB_PTP - igb_ptp_remove(adapter); + igb_ptp_stop(adapter); +#endif /* CONFIG_IGB_PTP */ -#endif /* * The watchdog timer may be rescheduled, so explicitly * disable watchdog from being rescheduled. @@ -2294,11 +2255,11 @@ static void __devexit igb_remove(struct pci_dev *pdev) /* reclaim resources allocated to VFs */ if (adapter->vf_data) { /* disable iov and allow time for transactions to clear */ - if (!igb_check_vf_assignment(adapter)) { + if (igb_vfs_are_assigned(adapter)) { + dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n"); + } else { pci_disable_sriov(pdev); msleep(500); - } else { - dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); } kfree(adapter->vf_data); @@ -2338,7 +2299,7 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter) #ifdef CONFIG_PCI_IOV struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - int old_vfs = igb_find_enabled_vfs(adapter); + int old_vfs = pci_num_vf(adapter->pdev); int i; /* Virtualization features not supported on i210 family. */ @@ -2418,8 +2379,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - adapter->node = -1; - spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -2666,13 +2625,11 @@ static int igb_close(struct net_device *netdev) int igb_setup_tx_resources(struct igb_ring *tx_ring) { struct device *dev = tx_ring->dev; - int orig_node = dev_to_node(dev); int size; size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); - if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + + tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2680,18 +2637,10 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - set_dev_node(dev, tx_ring->numa_node); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!tx_ring->desc) - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); - if (!tx_ring->desc) goto err; @@ -2702,8 +2651,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) err: vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); return -ENOMEM; } @@ -2820,34 +2769,23 @@ static void igb_configure_tx(struct igb_adapter *adapter) int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct device *dev = rx_ring->dev; - int orig_node = dev_to_node(dev); - int size, desc_len; + int size; size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + + rx_ring->rx_buffer_info = vzalloc(size); if (!rx_ring->rx_buffer_info) goto err; - desc_len = sizeof(union e1000_adv_rx_desc); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - set_dev_node(dev, rx_ring->numa_node); rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!rx_ring->desc) - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - if (!rx_ring->desc) goto err; @@ -2859,8 +2797,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) err: vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the receive descriptor" - " ring\n"); + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); return -ENOMEM; } @@ -2898,57 +2835,48 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0, shift2 = 0; - union e1000_reta { - u32 dword; - u8 bytes[4]; - } reta; - static const u8 rsshash[40] = { - 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, - 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, - 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, - 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; + u32 j, num_rx_queues, shift = 0; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; /* Fill out hash function seeds */ - for (j = 0; j < 10; j++) { - u32 rsskey = rsshash[(j * 4)]; - rsskey |= rsshash[(j * 4) + 1] << 8; - rsskey |= rsshash[(j * 4) + 2] << 16; - rsskey |= rsshash[(j * 4) + 3] << 24; - array_wr32(E1000_RSSRK(0), j, rsskey); - } + for (j = 0; j < 10; j++) + wr32(E1000_RSSRK(j), rsskey[j]); num_rx_queues = adapter->rss_queues; - if (adapter->vfs_allocated_count) { - /* 82575 and 82576 supports 2 RSS queues for VMDq */ - switch (hw->mac.type) { - case e1000_i350: - case e1000_82580: - num_rx_queues = 1; - shift = 0; - break; - case e1000_82576: + switch (hw->mac.type) { + case e1000_82575: + shift = 6; + break; + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count) { shift = 3; num_rx_queues = 2; - break; - case e1000_82575: - shift = 2; - shift2 = 6; - default: - break; } - } else { - if (hw->mac.type == e1000_82575) - shift = 6; + break; + default: + break; } - for (j = 0; j < (32 * 4); j++) { - reta.bytes[j & 3] = (j % num_rx_queues) << shift; - if (shift2) - reta.bytes[j & 3] |= num_rx_queues << shift2; - if ((j & 3) == 3) - wr32(E1000_RETA(j >> 2), reta.dword); + /* + * Populate the indirection table 4 entries at a time. To do this + * we are generating the results for n and n+2 and then interleaving + * those with the results with n+1 and n+3. + */ + for (j = 0; j < 32; j++) { + /* first pass generates n and n+2 */ + u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; + u32 reta = (base & 0x07800780) >> (7 - shift); + + /* second pass generates n+1 and n+3 */ + base += 0x00010001 * num_rx_queues; + reta |= (base & 0x07800780) << (1 + shift); + + wr32(E1000_RETA(j), reta); } /* @@ -3184,8 +3112,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; #endif srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +#ifdef CONFIG_IGB_PTP if (hw->mac.type >= e1000_82580) srrctl |= E1000_SRRCTL_TIMESTAMP; +#endif /* CONFIG_IGB_PTP */ /* Only set Drop Enable if we are supporting multiple queues */ if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) srrctl |= E1000_SRRCTL_DROP_EN; @@ -3269,20 +3199,20 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring, { if (tx_buffer->skb) { dev_kfree_skb_any(tx_buffer->skb); - if (tx_buffer->dma) + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } else if (tx_buffer->dma) { + } else if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; - tx_buffer->dma = 0; + dma_unmap_len_set(tx_buffer, len, 0); /* buffer_info must be completely set up in the transmit path */ } @@ -4229,9 +4159,11 @@ static __le32 igb_tx_cmd_type(u32 tx_flags) if (tx_flags & IGB_TX_FLAGS_VLAN) cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); +#ifdef CONFIG_IGB_PTP /* set timestamp bit if present */ - if (tx_flags & IGB_TX_FLAGS_TSTAMP) + if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); +#endif /* CONFIG_IGB_PTP */ /* set segmentation bits for TSO */ if (tx_flags & IGB_TX_FLAGS_TSO) @@ -4275,7 +4207,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, const u8 hdr_len) { struct sk_buff *skb = first->skb; - struct igb_tx_buffer *tx_buffer_info; + struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; dma_addr_t dma; struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; @@ -4296,8 +4228,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, goto dma_error; /* record length, and DMA address */ - first->length = size; - first->dma = dma; + dma_unmap_len_set(first, len, size); + dma_unmap_addr_set(first, dma, dma); tx_desc->read.buffer_addr = cpu_to_le64(dma); for (;;) { @@ -4339,9 +4271,9 @@ static void igb_tx_map(struct igb_ring *tx_ring, if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = size; - tx_buffer_info->dma = dma; + tx_buffer = &tx_ring->tx_buffer_info[i]; + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); tx_desc->read.olinfo_status = 0; tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -4392,9 +4324,9 @@ dma_error: /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; @@ -4440,6 +4372,9 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { +#ifdef CONFIG_IGB_PTP + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); +#endif /* CONFIG_IGB_PTP */ struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; @@ -4462,10 +4397,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { +#ifdef CONFIG_IGB_PTP + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + !(adapter->ptp_tx_skb))) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); } +#endif /* CONFIG_IGB_PTP */ if (vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; @@ -4661,11 +4603,13 @@ void igb_update_stats(struct igb_adapter *adapter, bytes = 0; packets = 0; for (i = 0; i < adapter->num_rx_queues; i++) { - u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF; + u32 rqdpc = rd32(E1000_RQDPC(i)); struct igb_ring *ring = adapter->rx_ring[i]; - ring->rx_stats.drops += rqdpc_tmp; - net_stats->rx_fifo_errors += rqdpc_tmp; + if (rqdpc) { + ring->rx_stats.drops += rqdpc; + net_stats->rx_fifo_errors += rqdpc; + } do { start = u64_stats_fetch_begin_bh(&ring->rx_syncp); @@ -4755,7 +4699,11 @@ void igb_update_stats(struct igb_adapter *adapter, reg = rd32(E1000_CTRL_EXT); if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { adapter->stats.rxerrc += rd32(E1000_RXERRC); - adapter->stats.tncrs += rd32(E1000_TNCRS); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += rd32(E1000_TNCRS); } adapter->stats.tsctc += rd32(E1000_TSCTC); @@ -4852,6 +4800,19 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; @@ -5002,102 +4963,43 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, static int igb_vf_configure(struct igb_adapter *adapter, int vf) { unsigned char mac_addr[ETH_ALEN]; - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pvfdev; - unsigned int device_id; - u16 thisvf_devfn; eth_random_addr(mac_addr); igb_set_vf_mac(adapter, vf, mac_addr); - switch (adapter->hw.mac.type) { - case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) | - (pdev->devfn & 1); - break; - case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) | - (pdev->devfn & 3); - break; - default: - device_id = 0; - thisvf_devfn = 0; - break; - } - - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == thisvf_devfn) - break; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - if (pvfdev) - adapter->vf_data[vf].vfdev = pvfdev; - else - dev_err(&pdev->dev, - "Couldn't find pci dev ptr for VF %4.4x\n", - thisvf_devfn); - return pvfdev != NULL; + return 0; } -static int igb_find_enabled_vfs(struct igb_adapter *adapter) +static bool igb_vfs_are_assigned(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *pvfdev; - u16 vf_devfn = 0; - u16 vf_stride; - unsigned int device_id; - int vfs_found = 0; + struct pci_dev *vfdev; + int dev_id; switch (adapter->hw.mac.type) { case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - vf_stride = 2; + dev_id = IGB_82576_VF_DEV_ID; break; case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - vf_stride = 4; + dev_id = IGB_I350_VF_DEV_ID; break; default: - device_id = 0; - vf_stride = 0; - break; - } - - vf_devfn = pdev->devfn + 0x80; - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == vf_devfn && - (pvfdev->bus->number >= pdev->bus->number)) - vfs_found++; - vf_devfn += vf_stride; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); + return false; } - return vfs_found; -} - -static int igb_check_vf_assignment(struct igb_adapter *adapter) -{ - int i; - for (i = 0; i < adapter->vfs_allocated_count; i++) { - if (adapter->vf_data[i].vfdev) { - if (adapter->vf_data[i].vfdev->dev_flags & - PCI_DEV_FLAGS_ASSIGNED) + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL); + while (vfdev) { + /* if we don't own it we don't care */ + if (vfdev->is_virtfn && vfdev->physfn == pdev) { + /* if it is assigned we cannot release it */ + if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) return true; } + + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev); } + return false; } @@ -5643,6 +5545,19 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5684,6 +5599,19 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5743,37 +5671,6 @@ static int igb_poll(struct napi_struct *napi, int budget) return 0; } -#ifdef CONFIG_IGB_PTP -/** - * igb_tx_hwtstamp - utility function which checks for TX time stamp - * @q_vector: pointer to q_vector containing needed info - * @buffer: pointer to igb_tx_buffer structure - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. - */ -static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, - struct igb_tx_buffer *buffer_info) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || - !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) - return; - - regval = rd32(E1000_TXSTMPL); - regval |= (u64)rd32(E1000_TXSTMPH) << 32; - - igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(buffer_info->skb, &shhwtstamps); -} - -#endif /** * igb_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -5785,7 +5682,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx.ring; struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; unsigned int i = tx_ring->next_to_clean; @@ -5797,16 +5694,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_desc = IGB_TX_DESC(tx_ring, i); i -= tx_ring->count; - for (; budget; budget--) { - eop_desc = tx_buffer->next_to_watch; - - /* prevent any other reads prior to eop_desc */ - rmb(); + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + rmb(); + /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) break; @@ -5818,25 +5715,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; -#ifdef CONFIG_IGB_PTP - /* retrieve hardware timestamp */ - igb_tx_hwtstamp(q_vector, tx_buffer); - -#endif /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; /* unmap skb header data */ dma_unmap_single(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { - tx_buffer->dma = 0; - tx_buffer++; tx_desc++; i++; @@ -5847,17 +5740,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } /* unmap any remaining paged data */ - if (tx_buffer->dma) { + if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); } } - /* clear last DMA location */ - tx_buffer->dma = 0; - /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; tx_desc++; @@ -5867,7 +5758,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } - } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -5883,12 +5780,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct e1000_hw *hw = &adapter->hw; - eop_desc = tx_buffer->next_to_watch; - /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - if (eop_desc && + if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5912,9 +5807,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_ring->next_to_use, tx_ring->next_to_clean, tx_buffer->time_stamp, - eop_desc, + tx_buffer->next_to_watch, jiffies, - eop_desc->wb.status); + tx_buffer->next_to_watch->wb.status); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); @@ -5994,47 +5889,6 @@ static inline void igb_rx_hash(struct igb_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } -#ifdef CONFIG_IGB_PTP -static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; - - if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | - E1000_RXDADV_STAT_TS)) - return; - - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. - */ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - u32 *stamp = (u32 *)skb->data; - regval = le32_to_cpu(*(stamp + 2)); - regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; - skb_pull(skb, IGB_TS_HDR_LEN); - } else { - if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; - - regval = rd32(E1000_RXSTMPL); - regval |= (u64)rd32(E1000_RXSTMPH) << 32; - } - - igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); -} - -#endif static void igb_rx_vlan(struct igb_ring *ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -6146,8 +6000,8 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) } #ifdef CONFIG_IGB_PTP - igb_rx_hwtstamp(q_vector, rx_desc, skb); -#endif + igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb); +#endif /* CONFIG_IGB_PTP */ igb_rx_hash(rx_ring, rx_desc, skb); igb_rx_checksum(rx_ring, rx_desc, skb); igb_rx_vlan(rx_ring, rx_desc, skb); @@ -6341,181 +6195,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** - * igb_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". - * - **/ -static int igb_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * register TSYNCRXCFG must be set, therefore it is not - * possible to time stamp both Sync and Delay_Req messages - * => fall back to time stamping all packets - */ - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - default: - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. - */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - } - - /* enable/disable TX */ - regval = rd32(E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - wr32(E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = rd32(E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - wr32(E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - wr32(E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - wr32(E1000_ETQF(3), 0); - -#define PTP_PORT 319 - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - wr32(E1000_IMIR(3), htons(PTP_PORT)); - wr32(E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - wr32(E1000_SPQF(3), htons(PTP_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - wr32(E1000_FTQF(3), ftqf); - } else { - wr32(E1000_FTQF(3), E1000_FTQF_MASK); - } - wrfl(); - - adapter->hwtstamp_config = config; - - /* clear TX/RX time stamp registers, just to be sure */ - regval = rd32(E1000_TXSTMPH); - regval = rd32(E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -/** * igb_ioctl - * @netdev: * @ifreq: @@ -6528,8 +6207,10 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); +#ifdef CONFIG_IGB_PTP case SIOCSHWTSTAMP: - return igb_hwtstamp_ioctl(netdev, ifr, cmd); + return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); +#endif /* CONFIG_IGB_PTP */ default: return -EOPNOTSUPP; } @@ -6667,6 +6348,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index c846ea9131a..ee21445157a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -69,22 +69,22 @@ * 2^40 * 10^-9 / 60 = 18.3 minutes. */ -#define IGB_OVERFLOW_PERIOD (HZ * 60 * 9) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) +#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) +#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) +#define IGB_NBITS_82580 40 /* * SYSTIM read access for the 82576 */ -static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; lo = rd32(E1000_SYSTIML); hi = rd32(E1000_SYSTIMH); @@ -99,12 +99,12 @@ static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) * SYSTIM read access for the 82580 */ -static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi, jk; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi, jk; /* * The timestamp latches on lowest register read. For the 82580 @@ -122,16 +122,101 @@ static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) } /* + * SYSTIM read access for I210/I211 + */ + +static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + u32 sec, nsec, jk; + + /* + * The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + jk = rd32(E1000_SYSTIMR); + nsec = rd32(E1000_SYSTIML); + sec = rd32(E1000_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igb_ptp_write_i210(struct igb_adapter *adapter, + const struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * Writing the SYSTIMR register is not necessary as it only provides + * sub-nanosecond resolution. + */ + wr32(E1000_SYSTIML, ts->tv_nsec); + wr32(E1000_SYSTIMH, ts->tv_sec); +} + +/** + * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value. + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + * + * The 'tmreg_lock' spinlock is used to protect the consistency of the + * system time value. This is needed because reading the 64 bit time + * value involves reading two (or three) 32 bit registers. The first + * read latches the value. Ditto for writing. + * + * In addition, here have extended the system time with an overflow + * counter in software. + **/ +static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + unsigned long flags; + u64 ns; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + ns = timecounter_cyc2time(&adapter->tc, systim); + + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); + break; + case e1000_i210: + case e1000_i211: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/* * PTP clock operations */ -static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 incvalue; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -153,13 +238,14 @@ static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 inca; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -178,11 +264,12 @@ static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) +static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) { - s64 now; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + s64 now; spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -195,12 +282,32 @@ static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } -static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + struct timespec now, then = ns_to_timespec(delta); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, &now); + now = timespec_add(now, then); + igb_ptp_write_i210(igb, (const struct timespec *)&now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; u64 ns; u32 remainder; - unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -214,11 +321,29 @@ static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) return 0; } -static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) +static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, + struct timespec *ts) { - u64 ns; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; ns = ts->tv_sec * 1000000000ULL; ns += ts->tv_nsec; @@ -232,77 +357,369 @@ static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) return 0; } -static int ptp_82576_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, + const struct timespec *ts) { - return -EOPNOTSUPP; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_write_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; } -static int ptp_82580_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } -static void igb_overflow_check(struct work_struct *work) +/** + * igb_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igb_ptp_tx_work(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, struct igb_adapter, + ptp_tx_work); + struct e1000_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + tsynctxctl = rd32(E1000_TSYNCTXCTL); + if (tsynctxctl & E1000_TSYNCTXCTL_VALID) + igb_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +static void igb_ptp_overflow_check(struct work_struct *work) { - struct timespec ts; struct igb_adapter *igb = - container_of(work, struct igb_adapter, overflow_work.work); + container_of(work, struct igb_adapter, ptp_overflow_work.work); + struct timespec ts; - igb_gettime(&igb->caps, &ts); + igb->ptp_caps.gettime(&igb->ptp_caps, &ts); pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - schedule_delayed_work(&igb->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&igb->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); +} + +/** + * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure. + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps shhwtstamps; + u64 regval; + + regval = rd32(E1000_TXSTMPL); + regval |= (u64)rd32(E1000_TXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + u64 regval; + + if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | + E1000_RXDADV_STAT_TS)) + return; + + /* + * If this bit is set, then the RX registers contain the time stamp. No + * other packet will be time stamped until we read these registers, so + * read the registers to make them available again. Because only one + * packet can be time stamped at a time, we know that the register + * values must belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared tx_flags that we + * can turn into a skb_shared_hwtstamps. + */ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + u32 *stamp = (u32 *)skb->data; + regval = le32_to_cpu(*(stamp + 2)); + regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; + skb_pull(skb, IGB_TS_HDR_LEN); + } else { + if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; + + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; + } + + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); +} + +/** + * igb_ptp_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + **/ +int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * register TSYNCRXCFG must be set, therefore it is not + * possible to time stamp both Sync and Delay_Req messages + * => fall back to time stamping all packets + */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + default: + return -ERANGE; + } + + if (hw->mac.type == e1000_82575) { + if (tsync_rx_ctl | tsync_tx_ctl) + return -EINVAL; + return 0; + } + + /* + * Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as + * long as one rx filter was configured. + */ + if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { + tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + + if ((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) { + regval = rd32(E1000_RXPBS); + regval |= E1000_RXPBS_CFG_TS_EN; + wr32(E1000_RXPBS, regval); + } + } + + /* enable/disable TX */ + regval = rd32(E1000_TSYNCTXCTL); + regval &= ~E1000_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + wr32(E1000_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = rd32(E1000_TSYNCRXCTL); + regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + wr32(E1000_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + wr32(E1000_ETQF(3), + (E1000_ETQF_FILTER_ENABLE | /* enable filter */ + E1000_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + wr32(E1000_ETQF(3), 0); + +#define PTP_PORT 319 + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | E1000_FTQF_VF_BP /* VF not compared */ + | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ + | E1000_FTQF_MASK); /* mask all inputs */ + ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + wr32(E1000_IMIR(3), htons(PTP_PORT)); + wr32(E1000_IMIREXT(3), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + if (hw->mac.type == e1000_82576) { + /* enable source port check */ + wr32(E1000_SPQF(3), htons(PTP_PORT)); + ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; + } + wr32(E1000_FTQF(3), ftqf); + } else { + wr32(E1000_FTQF(3), E1000_FTQF_MASK); + } + wrfl(); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(E1000_TXSTMPL); + regval = rd32(E1000_TXSTMPH); + regval = rd32(E1000_RXSTMPL); + regval = rd32(E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; } void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; switch (hw->mac.type) { - case e1000_i210: - case e1000_i211: - case e1000_i350: + case e1000_82576: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 1000000000; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82576; + adapter->cc.mask = CLOCKSOURCE_MASK(64); + adapter->cc.mult = 1; + adapter->cc.shift = IGB_82576_TSYNC_SHIFT; + /* Dial the nominal frequency. */ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; case e1000_82580: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82580"); - adapter->caps.max_adj = 62499999; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82580_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82580_enable; - adapter->cc.read = igb_82580_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; + case e1000_i350: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82580; + adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); + adapter->cc.mult = 1; + adapter->cc.shift = 0; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; - - case e1000_82576: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82576"); - adapter->caps.max_adj = 1000000000; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82576_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82576_enable; - adapter->cc.read = igb_82576_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. */ - wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + case e1000_i210: + case e1000_i211: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; + adapter->ptp_caps.gettime = igb_ptp_gettime_i210; + adapter->ptp_caps.settime = igb_ptp_settime_i210; + adapter->ptp_caps.enable = igb_ptp_enable; + /* Enable the timer functions by clearing bit 31. */ + wr32(E1000_TSAUXC, 0x0); break; - default: adapter->ptp_clock = NULL; return; @@ -310,86 +727,114 @@ void igb_ptp_init(struct igb_adapter *adapter) wrfl(); - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + /* Initialize the clock and overflow work for devices that need it. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - INIT_DELAYED_WORK(&adapter->overflow_work, igb_overflow_check); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); - spin_lock_init(&adapter->tmreg_lock); + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); - schedule_delayed_work(&adapter->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); + } + + /* Initialize the time sync interrupts for devices that support it. */ + if (hw->mac.type >= e1000_82580) { + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); + } - adapter->ptp_clock = ptp_clock_register(&adapter->caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else + } else { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); + adapter->flags |= IGB_FLAG_PTP; + } } -void igb_ptp_remove(struct igb_adapter *adapter) +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) { switch (adapter->hw.mac.type) { - case e1000_i211: - case e1000_i210: - case e1000_i350: - case e1000_82580: case e1000_82576: - cancel_delayed_work_sync(&adapter->overflow_work); + case e1000_82580: + case e1000_i350: + cancel_delayed_work_sync(&adapter->ptp_overflow_work); + break; + case e1000_i210: + case e1000_i211: + /* No delayed work to cancel. */ break; default: return; } + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); dev_info(&adapter->pdev->dev, "removed PHC on %s\n", adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_PTP; } } /** - * igb_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. + * igb_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. + * This function handles the reset work required to re-enable the PTP device. **/ -void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) +void igb_ptp_reset(struct igb_adapter *adapter) { - u64 ns; - unsigned long flags; + struct e1000_hw *hw = &adapter->hw; + + if (!(adapter->flags & IGB_FLAG_PTP)) + return; switch (adapter->hw.mac.type) { + case e1000_82576: + /* Dial the nominal frequency. */ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: case e1000_i210: case e1000_i211: - case e1000_i350: - case e1000_82580: - case e1000_82576: + /* Enable the timer functions and interrupts. */ + wr32(E1000_TSAUXC, 0x0); + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); break; default: + /* No work to do. */ return; } - spin_lock_irqsave(&adapter->tmreg_lock, flags); + /* Re-initialize the timer. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } } diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 5fd5d04c26c..89f40e51fc1 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_IXGBE) += ixgbe.o -ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ +ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o\ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b9623e9ea89..5bd26763554 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -78,6 +78,9 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -104,6 +107,7 @@ #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) #define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_NO_IFCS (u32)(1 << 9) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -293,16 +297,25 @@ struct ixgbe_ring_feature { * this is twice the size of a half page we need to double the page order * for FCoE enabled Rx queues. */ -#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { - return test_bit(__IXGBE_RX_FCOE, &ring->state) ? 1 : 0; +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : + IXGBE_RXBUFFER_3K; +#endif + return IXGBE_RXBUFFER_2K; } -#else -#define ixgbe_rx_pg_order(_ring) 0 + +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? 1 : 0; #endif + return 0; +} #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) -#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ @@ -584,6 +597,9 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + struct dentry *ixgbe_dbg_adapter; +#endif /*CONFIG_DEBUG_FS*/ }; struct ixgbe_fdir_filter { @@ -712,7 +728,12 @@ extern int ixgbe_fcoe_get_hbainfo(struct net_device *netdev, struct netdev_fcoe_hbainfo *info); extern u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter); #endif /* IXGBE_FCOE */ - +#ifdef CONFIG_DEBUG_FS +extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_init(void); +extern void ixgbe_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->queue_index); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c new file mode 100644 index 00000000000..8d3a2188909 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -0,0 +1,300 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "ixgbe.h" + +static struct dentry *ixgbe_dbg_root; + +static char ixgbe_dbg_reg_ops_buf[256] = ""; + +/** + * ixgbe_dbg_reg_ops_open - prep the debugfs pokee data item when opened + * @inode: inode that was opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer where + * we can find it later in the read and write calls + **/ +static int ixgbe_dbg_reg_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_reg_ops_read - read for reg_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_reg_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_reg_ops_write - write into reg_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_reg_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_reg_ops_buf, buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_reg_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_reg_ops_buf, "write", 5) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[5], "%x %x", ®, &value); + if (cnt == 2) { + IXGBE_WRITE_REG(&adapter->hw, reg, value); + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("write: 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("write <reg> <value>\n"); + } + } else if (strncmp(ixgbe_dbg_reg_ops_buf, "read", 4) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[4], "%x", ®); + if (cnt == 1) { + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("read 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("read <reg>\n"); + } + } else { + e_dev_info("Unknown command %s\n", ixgbe_dbg_reg_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" read <reg>\n"); + e_dev_info(" write <reg> <value>\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_reg_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_reg_ops_open, + .read = ixgbe_dbg_reg_ops_read, + .write = ixgbe_dbg_reg_ops_write, +}; + +static char ixgbe_dbg_netdev_ops_buf[256] = ""; + +/** + * ixgbe_dbg_netdev_ops_open - prep the debugfs netdev_ops data item + * @inode: inode that was opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer + * where we can find it later in the read and write calls + **/ +static int ixgbe_dbg_netdev_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_netdev_ops_read - read for netdev_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, + char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_netdev_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_netdev_ops_write - write into netdev_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_netdev_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_netdev_ops_buf, + buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_netdev_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + e_dev_info("tx_timeout called\n"); + } else { + e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" tx_timeout\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_netdev_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_netdev_ops_open, + .read = ixgbe_dbg_netdev_ops_read, + .write = ixgbe_dbg_netdev_ops_write, +}; + +/** + * ixgbe_dbg_adapter_init - setup the debugfs directory for the adapter + * @adapter: the adapter that is starting up + **/ +void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) +{ + const char *name = pci_name(adapter->pdev); + struct dentry *pfile; + adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root); + if (adapter->ixgbe_dbg_adapter) { + pfile = debugfs_create_file("reg_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_reg_ops_fops); + if (!pfile) + e_dev_err("debugfs reg_ops for %s failed\n", name); + pfile = debugfs_create_file("netdev_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_netdev_ops_fops); + if (!pfile) + e_dev_err("debugfs netdev_ops for %s failed\n", name); + } else { + e_dev_err("debugfs entry for %s failed\n", name); + } +} + +/** + * ixgbe_dbg_adapter_exit - clear out the adapter's debugfs entries + * @pf: the pf that is stopping + **/ +void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) +{ + if (adapter->ixgbe_dbg_adapter) + debugfs_remove_recursive(adapter->ixgbe_dbg_adapter); + adapter->ixgbe_dbg_adapter = NULL; +} + +/** + * ixgbe_dbg_init - start up debugfs for the driver + **/ +void ixgbe_dbg_init(void) +{ + ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL); + if (ixgbe_dbg_root == NULL) + pr_err("init of debugfs failed\n"); +} + +/** + * ixgbe_dbg_exit - clean out the driver's debugfs entries + **/ +void ixgbe_dbg_exit(void) +{ + debugfs_remove_recursive(ixgbe_dbg_root); +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ee61819d608..868af693821 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1167,7 +1167,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } bi->dma = dma; - bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + bi->page_offset = 0; return true; } @@ -1320,29 +1320,6 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return max_len; } -static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - __le32 rsc_enabled; - u32 rsc_cnt; - - if (!ring_is_rsc_enabled(rx_ring)) - return; - - rsc_enabled = rx_desc->wb.lower.lo_dword.data & - cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); - - /* If this is an RSC frame rsc_cnt should be non-zero */ - if (!rsc_enabled) - return; - - rsc_cnt = le32_to_cpu(rsc_enabled); - rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; - - IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; -} - static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { @@ -1440,16 +1417,28 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, prefetch(IXGBE_RX_DESC(rx_ring, ntc)); - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; + /* update RSC append count if present */ + if (ring_is_rsc_enabled(rx_ring)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); + + rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; + IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; - /* append_cnt indicates packet is RSC, if so fetch nextp */ - if (IXGBE_CB(skb)->append_cnt) { - ntc = le32_to_cpu(rx_desc->wb.upper.status_error); - ntc &= IXGBE_RXDADV_NEXTP_MASK; - ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } } + /* if we are the last buffer then there is nothing else to do */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + /* place skb in next buffer to be received */ rx_ring->rx_buffer_info[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; @@ -1458,6 +1447,78 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, } /** + * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being adjusted + * + * This function is an ixgbe specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being updated + * + * This function provides a basic DMA sync up for the first fragment of an + * skb. The reason for doing this is that the first fragment cannot be + * unmapped until we have reached the end of packet descriptor for a buffer + * chain. + */ +static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; +} + +/** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor @@ -1479,24 +1540,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; struct net_device *netdev = rx_ring->netdev; - unsigned char *va; - unsigned int pull_len; - - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; - } else { - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - frag->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - IXGBE_CB(skb)->dma = 0; /* verify that the packet does not have any known errors */ if (unlikely(ixgbe_test_staterr(rx_desc, @@ -1506,40 +1550,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return true; } - /* - * it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* - * we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = skb_frag_size(frag); - if (pull_len > IXGBE_RX_HDR_SIZE) - pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; - - /* - * if we sucked the frag empty then we should free it, - * if there are other frags here something is screwed up in hardware - */ - if (skb_frag_size(frag) == 0) { - BUG_ON(skb_shinfo(skb)->nr_frags != 1); - skb_shinfo(skb)->nr_frags = 0; - __skb_frag_unref(frag); - skb->truesize -= ixgbe_rx_bufsz(rx_ring); - } + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE /* do not attempt to pad FCoE Frames as this will disrupt DDP */ @@ -1560,33 +1573,17 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /** - * ixgbe_can_reuse_page - determine if we can reuse a page - * @rx_buffer: pointer to rx_buffer containing the page we want to reuse - * - * Returns true if page can be reused in another Rx buffer - **/ -static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) -{ - struct page *page = rx_buffer->page; - - /* if we are only owner of page and it is local we can reuse it */ - return likely(page_count(page) == 1) && - likely(page_to_nid(page) == numa_node_id()); -} - -/** * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: rx descriptor ring to store buffers on * @old_buff: donor buffer to have page reused * - * Syncronizes page for reuse by the adapter + * Synchronizes page for reuse by the adapter **/ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *old_buff) { struct ixgbe_rx_buffer *new_buff; u16 nta = rx_ring->next_to_alloc; - u16 bufsz = ixgbe_rx_bufsz(rx_ring); new_buff = &rx_ring->rx_buffer_info[nta]; @@ -1597,17 +1594,13 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, /* transfer page from old buffer to new buffer */ new_buff->page = old_buff->page; new_buff->dma = old_buff->dma; - - /* flip page offset to other buffer and store to new_buff */ - new_buff->page_offset = old_buff->page_offset ^ bufsz; + new_buff->page_offset = old_buff->page_offset; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, bufsz, + new_buff->page_offset, + ixgbe_rx_bufsz(rx_ring), DMA_FROM_DEVICE); - - /* bump ref count on page before it is given to the stack */ - get_page(new_buff->page); } /** @@ -1617,20 +1610,159 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into * - * This function is based on skb_add_rx_frag. I would have used that - * function however it doesn't handle the truesize case correctly since we - * are allocating more memory than might be used for a single receive. + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. **/ -static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb, int size) + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->page, rx_buffer->page_offset, - size); - skb->len += size; - skb->data_len += size; - skb->truesize += ixgbe_rx_bufsz(rx_ring); + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_bufsz(rx_ring); +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - + ixgbe_rx_bufsz(rx_ring); +#endif + + if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + struct ixgbe_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + /* + * Delay unmapping of the first packet. It carries the + * header information, HW may still access the header + * after the writeback. Only unmap it when EOP is + * reached + */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + goto dma_sync; + + IXGBE_CB(skb)->dma = rx_buffer->dma; + } else { + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + ixgbe_dma_sync_frag(rx_ring, skb); + +dma_sync: + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + + /* pull page into skb */ + if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; } /** @@ -1653,16 +1785,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; #ifdef IXGBE_FCOE struct ixgbe_adapter *adapter = q_vector->adapter; - int ddp_bytes = 0; + int ddp_bytes; + unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); do { - struct ixgbe_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; struct sk_buff *skb; - struct page *page; - u16 ntc; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -1670,9 +1800,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, cleaned_count = 0; } - ntc = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, ntc); - rx_buffer = &rx_ring->rx_buffer_info[ntc]; + rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) break; @@ -1684,75 +1812,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ rmb(); - page = rx_buffer->page; - prefetchw(page); - - skb = rx_buffer->skb; - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + /* retrieve a buffer from the ring */ + skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - break; - } - - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. Only unmap it when EOP is - * reached - */ - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - - /* pull page into skb */ - ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, - le16_to_cpu(rx_desc->wb.upper.length)); - - if (ixgbe_can_reuse_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->skb = NULL; - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; cleaned_count++; @@ -1775,6 +1840,20 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* if ddp, not passing to ULD unless for FCP_RSP or error */ if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) { ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); + /* include DDPed FCoE data */ + if (ddp_bytes > 0) { + if (!mss) { + mss = rx_ring->netdev->mtu - + sizeof(struct fcoe_hdr) - + sizeof(struct fc_frame_header) - + sizeof(struct fcoe_crc_eof); + if (mss > 512) + mss &= ~511; + } + total_rx_bytes += ddp_bytes; + total_rx_packets += DIV_ROUND_UP(ddp_bytes, + mss); + } if (!ddp_bytes) { dev_kfree_skb_any(skb); continue; @@ -1788,21 +1867,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, budget--; } while (likely(budget)); -#ifdef IXGBE_FCOE - /* include DDPed FCoE data */ - if (ddp_bytes > 0) { - unsigned int mss; - - mss = rx_ring->netdev->mtu - sizeof(struct fcoe_hdr) - - sizeof(struct fc_frame_header) - - sizeof(struct fcoe_crc_eof); - if (mss > 512) - mss &= ~511; - total_rx_bytes += ddp_bytes; - total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } - -#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -2868,11 +2932,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ -#if PAGE_SIZE > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#else srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#endif /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -2980,13 +3040,7 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ -#if (PAGE_SIZE <= 8192) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE <= 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#endif IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -3606,8 +3660,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) netif_set_gso_max_size(adapter->netdev, 32768); - hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); - #ifdef IXGBE_FCOE if (adapter->netdev->features & NETIF_F_FCOE_MTU) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -3807,6 +3859,11 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) #ifdef CONFIG_IXGBE_DCB ixgbe_configure_dcb(adapter); #endif + /* + * We must restore virtualization before VLANs or else + * the VLVF registers will not be populated + */ + ixgbe_configure_virtualization(adapter); ixgbe_set_rx_mode(adapter->netdev); ixgbe_restore_vlan(adapter); @@ -3838,8 +3895,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) break; } - ixgbe_configure_virtualization(adapter); - #ifdef IXGBE_FCOE /* configure FCoE L2 filters, redirection table, and Rx control */ ixgbe_configure_fcoe(adapter); @@ -4130,27 +4185,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } /** - * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers - * @rx_ring: ring to setup - * - * On many IA platforms the L1 cache has a critical stride of 4K, this - * results in each receive buffer starting in the same cache set. To help - * reduce the pressure on this cache set we can interleave the offsets so - * that only every other buffer will be in the same cache set. - **/ -static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) -{ - struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; - u16 i; - - for (i = 0; i < rx_ring->count; i += 2) { - rx_buffer[0].page_offset = 0; - rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); - rx_buffer = &rx_buffer[2]; - } -} - -/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ @@ -4195,8 +4229,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); - ixgbe_init_rx_page_offset(rx_ring); - /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); @@ -4646,8 +4678,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - ixgbe_init_rx_page_offset(rx_ring); - return 0; err: vfree(rx_ring->rx_buffer_info); @@ -5530,8 +5560,9 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) { u32 ssvpc; - /* Do not perform spoof check for 82598 */ - if (adapter->hw.mac.type == ixgbe_mac_82598EB) + /* Do not perform spoof check for 82598 or if not in IOV mode */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB || + adapter->num_vfs == 0) return; ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC); @@ -5543,7 +5574,7 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) if (!ssvpc) return; - e_warn(drv, "%d Spoofed packets detected\n", ssvpc); + e_warn(drv, "%u Spoofed packets detected\n", ssvpc); } /** @@ -5874,9 +5905,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && - !(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) - return; + if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN)) { + if (unlikely(skb->no_fcs)) + first->tx_flags |= IXGBE_TX_FLAGS_NO_IFCS; + if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) + return; + } } else { u8 l4_hdr = 0; switch (first->protocol) { @@ -5938,7 +5972,6 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ __le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); /* set HW vlan bit if vlan is present */ @@ -5958,6 +5991,10 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) #endif cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE); + /* insert frame checksum */ + if (!(tx_flags & IXGBE_TX_FLAGS_NO_IFCS)) + cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS); + return cmd_type; } @@ -6063,8 +6100,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, if (likely(!data_len)) break; - if (unlikely(skb->no_fcs)) - cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); i++; @@ -6854,9 +6889,9 @@ static int ixgbe_set_features(struct net_device *netdev, return 0; } -static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -6893,7 +6928,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr) + const unsigned char *addr) { struct ixgbe_adapter *adapter = netdev_priv(dev); int err = -EOPNOTSUPP; @@ -7136,11 +7171,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, goto err_ioremap; } - for (i = 1; i <= 5; i++) { - if (pci_resource_len(pdev, i) == 0) - continue; - } - netdev->netdev_ops = &ixgbe_netdev_ops; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; @@ -7419,6 +7449,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, e_err(probe, "failed to allocate sysfs resources\n"); #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_init(adapter); +#endif /* CONFIG_DEBUG_FS */ + return 0; err_register: @@ -7453,6 +7487,10 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_exit(adapter); +#endif /*CONFIG_DEBUG_FS */ + set_bit(__IXGBE_DOWN, &adapter->state); cancel_work_sync(&adapter->service_task); @@ -7708,6 +7746,10 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_init(); +#endif /* CONFIG_DEBUG_FS */ + #ifdef CONFIG_IXGBE_DCA dca_register_notify(&dca_notifier); #endif @@ -7730,6 +7772,11 @@ static void __exit ixgbe_exit_module(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&ixgbe_driver); + +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_exit(); +#endif /* CONFIG_DEBUG_FS */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 3456d561714..39881cb17a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -960,7 +960,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) /* (Re)start the overflow check */ adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED; - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 4fea8716ab6..dce48bf64d9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -346,6 +346,10 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf) { + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add); } @@ -414,6 +418,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) VLAN_PRIO_SHIFT)), vf); ixgbe_set_vmolr(hw, vf, false); } else { + ixgbe_set_vf_vlan(adapter, true, 0, vf); ixgbe_set_vmvir(adapter, 0, vf); ixgbe_set_vmolr(hw, vf, true); } @@ -810,9 +815,9 @@ out: return err; } -static int ixgbe_link_mbps(int internal_link_speed) +static int ixgbe_link_mbps(struct ixgbe_adapter *adapter) { - switch (internal_link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: return 100; case IXGBE_LINK_SPEED_1GB_FULL: @@ -824,27 +829,30 @@ static int ixgbe_link_mbps(int internal_link_speed) } } -static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, - int link_speed) +static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf) { - int rf_dec, rf_int; - u32 bcnrc_val; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; + struct ixgbe_hw *hw = &adapter->hw; + u32 bcnrc_val = 0; + u16 queue, queues_per_pool; + u16 tx_rate = adapter->vfinfo[vf].tx_rate; + + if (tx_rate) { + /* start with base link speed value */ + bcnrc_val = adapter->vf_rate_link_speed; - if (tx_rate != 0) { /* Calculate the rate factor values to set */ - rf_int = link_speed / tx_rate; - rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<<IXGBE_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; - - bcnrc_val = IXGBE_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int<<IXGBE_RTTBCNRC_RF_INT_SHIFT) & - IXGBE_RTTBCNRC_RF_INT_MASK); - bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK); - } else { - bcnrc_val = 0; + bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT; + bcnrc_val /= tx_rate; + + /* clear everything but the rate factor */ + bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK | + IXGBE_RTTBCNRC_RF_DEC_MASK; + + /* enable the rate scheduler */ + bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA; } - IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, 2*vf); /* vf Y uses queue 2*Y */ /* * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported @@ -861,53 +869,68 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, break; } - IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + /* determine how many queues per pool based on VMDq mask */ + queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask); + + /* write value for all Tx queues belonging to VF */ + for (queue = 0; queue < queues_per_pool; queue++) { + unsigned int reg_idx = (vf * queues_per_pool) + queue; + + IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx); + IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + } } void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) { - int actual_link_speed, i; - bool reset_rate = false; + int i; /* VF Tx rate limit was not set */ - if (adapter->vf_rate_link_speed == 0) + if (!adapter->vf_rate_link_speed) return; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if (actual_link_speed != adapter->vf_rate_link_speed) { - reset_rate = true; + if (ixgbe_link_mbps(adapter) != adapter->vf_rate_link_speed) { adapter->vf_rate_link_speed = 0; dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit rate " - "is disabled\n"); + "Link speed has been changed. VF Transmit rate is disabled\n"); } for (i = 0; i < adapter->num_vfs; i++) { - if (reset_rate) + if (!adapter->vf_rate_link_speed) adapter->vfinfo[i].tx_rate = 0; - ixgbe_set_vf_rate_limit(&adapter->hw, i, - adapter->vfinfo[i].tx_rate, - actual_link_speed); + ixgbe_set_vf_rate_limit(adapter, i); } } int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - int actual_link_speed; + int link_speed; + + /* verify VF is active */ + if (vf >= adapter->num_vfs) + return -EINVAL; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if ((vf >= adapter->num_vfs) || (!adapter->link_up) || - (tx_rate > actual_link_speed) || (actual_link_speed != 10000) || - ((tx_rate != 0) && (tx_rate <= 10))) - /* rate limit cannot be set to 10Mb or less in 10Gb adapters */ + /* verify link is up */ + if (!adapter->link_up) return -EINVAL; - adapter->vf_rate_link_speed = actual_link_speed; - adapter->vfinfo[vf].tx_rate = (u16)tx_rate; - ixgbe_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + /* verify we are linked at 10Gbps */ + link_speed = ixgbe_link_mbps(adapter); + if (link_speed != 10000) + return -EINVAL; + + /* rate limit cannot be less than 10Mbs or greater than link speed */ + if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed))) + return -EINVAL; + + /* store values */ + adapter->vf_rate_link_speed = link_speed; + adapter->vfinfo[vf].tx_rate = tx_rate; + + /* update hardware configuration */ + ixgbe_set_vf_rate_limit(adapter, vf); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 418af827b23..da17ccf5c09 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -272,5 +272,6 @@ struct ixgbe_adv_tx_context_desc { /* Error Codes */ #define IXGBE_ERR_INVALID_MAC_ADDR -1 #define IXGBE_ERR_RESET_FAILED -2 +#define IXGBE_ERR_INVALID_ARGUMENT -3 #endif /* _IXGBEVF_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 98cadb0c4da..383b4e1cd17 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -101,7 +101,9 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_3K 3072 +#define IXGBEVF_RXBUFFER_7K 7168 +#define IXGBEVF_RXBUFFER_15K 15360 #define IXGBEVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 @@ -259,6 +261,11 @@ enum ixbgevf_state_t { __IXGBEVF_DOWN }; +struct ixgbevf_cb { + struct sk_buff *prev; +}; +#define IXGBE_CB(skb) ((struct ixgbevf_cb *)(skb)->cb) + enum ixgbevf_boards { board_82599_vf, board_X540_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 6647383c4dd..0ee9bd4819f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -263,6 +263,8 @@ cont_loop: tx_ring->total_bytes += total_bytes; tx_ring->total_packets += total_packets; u64_stats_update_end(&tx_ring->syncp); + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; return count < tx_ring->count; } @@ -272,12 +274,10 @@ cont_loop: * @q_vector: structure containing interrupt and ring information * @skb: packet to send up * @status: hardware indication of status of receive - * @rx_ring: rx descriptor ring (for a specific queue) to setup * @rx_desc: rx descriptor **/ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, struct sk_buff *skb, u8 status, - struct ixgbevf_ring *ring, union ixgbe_adv_rx_desc *rx_desc) { struct ixgbevf_adapter *adapter = q_vector->adapter; @@ -433,11 +433,21 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, if (!(staterr & IXGBE_RXD_STAT_EOP)) { skb->next = next_buffer->skb; - skb->next->prev = skb; + IXGBE_CB(skb->next)->prev = skb; adapter->non_eop_descs++; goto next_desc; } + /* we should not be chaining buffers, if we did drop the skb */ + if (IXGBE_CB(skb)->prev) { + do { + struct sk_buff *this = skb; + skb = IXGBE_CB(skb)->prev; + dev_kfree_skb(this); + } while (skb); + goto next_desc; + } + /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) { dev_kfree_skb_irq(skb); @@ -461,7 +471,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } skb->protocol = eth_type_trans(skb, rx_ring->netdev); - ixgbevf_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc); + ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc); next_desc: rx_desc->wb.upper.status_error = 0; @@ -490,6 +500,8 @@ next_desc: rx_ring->total_packets += total_rx_packets; rx_ring->total_bytes += total_rx_bytes; u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; return !!budget; } @@ -716,40 +728,15 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) } } -static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) +static irqreturn_t ixgbevf_msix_other(int irq, void *data) { struct ixgbevf_adapter *adapter = data; struct ixgbe_hw *hw = &adapter->hw; - u32 msg; - bool got_ack = false; - - if (!hw->mbx.ops.check_for_ack(hw)) - got_ack = true; - if (!hw->mbx.ops.check_for_msg(hw)) { - hw->mbx.ops.read(hw, &msg, 1); + hw->mac.get_link_status = 1; - if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 1)); - - if (msg & IXGBE_VT_MSGTYPE_NACK) - pr_warn("Last Request of type %2.2x to PF Nacked\n", - msg & 0xFF); - /* - * Restore the PFSTS bit in case someone is polling for a - * return message from the PF - */ - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFSTS; - } - - /* - * checking for the ack clears the PFACK bit. Place - * it back in the v2p_mailbox cache so that anyone - * polling for an ack will not miss it - */ - if (got_ack) - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; + if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other); @@ -899,10 +886,10 @@ static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter) } err = request_irq(adapter->msix_entries[vector].vector, - &ixgbevf_msix_mbx, 0, netdev->name, adapter); + &ixgbevf_msix_other, 0, netdev->name, adapter); if (err) { hw_dbg(&adapter->hw, - "request_irq for msix_mbx failed: %d\n", err); + "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; } @@ -1057,15 +1044,46 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (rx_ring->rx_buf_len == MAXIMUM_ETHERNET_VLAN_SIZE) - srrctl |= IXGBEVF_RXBUFFER_2048 >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= rx_ring->rx_buf_len >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> + IXGBE_SRRCTL_BSIZEPKT_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + int i; + u16 rx_buf_len; + + /* notify the PF of our intent to use this size of frame */ + ixgbevf_rlpml_set_vf(hw, max_frame); + + /* PF will allow an extra 4 bytes past for vlan tagged frames */ + max_frame += VLAN_HLEN; + + /* + * Make best use of allocation by using all but 1K of a + * power of 2 allocation that will be used for skb->head. + */ + if ((hw->mac.type == ixgbe_mac_X540_vf) && + (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)) + rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; + else if (max_frame <= IXGBEVF_RXBUFFER_3K) + rx_buf_len = IXGBEVF_RXBUFFER_3K; + else if (max_frame <= IXGBEVF_RXBUFFER_7K) + rx_buf_len = IXGBEVF_RXBUFFER_7K; + else if (max_frame <= IXGBEVF_RXBUFFER_15K) + rx_buf_len = IXGBEVF_RXBUFFER_15K; + else + rx_buf_len = IXGBEVF_MAX_RXBUFFER; + + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i].rx_buf_len = rx_buf_len; +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1076,18 +1094,14 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) { u64 rdba; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; int i, j; u32 rdlen; - int rx_buf_len; /* PSRTYPE must be initialized in 82599 */ IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, 0); - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - else - rx_buf_len = ALIGN(max_frame, 1024); + + /* set_rx_buffer_len must be called before ring initialization */ + ixgbevf_set_rx_buffer_len(adapter); rdlen = adapter->rx_ring[0].count * sizeof(union ixgbe_adv_rx_desc); /* Setup the HW Rx Head and Tail Descriptor Pointers and @@ -1103,7 +1117,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFRDT(j), 0); adapter->rx_ring[i].head = IXGBE_VFRDH(j); adapter->rx_ring[i].tail = IXGBE_VFRDT(j); - adapter->rx_ring[i].rx_buf_len = rx_buf_len; ixgbevf_configure_srrctl(adapter, j); } @@ -1113,36 +1126,47 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err; + + if (!hw->mac.ops.set_vfta) + return -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* add VID to filter table */ - if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, true); + err = hw->mac.ops.set_vfta(hw, vid, 0, true); spin_unlock(&adapter->mbx_lock); + /* translate error return types so error makes sense */ + if (err == IXGBE_ERR_MBX) + return -EIO; + + if (err == IXGBE_ERR_INVALID_ARGUMENT) + return -EACCES; + set_bit(vid, adapter->active_vlans); - return 0; + return err; } static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err = -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* remove VID from filter table */ if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, false); + err = hw->mac.ops.set_vfta(hw, vid, 0, false); spin_unlock(&adapter->mbx_lock); clear_bit(vid, adapter->active_vlans); - return 0; + return err; } static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter) @@ -1308,6 +1332,25 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int api[] = { ixgbe_mbox_api_10, + ixgbe_mbox_api_unknown }; + int err = 0, idx = 0; + + spin_lock(&adapter->mbx_lock); + + while (api[idx] != ixgbe_mbox_api_unknown) { + err = ixgbevf_negotiate_api_version(hw, api[idx]); + if (!err) + break; + idx++; + } + + spin_unlock(&adapter->mbx_lock); +} + static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1315,7 +1358,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) int i, j = 0; int num_rx_rings = adapter->num_rx_queues; u32 txdctl, rxdctl; - u32 msg[2]; for (i = 0; i < adapter->num_tx_queues; i++) { j = adapter->tx_ring[i].reg_idx; @@ -1356,10 +1398,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); } - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - hw->mbx.ops.write_posted(hw, msg, 2); - spin_unlock(&adapter->mbx_lock); clear_bit(__IXGBEVF_DOWN, &adapter->state); @@ -1371,6 +1409,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) ixgbevf_save_reset_stats(adapter); ixgbevf_init_last_counter_stats(adapter); + hw->mac.get_link_status = 1; mod_timer(&adapter->watchdog_timer, jiffies); } @@ -1378,6 +1417,8 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + ixgbevf_negotiate_api(adapter); + ixgbevf_configure(adapter); ixgbevf_up_complete(adapter); @@ -1419,7 +1460,7 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter, rx_buffer_info->skb = NULL; do { struct sk_buff *this = skb; - skb = skb->prev; + skb = IXGBE_CB(skb)->prev; dev_kfree_skb(this); } while (skb); } @@ -1547,8 +1588,6 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; - WARN_ON(in_interrupt()); while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) @@ -1561,10 +1600,8 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) * watchdog task will continue to schedule reset tasks until * the PF is up and running. */ - if (!hw->mac.ops.reset_hw(hw)) { - ixgbevf_down(adapter); - ixgbevf_up(adapter); - } + ixgbevf_down(adapter); + ixgbevf_up(adapter); clear_bit(__IXGBEVF_RESETTING, &adapter->state); } @@ -1867,6 +1904,22 @@ err_set_interrupt: } /** + * ixgbevf_clear_interrupt_scheme - Clear the current interrupt scheme settings + * @adapter: board private structure to clear interrupt scheme on + * + * We go through and clear interrupt specific resources and reset the structure + * to pre-load conditions + **/ +static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) +{ + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + + ixgbevf_free_q_vectors(adapter); + ixgbevf_reset_interrupt_capability(adapter); +} + +/** * ixgbevf_sw_init - Initialize general software structures * (struct ixgbevf_adapter) * @adapter: board private structure to initialize @@ -2351,6 +2404,8 @@ static int ixgbevf_open(struct net_device *netdev) } } + ixgbevf_negotiate_api(adapter); + /* allocate transmit descriptors */ err = ixgbevf_setup_all_tx_resources(adapter); if (err) @@ -2860,10 +2915,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE; - u32 msg[2]; if (adapter->hw.mac.type == ixgbe_mac_X540_vf) max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; @@ -2877,35 +2930,91 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - if (!netif_running(netdev)) { - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = max_frame; - hw->mbx.ops.write_posted(hw, msg, 2); - } - if (netif_running(netdev)) ixgbevf_reinit_locked(adapter); return 0; } -static void ixgbevf_shutdown(struct pci_dev *pdev) +static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbevf_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM + int retval = 0; +#endif netif_device_detach(netdev); if (netif_running(netdev)) { + rtnl_lock(); ixgbevf_down(adapter); ixgbevf_free_irq(adapter); ixgbevf_free_all_tx_resources(adapter); ixgbevf_free_all_rx_resources(adapter); + rtnl_unlock(); } - pci_save_state(pdev); + ixgbevf_clear_interrupt_scheme(adapter); +#ifdef CONFIG_PM + retval = pci_save_state(pdev); + if (retval) + return retval; + +#endif pci_disable_device(pdev); + + return 0; +} + +#ifdef CONFIG_PM +static int ixgbevf_resume(struct pci_dev *pdev) +{ + struct ixgbevf_adapter *adapter = pci_get_drvdata(pdev); + struct net_device *netdev = adapter->netdev; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + /* + * pci_restore_state clears dev->state_saved so call + * pci_save_state to restore it. + */ + pci_save_state(pdev); + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + rtnl_lock(); + err = ixgbevf_init_interrupt_scheme(adapter); + rtnl_unlock(); + if (err) { + dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + return err; + } + + ixgbevf_reset(adapter); + + if (netif_running(netdev)) { + err = ixgbevf_open(netdev); + if (err) + return err; + } + + netif_device_attach(netdev); + + return err; +} + +#endif /* CONFIG_PM */ +static void ixgbevf_shutdown(struct pci_dev *pdev) +{ + ixgbevf_suspend(pdev, PMSG_SUSPEND); } static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, @@ -2946,7 +3055,7 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, return stats; } -static const struct net_device_ops ixgbe_netdev_ops = { +static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, .ndo_start_xmit = ixgbevf_xmit_frame, @@ -2962,7 +3071,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { static void ixgbevf_assign_netdev_ops(struct net_device *dev) { - dev->netdev_ops = &ixgbe_netdev_ops; + dev->netdev_ops = &ixgbevf_netdev_ops; ixgbevf_set_ethtool_ops(dev); dev->watchdog_timeo = 5 * HZ; } @@ -3131,6 +3240,7 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, return 0; err_register: + ixgbevf_clear_interrupt_scheme(adapter); err_sw_init: ixgbevf_reset_interrupt_capability(adapter); iounmap(hw->hw_addr); @@ -3168,6 +3278,7 @@ static void __devexit ixgbevf_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + ixgbevf_clear_interrupt_scheme(adapter); ixgbevf_reset_interrupt_capability(adapter); iounmap(adapter->hw.hw_addr); @@ -3267,6 +3378,11 @@ static struct pci_driver ixgbevf_driver = { .id_table = ixgbevf_pci_tbl, .probe = ixgbevf_probe, .remove = __devexit_p(ixgbevf_remove), +#ifdef CONFIG_PM + /* Power Management Hooks */ + .suspend = ixgbevf_suspend, + .resume = ixgbevf_resume, +#endif .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler }; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 9c955900fe6..d5028ddf4b3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -86,14 +86,17 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw) static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = IXGBE_ERR_MBX; + s32 ret_val = -IXGBE_ERR_MBX; + + if (!mbx->ops.read) + goto out; ret_val = ixgbevf_poll_for_msg(hw); /* if ack received read message, otherwise we timed out */ if (!ret_val) ret_val = mbx->ops.read(hw, msg, size); - +out: return ret_val; } @@ -109,7 +112,11 @@ static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val; + s32 ret_val = -IXGBE_ERR_MBX; + + /* exit if either we can't write or there isn't a defined timeout */ + if (!mbx->ops.write || !mbx->timeout) + goto out; /* send msg */ ret_val = mbx->ops.write(hw, msg, size); @@ -117,7 +124,7 @@ static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) /* if msg sent wait until we receive an ack */ if (!ret_val) ret_val = ixgbevf_poll_for_ack(hw); - +out: return ret_val; } diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index cf9131c5c11..946ce86f337 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -76,12 +76,29 @@ /* bits 23:16 are used for exra info for certain messages */ #define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) +/* definitions to support mailbox API version negotiation */ + +/* + * each element denotes a version of the API; existing numbers may not + * change; any additions must go at the end + */ +enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */ + ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ +}; + +/* mailbox API, legacy requests */ #define IXGBE_VF_RESET 0x01 /* VF requests reset */ #define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ #define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ #define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ -#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ -#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* mailbox API, version 1.0 VF requests */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ +#define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */ /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index ec89b86f7ca..0c7447e6fcc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -79,6 +79,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) /* Call adapter stop to disable tx/rx and clear interrupts */ hw->mac.ops.stop_adapter(hw); + /* reset the api version */ + hw->api_version = ixgbe_mbox_api_10; + IXGBE_WRITE_REG(hw, IXGBE_VFCTRL, IXGBE_CTRL_RST); IXGBE_WRITE_FLUSH(hw); @@ -97,7 +100,7 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] = IXGBE_VF_RESET; mbx->ops.write_posted(hw, msgbuf, 1); - msleep(10); + mdelay(10); /* set our "perm_addr" based on info provided by PF */ /* also set up the mc_filter_type which is piggy backed @@ -346,16 +349,32 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) { + struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[2]; + s32 err; msgbuf[0] = IXGBE_VF_SET_VLAN; msgbuf[1] = vlan; /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - ixgbevf_write_msg_read_ack(hw, msgbuf, 2); + err = mbx->ops.write_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; - return 0; + err = mbx->ops.read_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; + + /* remove extra bits from the message */ + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; + msgbuf[0] &= ~(0xFF << IXGBE_VT_MSGINFO_SHIFT); + + if (msgbuf[0] != (IXGBE_VF_SET_VLAN | IXGBE_VT_MSGTYPE_ACK)) + err = IXGBE_ERR_INVALID_ARGUMENT; + +mbx_err: + return err; } /** @@ -389,20 +408,23 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, bool *link_up, bool autoneg_wait_to_complete) { + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; u32 links_reg; + u32 in_msg = 0; - if (!(hw->mbx.ops.check_for_rst(hw))) { - *link_up = false; - *speed = 0; - return -1; - } + /* If we were hit with a reset drop the link */ + if (!mbx->ops.check_for_rst(hw) || !mbx->timeout) + mac->get_link_status = true; - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!mac->get_link_status) + goto out; - if (links_reg & IXGBE_LINKS_UP) - *link_up = true; - else - *link_up = false; + /* if link status is down no point in checking to see if pf is up */ + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; switch (links_reg & IXGBE_LINKS_SPEED_82599) { case IXGBE_LINKS_SPEED_10G_82599: @@ -416,7 +438,79 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, break; } - return 0; + /* if the read failed it could just be a mailbox collision, best wait + * until we are called again and don't report an error */ + if (mbx->ops.read(hw, &in_msg, 1)) + goto out; + + if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) { + /* msg is not CTS and is NACK we must have lost CTS status */ + if (in_msg & IXGBE_VT_MSGTYPE_NACK) + ret_val = -1; + goto out; + } + + /* the pf is talking, if we timed out in the past we reinit */ + if (!mbx->timeout) { + ret_val = -1; + goto out; + } + + /* if we passed all the tests above then the link is up and we no + * longer need to check for link */ + mac->get_link_status = false; + +out: + *link_up = !mac->get_link_status; + return ret_val; +} + +/** + * ixgbevf_rlpml_set_vf - Set the maximum receive packet length + * @hw: pointer to the HW structure + * @max_size: value to assign to max frame size + **/ +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) +{ + u32 msgbuf[2]; + + msgbuf[0] = IXGBE_VF_SET_LPE; + msgbuf[1] = max_size; + ixgbevf_write_msg_read_ack(hw, msgbuf, 2); +} + +/** + * ixgbevf_negotiate_api_version - Negotiate supported API version + * @hw: pointer to the HW structure + * @api: integer containing requested API version + **/ +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) +{ + int err; + u32 msg[3]; + + /* Negotiate the mailbox API version */ + msg[0] = IXGBE_VF_API_NEGOTIATE; + msg[1] = api; + msg[2] = 0; + err = hw->mbx.ops.write_posted(hw, msg, 3); + + if (!err) + err = hw->mbx.ops.read_posted(hw, msg, 3); + + if (!err) { + msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; + + /* Store value and return 0 on success */ + if (msg[0] == (IXGBE_VF_API_NEGOTIATE | IXGBE_VT_MSGTYPE_ACK)) { + hw->api_version = api; + return 0; + } + + err = IXGBE_ERR_INVALID_ARGUMENT; + } + + return err; } static const struct ixgbe_mac_operations ixgbevf_mac_ops = { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 25c951daee5..47f11a584d8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -137,6 +137,8 @@ struct ixgbe_hw { u8 revision_id; bool adapter_stopped; + + int api_version; }; struct ixgbevf_hw_stats { @@ -170,5 +172,7 @@ struct ixgbevf_info { const struct ixgbe_mac_operations *mac_ops; }; +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); #endif /* __IXGBE_VF_H__ */ |