Diffstat (limited to 'drivers/net/ethernet/intel')
28 files changed, 2152 insertions, 1153 deletions
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 736a7d987db..9089d00f142 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -174,6 +174,20 @@ static int e1000_get_settings(struct net_device *netdev,
     ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) ||
                      hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
 
+    /* MDI-X => 1; MDI => 0 */
+    if ((hw->media_type == e1000_media_type_copper) &&
+        netif_carrier_ok(netdev))
+        ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ?
+                             ETH_TP_MDI_X :
+                             ETH_TP_MDI);
+    else
+        ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+    if (hw->mdix == AUTO_ALL_MODES)
+        ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+    else
+        ecmd->eth_tp_mdix_ctrl = hw->mdix;
+
     return 0;
 }
 
@@ -183,6 +197,22 @@ static int e1000_set_settings(struct net_device *netdev,
     struct e1000_adapter *adapter = netdev_priv(netdev);
     struct e1000_hw *hw = &adapter->hw;
 
+    /*
+     * MDI setting is only allowed when autoneg enabled because
+     * some hardware doesn't allow MDI setting when speed or
+     * duplex is forced.
+     */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        if (hw->media_type != e1000_media_type_copper)
+            return -EOPNOTSUPP;
+
+        if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+            (ecmd->autoneg != AUTONEG_ENABLE)) {
+            e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+            return -EINVAL;
+        }
+    }
+
     while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
         msleep(1);
 
@@ -199,12 +229,21 @@ static int e1000_set_settings(struct net_device *netdev,
         ecmd->advertising = hw->autoneg_advertised;
     } else {
         u32 speed = ethtool_cmd_speed(ecmd);
+        /* calling this overrides forced MDI setting */
         if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
             clear_bit(__E1000_RESETTING, &adapter->flags);
             return -EINVAL;
         }
     }
 
+    /* MDI-X => 2; MDI => 1; Auto => 3 */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+            hw->mdix = AUTO_ALL_MODES;
+        else
+            hw->mdix = ecmd->eth_tp_mdix_ctrl;
+    }
+
     /* reset the link */
 
     if (netif_running(adapter->netdev)) {
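The two e1000_ethtool.c hunks above wire the new eth_tp_mdix / eth_tp_mdix_ctrl fields into the ETHTOOL_GSET/ETHTOOL_SSET interface. A minimal user-space sketch of exercising that control follows; it assumes kernel headers new enough to declare eth_tp_mdix_ctrl, and the interface name "eth0" is illustrative, not part of the patch.

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <unistd.h>

int main(void)
{
    struct ethtool_cmd ecmd;
    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* illustrative name */
    ifr.ifr_data = (char *)&ecmd;

    memset(&ecmd, 0, sizeof(ecmd));
    ecmd.cmd = ETHTOOL_GSET;
    if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)    /* read current settings */
        return 1;

    /* the driver rejects a forced MDI/MDI-X unless autoneg stays enabled */
    ecmd.cmd = ETHTOOL_SSET;
    ecmd.autoneg = AUTONEG_ENABLE;
    ecmd.eth_tp_mdix_ctrl = ETH_TP_MDI_X;    /* 2 = force crossover */
    if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)    /* apply; the link is reset */
        return 1;

    close(fd);
    return 0;
}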
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index f3f9aeb7d1e..222bfaff462 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -2014,6 +2014,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
         e1000_unmap_and_free_tx_resource(adapter, buffer_info);
     }
 
+    netdev_reset_queue(adapter->netdev);
     size = sizeof(struct e1000_buffer) * tx_ring->count;
     memset(tx_ring->buffer_info, 0, size);
 
@@ -3273,6 +3274,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
                  nr_frags, mss);
 
     if (count) {
+        netdev_sent_queue(netdev, skb->len);
         skb_tx_timestamp(skb);
 
         e1000_tx_queue(adapter, tx_ring, tx_flags, count);
@@ -3860,6 +3862,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
     unsigned int i, eop;
     unsigned int count = 0;
     unsigned int total_tx_bytes=0, total_tx_packets=0;
+    unsigned int bytes_compl = 0, pkts_compl = 0;
 
     i = tx_ring->next_to_clean;
     eop = tx_ring->buffer_info[i].next_to_watch;
@@ -3877,6 +3880,11 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
             if (cleaned) {
                 total_tx_packets += buffer_info->segs;
                 total_tx_bytes += buffer_info->bytecount;
+                if (buffer_info->skb) {
+                    bytes_compl += buffer_info->skb->len;
+                    pkts_compl++;
+                }
+
             }
             e1000_unmap_and_free_tx_resource(adapter, buffer_info);
             tx_desc->upper.data = 0;
@@ -3890,6 +3898,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 
     tx_ring->next_to_clean = i;
 
+    netdev_completed_queue(netdev, pkts_compl, bytes_compl);
+
 #define TX_WAKE_THRESHOLD 32
     if (unlikely(count && netif_carrier_ok(netdev) &&
                  E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) {
@@ -4950,6 +4960,10 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
     default:
         goto err_inval;
     }
+
+    /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+    hw->mdix = AUTO_ALL_MODES;
+
     return 0;
 
 err_inval:
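The netdev_sent_queue(), netdev_completed_queue() and netdev_reset_queue() calls added above are the three hooks of Byte Queue Limits (BQL). A condensed sketch of the pattern, with hypothetical my_* names standing in for driver specifics; the netdev_*_queue() calls themselves are the real BQL API.

#include <linux/netdevice.h>

static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
{
    /* ...map the skb and post Tx descriptors here... */
    netdev_sent_queue(dev, skb->len);    /* account bytes handed to HW */
    return NETDEV_TX_OK;
}

static void my_clean_tx(struct net_device *dev)
{
    unsigned int pkts_compl = 0, bytes_compl = 0;

    /* ...for each completed descriptor that carried an skb:
     *      bytes_compl += skb->len; pkts_compl++; ... */
    netdev_completed_queue(dev, pkts_compl, bytes_compl);    /* may re-wake */
}

static void my_clean_tx_ring(struct net_device *dev)
{
    /* ...free every pending buffer... */
    netdev_reset_queue(dev);    /* BQL state must match the now-empty ring */
}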
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 080c89093fe..c9858640800 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -653,7 +653,7 @@ static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
 **/
 static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
 {
-    u16 data = er32(POEMB);
+    u32 data = er32(POEMB);
 
     if (active)
         data |= E1000_PHY_CTRL_D0A_LPLU;
@@ -677,7 +677,7 @@ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
 **/
 static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active)
 {
-    u16 data = er32(POEMB);
+    u32 data = er32(POEMB);
 
     if (!active) {
         data &= ~E1000_PHY_CTRL_NOND0A_LPLU;
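The POEMB change above is a truncation fix: er32() returns a 32-bit register value, so parking it in a u16 silently drops bits 31:16, and the read-modify-write that follows would then clear those bits when the value is written back. A small user-space illustration of the hazard (the register value is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t reg = 0x80010002;    /* pretend POEMB value with high bits set */
    uint16_t truncated = reg;     /* what the old u16 declaration kept */

    /* writing 'truncated' back would wipe bits 31:16 of the register */
    printf("full=0x%08x truncated=0x%08x\n", reg, (uint32_t)truncated);
    return 0;
}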
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 0349e2478df..c11ac275666 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -199,6 +199,11 @@ static int e1000_get_settings(struct net_device *netdev,
     else
         ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
 
+    if (hw->phy.mdix == AUTO_ALL_MODES)
+        ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+    else
+        ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
     return 0;
 }
 
@@ -241,6 +246,10 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
     default:
         goto err_inval;
     }
+
+    /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+    adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
     return 0;
 
 err_inval:
@@ -264,6 +273,22 @@ static int e1000_set_settings(struct net_device *netdev,
         return -EINVAL;
     }
 
+    /*
+     * MDI setting is only allowed when autoneg enabled because
+     * some hardware doesn't allow MDI setting when speed or
+     * duplex is forced.
+     */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        if (hw->phy.media_type != e1000_media_type_copper)
+            return -EOPNOTSUPP;
+
+        if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+            (ecmd->autoneg != AUTONEG_ENABLE)) {
+            e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+            return -EINVAL;
+        }
+    }
+
     while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
         usleep_range(1000, 2000);
 
@@ -282,20 +307,32 @@ static int e1000_set_settings(struct net_device *netdev,
         hw->fc.requested_mode = e1000_fc_default;
     } else {
         u32 speed = ethtool_cmd_speed(ecmd);
+        /* calling this overrides forced MDI setting */
         if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) {
             clear_bit(__E1000_RESETTING, &adapter->state);
             return -EINVAL;
         }
     }
 
+    /* MDI-X => 2; MDI => 1; Auto => 3 */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        /*
+         * fix up the value for auto (3 => 0) as zero is mapped
+         * internally to auto
+         */
+        if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+            hw->phy.mdix = AUTO_ALL_MODES;
+        else
+            hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+    }
+
     /* reset the link */
 
     if (netif_running(adapter->netdev)) {
         e1000e_down(adapter);
         e1000e_up(adapter);
-    } else {
+    } else
         e1000e_reset(adapter);
-    }
 
     clear_bit(__E1000_RESETTING, &adapter->state);
     return 0;
@@ -1905,7 +1942,8 @@ static int e1000_set_coalesce(struct net_device *netdev,
         return -EINVAL;
 
     if (ec->rx_coalesce_usecs == 4) {
-        adapter->itr = adapter->itr_setting = 4;
+        adapter->itr_setting = 4;
+        adapter->itr = adapter->itr_setting;
     } else if (ec->rx_coalesce_usecs <= 3) {
         adapter->itr = 20000;
         adapter->itr_setting = ec->rx_coalesce_usecs;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3f0223ac4c7..fb659dd8db0 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -56,7 +56,7 @@
 
 #define DRV_EXTRAVERSION "-k"
 
-#define DRV_VERSION "2.0.0" DRV_EXTRAVERSION
+#define DRV_VERSION "2.1.4" DRV_EXTRAVERSION
 char e1000e_driver_name[] = "e1000e";
 const char e1000e_driver_version[] = DRV_VERSION;
 
@@ -3446,7 +3446,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
         /*
          * if short on Rx space, Rx wins and must trump Tx
-         * adjustment or use Early Receive if available
+         * adjustment
          */
         if (pba < min_rx_space)
             pba = min_rx_space;
@@ -3755,6 +3755,10 @@ static irqreturn_t e1000_intr_msi_test(int irq, void *data)
     e_dbg("icr is %08X\n", icr);
     if (icr & E1000_ICR_RXSEQ) {
         adapter->flags &= ~FLAG_MSI_TEST_FAILED;
+        /*
+         * Force memory writes to complete before acknowledging the
+         * interrupt is handled.
+         */
         wmb();
     }
 
@@ -3796,6 +3800,10 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
         goto msi_test_failed;
     }
 
+    /*
+     * Force memory writes to complete before enabling and firing an
+     * interrupt.
+     */
     wmb();
 
     e1000_irq_enable(adapter);
@@ -3807,7 +3815,7 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
 
     e1000_irq_disable(adapter);
 
-    rmb();
+    rmb();            /* read flags after interrupt has been fired */
 
     if (adapter->flags & FLAG_MSI_TEST_FAILED) {
         adapter->int_mode = E1000E_INT_MODE_LEGACY;
@@ -4670,7 +4678,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
     struct e1000_buffer *buffer_info;
     unsigned int i;
     u32 cmd_length = 0;
-    u16 ipcse = 0, tucse, mss;
+    u16 ipcse = 0, mss;
     u8 ipcss, ipcso, tucss, tucso, hdr_len;
 
     if (!skb_is_gso(skb))
@@ -4704,7 +4712,6 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
     ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
     tucss = skb_transport_offset(skb);
     tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
-    tucse = 0;
 
     cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
                    E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
@@ -4718,7 +4725,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
     context_desc->lower_setup.ip_fields.ipcse  = cpu_to_le16(ipcse);
     context_desc->upper_setup.tcp_fields.tucss = tucss;
     context_desc->upper_setup.tcp_fields.tucso = tucso;
-    context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
+    context_desc->upper_setup.tcp_fields.tucse = 0;
     context_desc->tcp_seg_setup.fields.mss     = cpu_to_le16(mss);
     context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
     context_desc->cmd_and_length = cpu_to_le32(cmd_length);
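The newly documented wmb()/rmb() pair in the MSI self-test is the usual publish/observe handshake: the interrupt handler clears the flag and issues wmb() before returning, and the test path re-reads the flag only behind rmb(). A compressed sketch of the idiom; TEST_FAILED, test_irq() and run_test() are hypothetical names, not driver API.

#include <linux/delay.h>
#include <linux/interrupt.h>

#define TEST_FAILED 0x1    /* hypothetical flag, like FLAG_MSI_TEST_FAILED */

static unsigned long test_flags = TEST_FAILED;

static irqreturn_t test_irq(int irq, void *data)
{
    test_flags &= ~TEST_FAILED;
    wmb();    /* publish the cleared flag before the irq is torn down */
    return IRQ_HANDLED;
}

static int run_test(void)
{
    /* ...request the irq, trigger one interrupt, then wait... */
    msleep(50);
    rmb();    /* observe the handler's write before deciding pass/fail */
    return (test_flags & TEST_FAILED) ? -EIO : 0;
}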
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index b860d4f7ea2..fc62a3f3a5b 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -84,8 +84,9 @@ static const u16 e1000_igp_2_cable_length_table[] = {
 #define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200
 
 /* I82577 PHY Control 2 */
-#define I82577_PHY_CTRL2_AUTO_MDIX        0x0400
-#define I82577_PHY_CTRL2_FORCE_MDI_MDIX   0x0200
+#define I82577_PHY_CTRL2_MANUAL_MDIX      0x0200
+#define I82577_PHY_CTRL2_AUTO_MDI_MDIX    0x0400
+#define I82577_PHY_CTRL2_MDIX_CFG_MASK    0x0600
 
 /* I82577 PHY Diagnostics Status */
 #define I82577_DSTATUS_CABLE_LENGTH       0x03FC
@@ -702,6 +703,32 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
     if (ret_val)
         return ret_val;
 
+    /* Set MDI/MDIX mode */
+    ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data);
+    if (ret_val)
+        return ret_val;
+    phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
+    /*
+     * Options:
+     *   0 - Auto (default)
+     *   1 - MDI mode
+     *   2 - MDI-X mode
+     */
+    switch (hw->phy.mdix) {
+    case 1:
+        break;
+    case 2:
+        phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
+        break;
+    case 0:
+    default:
+        phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
+        break;
+    }
+    ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data);
+    if (ret_val)
+        return ret_val;
+
     return e1000_set_master_slave_mode(hw);
 }
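The renamed defines treat the two MDIX configuration bits as one field (mask 0x0600), so the driver clears the whole field before ORing in the new encoding rather than toggling individual bits. A generic sketch of that read-modify-write; the rd/wr accessors are caller-supplied assumptions, and the bit values mirror the defines above.

#include <stdint.h>

#define MDIX_CFG_MASK   0x0600    /* both MDIX configuration bits */
#define MDIX_MANUAL_X   0x0200    /* force MDI-X */
#define MDIX_AUTO       0x0400    /* automatic crossover */

static void set_mdix(uint16_t (*rd)(int reg), void (*wr)(int reg, uint16_t v),
                     int reg, int mdix)
{
    uint16_t v = rd(reg);

    v &= ~MDIX_CFG_MASK;          /* clear the whole field first */
    if (mdix == 2)
        v |= MDIX_MANUAL_X;       /* forced MDI-X */
    else if (mdix != 1)
        v |= MDIX_AUTO;           /* 0/default: auto; 1 leaves forced MDI */
    wr(reg, v);
}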
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index ba994fb4cec..ca4641e2f74 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -2223,11 +2223,10 @@ out:
 s32 igb_set_eee_i350(struct e1000_hw *hw)
 {
     s32 ret_val = 0;
-    u32 ipcnfg, eeer, ctrl_ext;
+    u32 ipcnfg, eeer;
 
-    ctrl_ext = rd32(E1000_CTRL_EXT);
-    if ((hw->mac.type != e1000_i350) ||
-        (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK))
+    if ((hw->mac.type < e1000_i350) ||
+        (hw->phy.media_type != e1000_media_type_copper))
         goto out;
     ipcnfg = rd32(E1000_IPCNFG);
     eeer = rd32(E1000_EEER);
@@ -2240,6 +2239,14 @@ s32 igb_set_eee_i350(struct e1000_hw *hw)
              E1000_EEER_RX_LPI_EN |
              E1000_EEER_LPI_FC);
 
+        /* keep the LPI clock running before EEE is enabled */
+        if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
+            u32 eee_su;
+            eee_su = rd32(E1000_EEE_SU);
+            eee_su &= ~E1000_EEE_SU_LPI_CLK_STP;
+            wr32(E1000_EEE_SU, eee_su);
+        }
+
     } else {
         ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN |
             E1000_IPCNFG_EEE_100M_AN);
@@ -2249,6 +2256,8 @@ s32 igb_set_eee_i350(struct e1000_hw *hw)
     }
     wr32(E1000_IPCNFG, ipcnfg);
     wr32(E1000_EEER, eeer);
+    rd32(E1000_IPCNFG);
+    rd32(E1000_EEER);
 
 out:
     return ret_val;
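The two trailing rd32() calls added above flush the preceding posted MMIO writes, so the EEE configuration has actually reached the hardware before the function returns. The idiom in isolation; REG_CFG and the mapped base are hypothetical, but writel()/readl() are the standard kernel accessors.

#include <linux/io.h>

#define REG_CFG 0x0100    /* hypothetical register offset */

static void apply_cfg(void __iomem *regs, u32 val)
{
    writel(val, regs + REG_CFG);    /* may sit in a PCIe posting buffer */
    readl(regs + REG_CFG);          /* read back forces the write out */
}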
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index ec7e4fe3e3e..de4b41ec3c4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -322,6 +322,9 @@
 #define E1000_FCRTC_RTH_COAL_SHIFT      4
 #define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision */
 
+/* Timestamp in Rx buffer */
+#define E1000_RXPBS_CFG_TS_EN           0x80000000
+
 /* SerDes Control */
 #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
 
@@ -360,6 +363,7 @@
 #define E1000_ICR_RXDMT0        0x00000010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXT0          0x00000080 /* rx timer intr (ring 0) */
 #define E1000_ICR_VMMB          0x00000100 /* VM MB event */
+#define E1000_ICR_TS            0x00080000 /* Time Sync Interrupt */
 #define E1000_ICR_DRSTA         0x40000000 /* Device Reset Asserted */
 /* If this bit asserted, the driver should claim the interrupt */
 #define E1000_ICR_INT_ASSERTED  0x80000000
@@ -399,6 +403,7 @@
 #define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
 #define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_IMS_VMMB      E1000_ICR_VMMB      /* Mail box activity */
+#define E1000_IMS_TS        E1000_ICR_TS        /* Time Sync Interrupt */
 #define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* rx sequence error */
 #define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min. threshold */
 #define E1000_IMS_RXT0      E1000_ICR_RXT0      /* rx timer intr */
@@ -510,6 +515,9 @@
 
 #define E1000_TIMINCA_16NS_SHIFT 24
 
+#define E1000_TSICR_TXTS 0x00000002
+#define E1000_TSIM_TXTS  0x00000002
+
 #define E1000_MDICNFG_EXT_MDIO    0x80000000 /* MDI ext/int destination */
 #define E1000_MDICNFG_COM_MDIO    0x40000000 /* MDI shared w/ lan 0 */
 #define E1000_MDICNFG_PHY_MASK    0x03E00000
@@ -849,8 +857,9 @@
 #define E1000_IPCNFG_EEE_100M_AN     0x00000004 /* EEE Enable 100M AN */
 #define E1000_EEER_TX_LPI_EN         0x00010000 /* EEE Tx LPI Enable */
 #define E1000_EEER_RX_LPI_EN         0x00020000 /* EEE Rx LPI Enable */
-#define E1000_EEER_FRC_AN            0x10000000 /* Enable EEE in loopback */
+#define E1000_EEER_FRC_AN            0x10000000 /* Enable EEE in loopback */
 #define E1000_EEER_LPI_FC            0x00040000 /* EEE Enable on FC */
+#define E1000_EEE_SU_LPI_CLK_STP     0X00800000 /* EEE LPI Clock Stop */
 
 /* SerDes Control */
 #define E1000_GEN_CTL_READY          0x80000000
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 7be98b6f105..3404bc79f4c 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -464,6 +464,32 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw)
     phy_data |= I82580_CFG_ENABLE_DOWNSHIFT;
 
     ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data);
+    if (ret_val)
+        goto out;
+
+    /* Set MDI/MDIX mode */
+    ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data);
+    if (ret_val)
+        goto out;
+    phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
+    /*
+     * Options:
+     *   0 - Auto (default)
+     *   1 - MDI mode
+     *   2 - MDI-X mode
+     */
+    switch (hw->phy.mdix) {
+    case 1:
+        break;
+    case 2:
+        phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX;
+        break;
+    case 0:
+    default:
+        phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX;
+        break;
+    }
+    ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
 
 out:
     return ret_val;
@@ -2246,8 +2272,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw)
     if (ret_val)
         goto out;
 
-    phy_data &= ~I82580_PHY_CTRL2_AUTO_MDIX;
-    phy_data &= ~I82580_PHY_CTRL2_FORCE_MDI_MDIX;
+    phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
 
     ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
     if (ret_val)
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 34e40619f16..6ac3299bfcb 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -111,8 +111,9 @@ s32 igb_check_polarity_m88(struct e1000_hw *hw);
 #define I82580_PHY_STATUS2_SPEED_100MBPS  0x0100
 
 /* I82580 PHY Control 2 */
-#define I82580_PHY_CTRL2_AUTO_MDIX        0x0400
-#define I82580_PHY_CTRL2_FORCE_MDI_MDIX   0x0200
+#define I82580_PHY_CTRL2_MANUAL_MDIX      0x0200
+#define I82580_PHY_CTRL2_AUTO_MDI_MDIX    0x0400
+#define I82580_PHY_CTRL2_MDIX_CFG_MASK    0x0600
 
 /* I82580 PHY Diagnostics Status */
 #define I82580_DSTATUS_CABLE_LENGTH       0x03FC
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 28394bea525..e5db48594e8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -91,6 +91,8 @@
 #define E1000_TIMINCA  0x0B608 /* Increment attributes register - RW */
 #define E1000_TSAUXC   0x0B640 /* Timesync Auxiliary Control register */
 #define E1000_SYSTIMR  0x0B6F8 /* System time register Residue */
+#define E1000_TSICR    0x0B66C /* Interrupt Cause Register */
+#define E1000_TSIM     0x0B674 /* Interrupt Mask Register */
 
 /* Filtering Registers */
 #define E1000_SAQF(_n) (0x5980 + 4 * (_n))
@@ -347,6 +349,7 @@
 /* Energy Efficient Ethernet "EEE" register */
 #define E1000_IPCNFG  0x0E38 /* Internal PHY Configuration */
 #define E1000_EEER    0x0E30 /* Energy Efficient Ethernet */
+#define E1000_EEE_SU  0X0E34 /* EEE Setup */
 
 /* Thermal Sensor Register */
 #define E1000_THSTAT  0x08110 /* Thermal Sensor Status */
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 9e572dd29ab..8aad230c059 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -34,9 +34,11 @@
 #include "e1000_mac.h"
 #include "e1000_82575.h"
 
+#ifdef CONFIG_IGB_PTP
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#endif /* CONFIG_IGB_PTP */
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
 
@@ -99,7 +101,6 @@ struct vf_data_storage {
     u16 pf_vlan; /* When set, guest VLAN config not allowed. */
     u16 pf_qos;
     u16 tx_rate;
-    struct pci_dev *vfdev;
 };
 
 #define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
@@ -131,9 +132,9 @@ struct vf_data_storage {
 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
 
 /* Supported Rx Buffer Sizes */
-#define IGB_RXBUFFER_512   512
+#define IGB_RXBUFFER_256   256
 #define IGB_RXBUFFER_16384 16384
-#define IGB_RX_HDR_LEN     IGB_RXBUFFER_512
+#define IGB_RX_HDR_LEN     IGB_RXBUFFER_256
 
 /* How many Tx Descriptors do we need to call netif_wake_queue ? */
 #define IGB_TX_QUEUE_WAKE  16
@@ -167,8 +168,8 @@ struct igb_tx_buffer {
     unsigned int bytecount;
     u16 gso_segs;
     __be16 protocol;
-    dma_addr_t dma;
-    u32 length;
+    DEFINE_DMA_UNMAP_ADDR(dma);
+    DEFINE_DMA_UNMAP_LEN(len);
     u32 tx_flags;
 };
 
@@ -212,7 +213,6 @@ struct igb_q_vector {
     struct igb_ring_container rx, tx;
 
     struct napi_struct napi;
-    int numa_node;
 
     u16 itr_val;
     u8 set_itr;
@@ -257,7 +257,6 @@ struct igb_ring {
     };
     /* Items past this point are only used during ring alloc / free */
     dma_addr_t dma;                /* phys address of the ring */
-    int numa_node;                 /* node to alloc ring memory on */
 };
 
 enum e1000_ring_flags_t {
@@ -342,7 +341,6 @@ struct igb_adapter {
 
     /* OS defined structs */
     struct pci_dev *pdev;
-    struct hwtstamp_config hwtstamp_config;
 
     spinlock_t stats64_lock;
     struct rtnl_link_stats64 stats64;
@@ -373,15 +371,19 @@ struct igb_adapter {
     int vf_rate_link_speed;
     u32 rss_queues;
     u32 wvbr;
-    int node;
     u32 *shadow_vfta;
 
+#ifdef CONFIG_IGB_PTP
     struct ptp_clock *ptp_clock;
-    struct ptp_clock_info caps;
-    struct delayed_work overflow_work;
+    struct ptp_clock_info ptp_caps;
+    struct delayed_work ptp_overflow_work;
+    struct work_struct ptp_tx_work;
+    struct sk_buff *ptp_tx_skb;
     spinlock_t tmreg_lock;
    struct cyclecounter cc;
     struct timecounter tc;
+#endif /* CONFIG_IGB_PTP */
+
     char fw_version[32];
 };
 
@@ -390,6 +392,7 @@ struct igb_adapter {
 #define IGB_FLAG_QUAD_PORT_A       (1 << 2)
 #define IGB_FLAG_QUEUE_PAIRS       (1 << 3)
 #define IGB_FLAG_DMAC              (1 << 4)
+#define IGB_FLAG_PTP               (1 << 5)
 
 /* DMA Coalescing defines */
 #define IGB_MIN_TXPBSIZE           20408
@@ -435,13 +438,17 @@ extern void igb_power_up_link(struct igb_adapter *);
 extern void igb_set_fw_version(struct igb_adapter *);
 #ifdef CONFIG_IGB_PTP
 extern void igb_ptp_init(struct igb_adapter *adapter);
-extern void igb_ptp_remove(struct igb_adapter *adapter);
-
-extern void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
-                                   struct skb_shared_hwtstamps *hwtstamps,
-                                   u64 systim);
+extern void igb_ptp_stop(struct igb_adapter *adapter);
+extern void igb_ptp_reset(struct igb_adapter *adapter);
+extern void igb_ptp_tx_work(struct work_struct *work);
+extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
+extern void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector,
+                                union e1000_adv_rx_desc *rx_desc,
+                                struct sk_buff *skb);
+extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+                                  struct ifreq *ifr, int cmd);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
 static inline s32 igb_reset_phy(struct e1000_hw *hw)
 {
     if (hw->phy.ops.reset)
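The new ptp_tx_skb / ptp_tx_work pair tracks at most one outstanding Tx timestamp request: the transmit path takes an skb_get() reference, and the work item later reads the stamp registers and delivers it with skb_tstamp_tx(). A condensed sketch of that deferred-delivery flow; my_adapter and my_read_txstamp() are hypothetical stand-ins for the igb internals.

#include <linux/ktime.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>

struct my_adapter {
    struct work_struct ptp_tx_work;
    struct sk_buff *ptp_tx_skb;    /* at most one skb awaits a stamp */
};

extern u64 my_read_txstamp(void);  /* hypothetical: reads TXSTMPL/TXSTMPH */

static void my_ptp_tx_work(struct work_struct *work)
{
    struct my_adapter *ad = container_of(work, struct my_adapter,
                                         ptp_tx_work);
    struct skb_shared_hwtstamps hwts = { };

    hwts.hwtstamp = ns_to_ktime(my_read_txstamp());
    skb_tstamp_tx(ad->ptp_tx_skb, &hwts);    /* deliver to the socket */
    dev_kfree_skb_any(ad->ptp_tx_skb);       /* drop the skb_get() ref */
    ad->ptp_tx_skb = NULL;                   /* accept the next request */
}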
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 70591117051..2ea01284982 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -148,9 +148,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
                    SUPPORTED_100baseT_Full |
                    SUPPORTED_1000baseT_Full|
                    SUPPORTED_Autoneg |
-                   SUPPORTED_TP);
-        ecmd->advertising = (ADVERTISED_TP |
-                             ADVERTISED_Pause);
+                   SUPPORTED_TP |
+                   SUPPORTED_Pause);
+        ecmd->advertising = ADVERTISED_TP;
 
         if (hw->mac.autoneg == 1) {
             ecmd->advertising |= ADVERTISED_Autoneg;
@@ -158,6 +158,21 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
             ecmd->advertising |= hw->phy.autoneg_advertised;
         }
 
+        if (hw->mac.autoneg != 1)
+            ecmd->advertising &= ~(ADVERTISED_Pause |
+                                   ADVERTISED_Asym_Pause);
+
+        if (hw->fc.requested_mode == e1000_fc_full)
+            ecmd->advertising |= ADVERTISED_Pause;
+        else if (hw->fc.requested_mode == e1000_fc_rx_pause)
+            ecmd->advertising |= (ADVERTISED_Pause |
+                                  ADVERTISED_Asym_Pause);
+        else if (hw->fc.requested_mode == e1000_fc_tx_pause)
+            ecmd->advertising |=  ADVERTISED_Asym_Pause;
+        else
+            ecmd->advertising &= ~(ADVERTISED_Pause |
+                                   ADVERTISED_Asym_Pause);
+
         ecmd->port = PORT_TP;
         ecmd->phy_address = hw->phy.addr;
     } else {
@@ -198,6 +213,19 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
     }
 
     ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+    /* MDI-X => 2; MDI =>1; Invalid =>0 */
+    if (hw->phy.media_type == e1000_media_type_copper)
+        ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+                                              ETH_TP_MDI;
+    else
+        ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+    if (hw->phy.mdix == AUTO_ALL_MODES)
+        ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+    else
+        ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
     return 0;
 }
 
@@ -214,6 +242,22 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
         return -EINVAL;
     }
 
+    /*
+     * MDI setting is only allowed when autoneg enabled because
+     * some hardware doesn't allow MDI setting when speed or
+     * duplex is forced.
+     */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        if (hw->phy.media_type != e1000_media_type_copper)
+            return -EOPNOTSUPP;
+
+        if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+            (ecmd->autoneg != AUTONEG_ENABLE)) {
+            dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+            return -EINVAL;
+        }
+    }
+
     while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
         msleep(1);
 
@@ -227,12 +271,25 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
         hw->fc.requested_mode = e1000_fc_default;
     } else {
         u32 speed = ethtool_cmd_speed(ecmd);
+        /* calling this overrides forced MDI setting */
         if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) {
             clear_bit(__IGB_RESETTING, &adapter->state);
             return -EINVAL;
         }
     }
 
+    /* MDI-X => 2; MDI => 1; Auto => 3 */
+    if (ecmd->eth_tp_mdix_ctrl) {
+        /*
+         * fix up the value for auto (3 => 0) as zero is mapped
+         * internally to auto
+         */
+        if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+            hw->phy.mdix = AUTO_ALL_MODES;
+        else
+            hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+    }
+
     /* reset the link */
     if (netif_running(adapter->netdev)) {
         igb_down(adapter);
@@ -1469,33 +1526,22 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 {
     struct e1000_hw *hw = &adapter->hw;
     u32 ctrl_reg = 0;
-    u16 phy_reg = 0;
 
     hw->mac.autoneg = false;
 
-    switch (hw->phy.type) {
-    case e1000_phy_m88:
-        /* Auto-MDI/MDIX Off */
-        igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
-        /* reset to update Auto-MDI/MDIX */
-        igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
-        /* autoneg off */
-        igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
-        break;
-    case e1000_phy_82580:
-        /* enable MII loopback */
-        igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041);
-        break;
-    case e1000_phy_i210:
-        /* set loopback speed in PHY */
-        igb_read_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-                         &phy_reg);
-        phy_reg |= GS40G_MAC_SPEED_1G;
-        igb_write_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-                          phy_reg);
-        ctrl_reg = rd32(E1000_CTRL_EXT);
-    default:
-        break;
+    if (hw->phy.type == e1000_phy_m88) {
+        if (hw->phy.id != I210_I_PHY_ID) {
+            /* Auto-MDI/MDIX Off */
+            igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+            /* reset to update Auto-MDI/MDIX */
+            igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+            /* autoneg off */
+            igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+        } else {
+            /* force 1000, set loopback */
+            igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+            igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+        }
     }
 
     /* add small delay to avoid loopback test failure */
@@ -1513,7 +1559,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
             E1000_CTRL_FD |      /* Force Duplex to FULL */
             E1000_CTRL_SLU);     /* Set link up enable bit */
 
-    if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+    if (hw->phy.type == e1000_phy_m88)
         ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
 
     wr32(E1000_CTRL, ctrl_reg);
@@ -1521,11 +1567,10 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
     /* Disable the receiver on the PHY so when a cable is plugged in, the
      * PHY does not begin to autoneg when a cable is reconnected to the NIC.
      */
-    if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+    if (hw->phy.type == e1000_phy_m88)
         igb_phy_disable_receiver(adapter);
 
-    udelay(500);
-
+    mdelay(500);
     return 0;
 }
 
@@ -1785,13 +1830,6 @@ static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
         *data = 0;
         goto out;
     }
-    if ((adapter->hw.mac.type == e1000_i210)
-        || (adapter->hw.mac.type == e1000_i211)) {
-        dev_err(&adapter->pdev->dev,
-            "Loopback test not supported on this part at this time.\n");
-        *data = 0;
-        goto out;
-    }
     *data = igb_setup_desc_rings(adapter);
     if (*data)
         goto out;
@@ -2257,6 +2295,54 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
     }
 }
 
+static int igb_get_ts_info(struct net_device *dev,
+                           struct ethtool_ts_info *info)
+{
+    struct igb_adapter *adapter = netdev_priv(dev);
+
+    switch (adapter->hw.mac.type) {
+#ifdef CONFIG_IGB_PTP
+    case e1000_82576:
+    case e1000_82580:
+    case e1000_i350:
+    case e1000_i210:
+    case e1000_i211:
+        info->so_timestamping =
+            SOF_TIMESTAMPING_TX_HARDWARE |
+            SOF_TIMESTAMPING_RX_HARDWARE |
+            SOF_TIMESTAMPING_RAW_HARDWARE;
+
+        if (adapter->ptp_clock)
+            info->phc_index = ptp_clock_index(adapter->ptp_clock);
+        else
+            info->phc_index = -1;
+
+        info->tx_types =
+            (1 << HWTSTAMP_TX_OFF) |
+            (1 << HWTSTAMP_TX_ON);
+
+        info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
+
+        /* 82576 does not support timestamping all packets. */
+        if (adapter->hw.mac.type >= e1000_82580)
+            info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL;
+        else
+            info->rx_filters |=
+                (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+                (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+                (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+                (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+                (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+                (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+                (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+        return 0;
+#endif /* CONFIG_IGB_PTP */
+    default:
+        return -EOPNOTSUPP;
+    }
+}
+
 static int igb_ethtool_begin(struct net_device *netdev)
 {
     struct igb_adapter *adapter = netdev_priv(netdev);
@@ -2270,38 +2356,6 @@ static void igb_ethtool_complete(struct net_device *netdev)
     pm_runtime_put(&adapter->pdev->dev);
 }
 
-#ifdef CONFIG_IGB_PTP
-static int igb_ethtool_get_ts_info(struct net_device *dev,
-                                   struct ethtool_ts_info *info)
-{
-    struct igb_adapter *adapter = netdev_priv(dev);
-
-    info->so_timestamping =
-        SOF_TIMESTAMPING_TX_HARDWARE |
-        SOF_TIMESTAMPING_RX_HARDWARE |
-        SOF_TIMESTAMPING_RAW_HARDWARE;
-
-    if (adapter->ptp_clock)
-        info->phc_index = ptp_clock_index(adapter->ptp_clock);
-    else
-        info->phc_index = -1;
-
-    info->tx_types =
-        (1 << HWTSTAMP_TX_OFF) |
-        (1 << HWTSTAMP_TX_ON);
-
-    info->rx_filters =
-        (1 << HWTSTAMP_FILTER_NONE) |
-        (1 << HWTSTAMP_FILTER_ALL) |
-        (1 << HWTSTAMP_FILTER_SOME) |
-        (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
-        (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
-        (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
-
-    return 0;
-}
-
-#endif
 static const struct ethtool_ops igb_ethtool_ops = {
     .get_settings           = igb_get_settings,
     .set_settings           = igb_set_settings,
@@ -2328,11 +2382,9 @@ static const struct ethtool_ops igb_ethtool_ops = {
     .get_ethtool_stats      = igb_get_ethtool_stats,
     .get_coalesce           = igb_get_coalesce,
     .set_coalesce           = igb_set_coalesce,
+    .get_ts_info            = igb_get_ts_info,
     .begin                  = igb_ethtool_begin,
     .complete               = igb_ethtool_complete,
-#ifdef CONFIG_IGB_PTP
-    .get_ts_info            = igb_ethtool_get_ts_info,
-#endif
};
 
 void igb_set_ethtool_ops(struct net_device *netdev)
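The reworked .get_ts_info hook is what `ethtool -T` and PTP daemons query for timestamping capabilities and the PHC index. A minimal user-space query, assuming headers that declare ETHTOOL_GET_TS_INFO; "eth0" is illustrative.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
    struct ethtool_ts_info info;
    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    memset(&info, 0, sizeof(info));
    info.cmd = ETHTOOL_GET_TS_INFO;

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* illustrative name */
    ifr.ifr_data = (char *)&info;

    if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
        printf("phc_index=%d tx_types=0x%x rx_filters=0x%x\n",
               info.phc_index, info.tx_types, info.rx_filters);
    return 0;
}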
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f88c822e57a..e1ceb37ef12 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -172,8 +172,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *);
 
 #ifdef CONFIG_PCI_IOV
 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
-static int igb_find_enabled_vfs(struct igb_adapter *adapter);
-static int igb_check_vf_assignment(struct igb_adapter *adapter);
+static bool igb_vfs_are_assigned(struct igb_adapter *adapter);
 #endif
 
 #ifdef CONFIG_PM
@@ -404,8 +403,8 @@ static void igb_dump(struct igb_adapter *adapter)
         buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
         pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
             n, tx_ring->next_to_use, tx_ring->next_to_clean,
-            (u64)buffer_info->dma,
-            buffer_info->length,
+            (u64)dma_unmap_addr(buffer_info, dma),
+            dma_unmap_len(buffer_info, len),
             buffer_info->next_to_watch,
             (u64)buffer_info->time_stamp);
     }
@@ -456,8 +455,8 @@ static void igb_dump(struct igb_adapter *adapter)
             " %04X  %p %016llX %p%s\n", i,
             le64_to_cpu(u0->a),
             le64_to_cpu(u0->b),
-            (u64)buffer_info->dma,
-            buffer_info->length,
+            (u64)dma_unmap_addr(buffer_info, dma),
+            dma_unmap_len(buffer_info, len),
             buffer_info->next_to_watch,
             (u64)buffer_info->time_stamp,
             buffer_info->skb, next_desc);
@@ -466,7 +465,8 @@ static void igb_dump(struct igb_adapter *adapter)
             print_hex_dump(KERN_INFO, "",
                 DUMP_PREFIX_ADDRESS,
                 16, 1, buffer_info->skb->data,
-                buffer_info->length, true);
+                dma_unmap_len(buffer_info, len),
+                true);
         }
     }
@@ -683,52 +683,29 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 {
     struct igb_ring *ring;
     int i;
-    int orig_node = adapter->node;
 
     for (i = 0; i < adapter->num_tx_queues; i++) {
-        if (orig_node == -1) {
-            int cur_node = next_online_node(adapter->node);
-            if (cur_node == MAX_NUMNODES)
-                cur_node = first_online_node;
-            adapter->node = cur_node;
-        }
-        ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
-                            adapter->node);
-        if (!ring)
-            ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
+        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
         if (!ring)
             goto err;
         ring->count = adapter->tx_ring_count;
         ring->queue_index = i;
         ring->dev = &adapter->pdev->dev;
         ring->netdev = adapter->netdev;
-        ring->numa_node = adapter->node;
         /* For 82575, context index must be unique per ring. */
         if (adapter->hw.mac.type == e1000_82575)
             set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
         adapter->tx_ring[i] = ring;
     }
-    /* Restore the adapter's original node */
-    adapter->node = orig_node;
 
     for (i = 0; i < adapter->num_rx_queues; i++) {
-        if (orig_node == -1) {
-            int cur_node = next_online_node(adapter->node);
-            if (cur_node == MAX_NUMNODES)
-                cur_node = first_online_node;
-            adapter->node = cur_node;
-        }
-        ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
-                            adapter->node);
-        if (!ring)
-            ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
+        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
         if (!ring)
             goto err;
         ring->count = adapter->rx_ring_count;
         ring->queue_index = i;
         ring->dev = &adapter->pdev->dev;
         ring->netdev = adapter->netdev;
-        ring->numa_node = adapter->node;
         /* set flag indicating ring supports SCTP checksum offload */
         if (adapter->hw.mac.type >= e1000_82576)
             set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
@@ -742,16 +719,12 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
         adapter->rx_ring[i] = ring;
     }
-    /* Restore the adapter's original node */
-    adapter->node = orig_node;
 
     igb_cache_ring_register(adapter);
 
     return 0;
 
 err:
-    /* Restore the adapter's original node */
-    adapter->node = orig_node;
     igb_free_queues(adapter);
 
     return -ENOMEM;
@@ -1117,24 +1090,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter)
     struct igb_q_vector *q_vector;
     struct e1000_hw *hw = &adapter->hw;
     int v_idx;
-    int orig_node = adapter->node;
 
     for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
-        if ((adapter->num_q_vectors == (adapter->num_rx_queues +
-                                        adapter->num_tx_queues)) &&
-            (adapter->num_rx_queues == v_idx))
-            adapter->node = orig_node;
-        if (orig_node == -1) {
-            int cur_node = next_online_node(adapter->node);
-            if (cur_node == MAX_NUMNODES)
-                cur_node = first_online_node;
-            adapter->node = cur_node;
-        }
-        q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
-                                adapter->node);
-        if (!q_vector)
-            q_vector = kzalloc(sizeof(struct igb_q_vector),
-                               GFP_KERNEL);
+        q_vector = kzalloc(sizeof(struct igb_q_vector),
+                           GFP_KERNEL);
         if (!q_vector)
             goto err_out;
         q_vector->adapter = adapter;
@@ -1143,14 +1102,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter)
         netif_napi_add(adapter->netdev, &q_vector->napi,
                        igb_poll, 64);
         adapter->q_vector[v_idx] = q_vector;
     }
-    /* Restore the adapter's original node */
-    adapter->node = orig_node;
 
     return 0;
 
 err_out:
-    /* Restore the adapter's original node */
-    adapter->node = orig_node;
     igb_free_q_vectors(adapter);
     return -ENOMEM;
 }
@@ -1751,6 +1706,11 @@ void igb_reset(struct igb_adapter *adapter)
     /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
     wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
 
+#ifdef CONFIG_IGB_PTP
+    /* Re-enable PTP, where applicable. */
+    igb_ptp_reset(adapter);
+#endif /* CONFIG_IGB_PTP */
+
     igb_get_phy_info(hw);
 }
 
@@ -2180,11 +2140,12 @@ static int __devinit igb_probe(struct pci_dev *pdev,
     }
 
 #endif
+
 #ifdef CONFIG_IGB_PTP
     /* do hw tstamp init after resetting */
     igb_ptp_init(adapter);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
     dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
     /* print bus type/speed/width info */
     dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
@@ -2259,9 +2220,9 @@ static void __devexit igb_remove(struct pci_dev *pdev)
     pm_runtime_get_noresume(&pdev->dev);
 
 #ifdef CONFIG_IGB_PTP
-    igb_ptp_remove(adapter);
+    igb_ptp_stop(adapter);
+#endif /* CONFIG_IGB_PTP */
 
-#endif
     /*
      * The watchdog timer may be rescheduled, so explicitly
      * disable watchdog from being rescheduled.
@@ -2294,11 +2255,11 @@ static void __devexit igb_remove(struct pci_dev *pdev)
     /* reclaim resources allocated to VFs */
     if (adapter->vf_data) {
         /* disable iov and allow time for transactions to clear */
-        if (!igb_check_vf_assignment(adapter)) {
+        if (igb_vfs_are_assigned(adapter)) {
+            dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n");
+        } else {
             pci_disable_sriov(pdev);
             msleep(500);
-        } else {
-            dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
         }
 
         kfree(adapter->vf_data);
@@ -2338,7 +2299,7 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
 #ifdef CONFIG_PCI_IOV
     struct pci_dev *pdev = adapter->pdev;
     struct e1000_hw *hw = &adapter->hw;
-    int old_vfs = igb_find_enabled_vfs(adapter);
+    int old_vfs = pci_num_vf(adapter->pdev);
     int i;
 
     /* Virtualization features not supported on i210 family. */
@@ -2418,8 +2379,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
                   VLAN_HLEN;
     adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
-    adapter->node = -1;
-
     spin_lock_init(&adapter->stats64_lock);
 #ifdef CONFIG_PCI_IOV
     switch (hw->mac.type) {
@@ -2666,13 +2625,11 @@ static int igb_close(struct net_device *netdev)
 int igb_setup_tx_resources(struct igb_ring *tx_ring)
 {
     struct device *dev = tx_ring->dev;
-    int orig_node = dev_to_node(dev);
     int size;
 
     size = sizeof(struct igb_tx_buffer) * tx_ring->count;
-    tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
-    if (!tx_ring->tx_buffer_info)
-        tx_ring->tx_buffer_info = vzalloc(size);
+
+    tx_ring->tx_buffer_info = vzalloc(size);
     if (!tx_ring->tx_buffer_info)
         goto err;
 
@@ -2680,18 +2637,10 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
     tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
     tx_ring->size = ALIGN(tx_ring->size, 4096);
 
-    set_dev_node(dev, tx_ring->numa_node);
     tx_ring->desc = dma_alloc_coherent(dev,
                                        tx_ring->size,
                                        &tx_ring->dma,
                                        GFP_KERNEL);
-    set_dev_node(dev, orig_node);
-    if (!tx_ring->desc)
-        tx_ring->desc = dma_alloc_coherent(dev,
-                                           tx_ring->size,
-                                           &tx_ring->dma,
-                                           GFP_KERNEL);
-
     if (!tx_ring->desc)
         goto err;
 
@@ -2702,8 +2651,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring)
 
 err:
     vfree(tx_ring->tx_buffer_info);
-    dev_err(dev,
-        "Unable to allocate memory for the transmit descriptor ring\n");
+    tx_ring->tx_buffer_info = NULL;
+    dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
     return -ENOMEM;
 }
 
@@ -2820,34 +2769,23 @@ static void igb_configure_tx(struct igb_adapter *adapter)
 int igb_setup_rx_resources(struct igb_ring *rx_ring)
 {
     struct device *dev = rx_ring->dev;
-    int orig_node = dev_to_node(dev);
-    int size, desc_len;
+    int size;
 
     size = sizeof(struct igb_rx_buffer) * rx_ring->count;
-    rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
-    if (!rx_ring->rx_buffer_info)
-        rx_ring->rx_buffer_info = vzalloc(size);
+
+    rx_ring->rx_buffer_info = vzalloc(size);
     if (!rx_ring->rx_buffer_info)
         goto err;
 
-    desc_len = sizeof(union e1000_adv_rx_desc);
-
     /* Round up to nearest 4K */
-    rx_ring->size = rx_ring->count * desc_len;
+    rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
     rx_ring->size = ALIGN(rx_ring->size, 4096);
 
-    set_dev_node(dev, rx_ring->numa_node);
     rx_ring->desc = dma_alloc_coherent(dev,
                                        rx_ring->size,
                                        &rx_ring->dma,
                                        GFP_KERNEL);
-    set_dev_node(dev, orig_node);
-    if (!rx_ring->desc)
-        rx_ring->desc = dma_alloc_coherent(dev,
-                                           rx_ring->size,
-                                           &rx_ring->dma,
-                                           GFP_KERNEL);
-
     if (!rx_ring->desc)
         goto err;
 
@@ -2859,8 +2797,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
 err:
     vfree(rx_ring->rx_buffer_info);
     rx_ring->rx_buffer_info = NULL;
-    dev_err(dev, "Unable to allocate memory for the receive descriptor"
-        " ring\n");
+    dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
     return -ENOMEM;
 }
 
@@ -2898,57 +2835,48 @@ static void igb_setup_mrqc(struct igb_adapter *adapter)
 {
     struct e1000_hw *hw = &adapter->hw;
     u32 mrqc, rxcsum;
-    u32 j, num_rx_queues, shift = 0, shift2 = 0;
-    union e1000_reta {
-        u32 dword;
-        u8  bytes[4];
-    } reta;
-    static const u8 rsshash[40] = {
-        0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
-        0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
-        0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
-        0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
+    u32 j, num_rx_queues, shift = 0;
+    static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
+                                    0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
+                                    0xA32DCB77, 0x0CF23080, 0x3BB7426A,
+                                    0xFA01ACBE };
 
     /* Fill out hash function seeds */
-    for (j = 0; j < 10; j++) {
-        u32 rsskey = rsshash[(j * 4)];
-        rsskey |= rsshash[(j * 4) + 1] << 8;
-        rsskey |= rsshash[(j * 4) + 2] << 16;
-        rsskey |= rsshash[(j * 4) + 3] << 24;
-        array_wr32(E1000_RSSRK(0), j, rsskey);
-    }
+    for (j = 0; j < 10; j++)
+        wr32(E1000_RSSRK(j), rsskey[j]);
 
     num_rx_queues = adapter->rss_queues;
 
-    if (adapter->vfs_allocated_count) {
-        /* 82575 and 82576 supports 2 RSS queues for VMDq */
-        switch (hw->mac.type) {
-        case e1000_i350:
-        case e1000_82580:
-            num_rx_queues = 1;
-            shift = 0;
-            break;
-        case e1000_82576:
+    switch (hw->mac.type) {
+    case e1000_82575:
+        shift = 6;
+        break;
+    case e1000_82576:
+        /* 82576 supports 2 RSS queues for SR-IOV */
+        if (adapter->vfs_allocated_count) {
             shift = 3;
             num_rx_queues = 2;
-            break;
-        case e1000_82575:
-            shift = 2;
-            shift2 = 6;
-        default:
-            break;
         }
-    } else {
-        if (hw->mac.type == e1000_82575)
-            shift = 6;
+        break;
+    default:
+        break;
     }
 
-    for (j = 0; j < (32 * 4); j++) {
-        reta.bytes[j & 3] = (j % num_rx_queues) << shift;
-        if (shift2)
-            reta.bytes[j & 3] |= num_rx_queues << shift2;
-        if ((j & 3) == 3)
-            wr32(E1000_RETA(j >> 2), reta.dword);
+    /*
+     * Populate the indirection table 4 entries at a time.  To do this
+     * we are generating the results for n and n+2 and then interleaving
+     * those with the results with n+1 and n+3.
+     */
+    for (j = 0; j < 32; j++) {
+        /* first pass generates n and n+2 */
+        u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
+        u32 reta = (base & 0x07800780) >> (7 - shift);
+
+        /* second pass generates n+1 and n+3 */
+        base += 0x00010001 * num_rx_queues;
+        reta |= (base & 0x07800780) << (1 + shift);
+
+        wr32(E1000_RETA(j), reta);
     }
 
     /*
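The rewritten RETA loop packs two 16-bit computations into each u32 pass: for table index n, the byte written works out to ((n * num_rx_queues) >> 7), masked to four bits and shifted to the bit position the MAC reads, so the 128 entries land in contiguous per-queue blocks instead of the old round-robin. A self-contained check of that equivalence; q and shift mirror the driver's cases, and the packed arithmetic is copied from the hunk above.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned int q = 4, shift = 0;    /* e.g. 4 RSS queues, 82580-style */
    uint8_t table[128];
    unsigned int i, j;

    for (j = 0; j < 32; j++) {
        /* packed form, exactly as in igb_setup_mrqc() */
        uint32_t base = ((j * 0x00040004) + 0x00020000) * q;
        uint32_t reta = (base & 0x07800780) >> (7 - shift);

        base += 0x00010001 * q;
        reta |= (base & 0x07800780) << (1 + shift);

        for (i = 0; i < 4; i++)
            table[4 * j + i] = (uint8_t)(reta >> (8 * i));
    }

    /* scalar reference: entry n selects queue (n * q) >> 7, placed at
     * the bit position the hardware reads (shift) */
    for (i = 0; i < 128; i++)
        assert(table[i] == ((((i * q) >> 7) & 0xF) << shift));

    for (i = 0; i < 128; i += 16)    /* entries come in per-queue blocks */
        printf("entry %3u -> queue %u\n", i, table[i] >> shift);
    return 0;
}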
@@ -3184,8 +3112,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #endif
     srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+#ifdef CONFIG_IGB_PTP
     if (hw->mac.type >= e1000_82580)
         srrctl |= E1000_SRRCTL_TIMESTAMP;
+#endif /* CONFIG_IGB_PTP */
     /* Only set Drop Enable if we are supporting multiple queues */
     if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
         srrctl |= E1000_SRRCTL_DROP_EN;
@@ -3269,20 +3199,20 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
 {
     if (tx_buffer->skb) {
         dev_kfree_skb_any(tx_buffer->skb);
-        if (tx_buffer->dma)
+        if (dma_unmap_len(tx_buffer, len))
             dma_unmap_single(ring->dev,
-                             tx_buffer->dma,
-                             tx_buffer->length,
+                             dma_unmap_addr(tx_buffer, dma),
+                             dma_unmap_len(tx_buffer, len),
                              DMA_TO_DEVICE);
-    } else if (tx_buffer->dma) {
+    } else if (dma_unmap_len(tx_buffer, len)) {
         dma_unmap_page(ring->dev,
-                       tx_buffer->dma,
-                       tx_buffer->length,
+                       dma_unmap_addr(tx_buffer, dma),
+                       dma_unmap_len(tx_buffer, len),
                        DMA_TO_DEVICE);
     }
     tx_buffer->next_to_watch = NULL;
     tx_buffer->skb = NULL;
-    tx_buffer->dma = 0;
+    dma_unmap_len_set(tx_buffer, len, 0);
     /* buffer_info must be completely set up in the transmit path */
 }
 
@@ -4229,9 +4159,11 @@ static __le32 igb_tx_cmd_type(u32 tx_flags)
     if (tx_flags & IGB_TX_FLAGS_VLAN)
         cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
 
+#ifdef CONFIG_IGB_PTP
     /* set timestamp bit if present */
-    if (tx_flags & IGB_TX_FLAGS_TSTAMP)
+    if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP))
         cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
+#endif /* CONFIG_IGB_PTP */
 
     /* set segmentation bits for TSO */
     if (tx_flags & IGB_TX_FLAGS_TSO)
@@ -4275,7 +4207,7 @@ static void igb_tx_map(struct igb_ring *tx_ring,
                        const u8 hdr_len)
 {
     struct sk_buff *skb = first->skb;
-    struct igb_tx_buffer *tx_buffer_info;
+    struct igb_tx_buffer *tx_buffer;
     union e1000_adv_tx_desc *tx_desc;
     dma_addr_t dma;
     struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
@@ -4296,8 +4228,8 @@ static void igb_tx_map(struct igb_ring *tx_ring,
         goto dma_error;
 
     /* record length, and DMA address */
-    first->length = size;
-    first->dma = dma;
+    dma_unmap_len_set(first, len, size);
+    dma_unmap_addr_set(first, dma, dma);
     tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
     for (;;) {
@@ -4339,9 +4271,9 @@ static void igb_tx_map(struct igb_ring *tx_ring,
         if (dma_mapping_error(tx_ring->dev, dma))
             goto dma_error;
 
-        tx_buffer_info = &tx_ring->tx_buffer_info[i];
-        tx_buffer_info->length = size;
-        tx_buffer_info->dma = dma;
+        tx_buffer = &tx_ring->tx_buffer_info[i];
+        dma_unmap_len_set(tx_buffer, len, size);
+        dma_unmap_addr_set(tx_buffer, dma, dma);
 
         tx_desc->read.olinfo_status = 0;
         tx_desc->read.buffer_addr = cpu_to_le64(dma);
@@ -4392,9 +4324,9 @@ dma_error:
 
     /* clear dma mappings for failed tx_buffer_info map */
     for (;;) {
-        tx_buffer_info = &tx_ring->tx_buffer_info[i];
-        igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
-        if (tx_buffer_info == first)
+        tx_buffer = &tx_ring->tx_buffer_info[i];
+        igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+        if (tx_buffer == first)
             break;
         if (i == 0)
             i = tx_ring->count;
@@ -4440,6 +4372,9 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
                                 struct igb_ring *tx_ring)
 {
+#ifdef CONFIG_IGB_PTP
+    struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+#endif /* CONFIG_IGB_PTP */
     struct igb_tx_buffer *first;
     int tso;
     u32 tx_flags = 0;
@@ -4462,10 +4397,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
     first->bytecount = skb->len;
     first->gso_segs = 1;
 
-    if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+#ifdef CONFIG_IGB_PTP
+    if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+                 !(adapter->ptp_tx_skb))) {
         skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
         tx_flags |= IGB_TX_FLAGS_TSTAMP;
+
+        adapter->ptp_tx_skb = skb_get(skb);
+        if (adapter->hw.mac.type == e1000_82576)
+            schedule_work(&adapter->ptp_tx_work);
     }
+#endif /* CONFIG_IGB_PTP */
 
     if (vlan_tx_tag_present(skb)) {
         tx_flags |= IGB_TX_FLAGS_VLAN;
@@ -4661,11 +4603,13 @@ void igb_update_stats(struct igb_adapter *adapter,
     bytes = 0;
     packets = 0;
     for (i = 0; i < adapter->num_rx_queues; i++) {
-        u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
+        u32 rqdpc = rd32(E1000_RQDPC(i));
         struct igb_ring *ring = adapter->rx_ring[i];
 
-        ring->rx_stats.drops += rqdpc_tmp;
-        net_stats->rx_fifo_errors += rqdpc_tmp;
+        if (rqdpc) {
+            ring->rx_stats.drops += rqdpc;
+            net_stats->rx_fifo_errors += rqdpc;
+        }
 
         do {
             start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
@@ -4755,7 +4699,11 @@ void igb_update_stats(struct igb_adapter *adapter,
     reg = rd32(E1000_CTRL_EXT);
     if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
         adapter->stats.rxerrc += rd32(E1000_RXERRC);
-        adapter->stats.tncrs += rd32(E1000_TNCRS);
+
+        /* this stat has invalid values on i210/i211 */
+        if ((hw->mac.type != e1000_i210) &&
+            (hw->mac.type != e1000_i211))
+            adapter->stats.tncrs += rd32(E1000_TNCRS);
     }
 
     adapter->stats.tsctc += rd32(E1000_TSCTC);
@@ -4852,6 +4800,19 @@ static irqreturn_t igb_msix_other(int irq, void *data)
         mod_timer(&adapter->watchdog_timer, jiffies + 1);
     }
 
+#ifdef CONFIG_IGB_PTP
+    if (icr & E1000_ICR_TS) {
+        u32 tsicr = rd32(E1000_TSICR);
+
+        if (tsicr & E1000_TSICR_TXTS) {
+            /* acknowledge the interrupt */
+            wr32(E1000_TSICR, E1000_TSICR_TXTS);
+            /* retrieve hardware timestamp */
+            schedule_work(&adapter->ptp_tx_work);
+        }
+    }
+#endif /* CONFIG_IGB_PTP */
+
     wr32(E1000_EIMS, adapter->eims_other);
 
     return IRQ_HANDLED;
@@ -5002,102 +4963,43 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
 {
     unsigned char mac_addr[ETH_ALEN];
-    struct pci_dev *pdev = adapter->pdev;
-    struct e1000_hw *hw = &adapter->hw;
-    struct pci_dev *pvfdev;
-    unsigned int device_id;
-    u16 thisvf_devfn;
 
     eth_random_addr(mac_addr);
     igb_set_vf_mac(adapter, vf, mac_addr);
 
-    switch (adapter->hw.mac.type) {
-    case e1000_82576:
-        device_id = IGB_82576_VF_DEV_ID;
-        /* VF Stride for 82576 is 2 */
-        thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
-            (pdev->devfn & 1);
-        break;
-    case e1000_i350:
-        device_id = IGB_I350_VF_DEV_ID;
-        /* VF Stride for I350 is 4 */
-        thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
-                (pdev->devfn & 3);
-        break;
-    default:
-        device_id = 0;
-        thisvf_devfn = 0;
-        break;
-    }
-
-    pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
-    while (pvfdev) {
-        if (pvfdev->devfn == thisvf_devfn)
-            break;
-        pvfdev = pci_get_device(hw->vendor_id,
-                                device_id, pvfdev);
-    }
-
-    if (pvfdev)
-        adapter->vf_data[vf].vfdev = pvfdev;
-    else
-        dev_err(&pdev->dev,
-            "Couldn't find pci dev ptr for VF %4.4x\n",
-            thisvf_devfn);
-    return pvfdev != NULL;
+    return 0;
 }
 
-static int igb_find_enabled_vfs(struct igb_adapter *adapter)
+static bool igb_vfs_are_assigned(struct igb_adapter *adapter)
 {
-    struct e1000_hw *hw = &adapter->hw;
     struct pci_dev *pdev = adapter->pdev;
-    struct pci_dev *pvfdev;
-    u16 vf_devfn = 0;
-    u16 vf_stride;
-    unsigned int device_id;
-    int vfs_found = 0;
+    struct pci_dev *vfdev;
+    int dev_id;
 
     switch (adapter->hw.mac.type) {
     case e1000_82576:
-        device_id = IGB_82576_VF_DEV_ID;
-        /* VF Stride for 82576 is 2 */
-        vf_stride = 2;
+        dev_id = IGB_82576_VF_DEV_ID;
         break;
     case e1000_i350:
-        device_id = IGB_I350_VF_DEV_ID;
-        /* VF Stride for I350 is 4 */
-        vf_stride = 4;
+        dev_id = IGB_I350_VF_DEV_ID;
         break;
     default:
-        device_id = 0;
-        vf_stride = 0;
-        break;
-    }
-
-    vf_devfn = pdev->devfn + 0x80;
-    pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
-    while (pvfdev) {
-        if (pvfdev->devfn == vf_devfn &&
-            (pvfdev->bus->number >= pdev->bus->number))
-            vfs_found++;
-        vf_devfn += vf_stride;
-        pvfdev = pci_get_device(hw->vendor_id,
-                                device_id, pvfdev);
+        return false;
     }
 
-    return vfs_found;
-}
-
-static int igb_check_vf_assignment(struct igb_adapter *adapter)
-{
-    int i;
-    for (i = 0; i < adapter->vfs_allocated_count; i++) {
-        if (adapter->vf_data[i].vfdev) {
-            if (adapter->vf_data[i].vfdev->dev_flags &
-                PCI_DEV_FLAGS_ASSIGNED)
+    /* loop through all the VFs to see if we own any that are assigned */
+    vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
+    while (vfdev) {
+        /* if we don't own it we don't care */
+        if (vfdev->is_virtfn && vfdev->physfn == pdev) {
+            /* if it is assigned we cannot release it */
+            if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
                 return true;
         }
+
+        vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev);
     }
+
     return false;
 }
 
@@ -5643,6 +5545,19 @@ static irqreturn_t igb_intr_msi(int irq, void *data)
         mod_timer(&adapter->watchdog_timer, jiffies + 1);
     }
 
+#ifdef CONFIG_IGB_PTP
+    if (icr & E1000_ICR_TS) {
+        u32 tsicr = rd32(E1000_TSICR);
+
+        if (tsicr & E1000_TSICR_TXTS) {
+            /* acknowledge the interrupt */
+            wr32(E1000_TSICR, E1000_TSICR_TXTS);
+            /* retrieve hardware timestamp */
+            schedule_work(&adapter->ptp_tx_work);
+        }
+    }
+#endif /* CONFIG_IGB_PTP */
+
     napi_schedule(&q_vector->napi);
 
     return IRQ_HANDLED;
@@ -5684,6 +5599,19 @@ static irqreturn_t igb_intr(int irq, void *data)
         mod_timer(&adapter->watchdog_timer, jiffies + 1);
     }
 
+#ifdef CONFIG_IGB_PTP
+    if (icr & E1000_ICR_TS) {
+        u32 tsicr = rd32(E1000_TSICR);
+
+        if (tsicr & E1000_TSICR_TXTS) {
+            /* acknowledge the interrupt */
+            wr32(E1000_TSICR, E1000_TSICR_TXTS);
+            /* retrieve hardware timestamp */
+            schedule_work(&adapter->ptp_tx_work);
+        }
+    }
+#endif /* CONFIG_IGB_PTP */
+
     napi_schedule(&q_vector->napi);
 
     return IRQ_HANDLED;
@@ -5743,37 +5671,6 @@ static int igb_poll(struct napi_struct *napi, int budget)
     return 0;
 }
 
-#ifdef CONFIG_IGB_PTP
-/**
- * igb_tx_hwtstamp - utility function which checks for TX time stamp
- * @q_vector: pointer to q_vector containing needed info
- * @buffer: pointer to igb_tx_buffer structure
- *
- * If we were asked to do hardware stamping and such a time stamp is
- * available, then it must have been for this skb here because we only
- * allow only one such packet into the queue.
- */
-static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
-                            struct igb_tx_buffer *buffer_info)
-{
-    struct igb_adapter *adapter = q_vector->adapter;
-    struct e1000_hw *hw = &adapter->hw;
-    struct skb_shared_hwtstamps shhwtstamps;
-    u64 regval;
-
-    /* if skb does not support hw timestamp or TX stamp not valid exit */
-    if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
-        !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
-        return;
-
-    regval = rd32(E1000_TXSTMPL);
-    regval |= (u64)rd32(E1000_TXSTMPH) << 32;
-
-    igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
-    skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
-}
-
-#endif
 /**
  * igb_clean_tx_irq - Reclaim resources after transmit completes
  * @q_vector: pointer to q_vector containing needed info
@@ -5785,7 +5682,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
     struct igb_adapter *adapter = q_vector->adapter;
     struct igb_ring *tx_ring = q_vector->tx.ring;
     struct igb_tx_buffer *tx_buffer;
-    union e1000_adv_tx_desc *tx_desc, *eop_desc;
+    union e1000_adv_tx_desc *tx_desc;
     unsigned int total_bytes = 0, total_packets = 0;
     unsigned int budget = q_vector->tx.work_limit;
     unsigned int i = tx_ring->next_to_clean;
@@ -5797,16 +5694,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
     tx_desc = IGB_TX_DESC(tx_ring, i);
     i -= tx_ring->count;
 
-    for (; budget; budget--) {
-        eop_desc = tx_buffer->next_to_watch;
-
-        /* prevent any other reads prior to eop_desc */
-        rmb();
+    do {
+        union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
 
         /* if next_to_watch is not set then there is no work pending */
         if (!eop_desc)
             break;
 
+        /* prevent any other reads prior to eop_desc */
+        rmb();
+
         /* if DD is not set pending work has not been completed */
         if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
             break;
@@ -5818,25 +5715,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
         total_bytes += tx_buffer->bytecount;
         total_packets += tx_buffer->gso_segs;
 
-#ifdef CONFIG_IGB_PTP
-        /* retrieve hardware timestamp */
-        igb_tx_hwtstamp(q_vector, tx_buffer);
-
-#endif
         /* free the skb */
         dev_kfree_skb_any(tx_buffer->skb);
-        tx_buffer->skb = NULL;
 
         /* unmap skb header data */
         dma_unmap_single(tx_ring->dev,
-                         tx_buffer->dma,
-                         tx_buffer->length,
+                         dma_unmap_addr(tx_buffer, dma),
+                         dma_unmap_len(tx_buffer, len),
                          DMA_TO_DEVICE);
 
+        /* clear tx_buffer data */
+        tx_buffer->skb = NULL;
+        dma_unmap_len_set(tx_buffer, len, 0);
+
         /* clear last DMA location and unmap remaining buffers */
         while (tx_desc != eop_desc) {
-            tx_buffer->dma = 0;
-
             tx_buffer++;
             tx_desc++;
             i++;
@@ -5847,17 +5740,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
             }
 
             /* unmap any remaining paged data */
-            if (tx_buffer->dma) {
+            if (dma_unmap_len(tx_buffer, len)) {
                 dma_unmap_page(tx_ring->dev,
-                               tx_buffer->dma,
-                               tx_buffer->length,
+                               dma_unmap_addr(tx_buffer, dma),
+                               dma_unmap_len(tx_buffer, len),
                                DMA_TO_DEVICE);
+                dma_unmap_len_set(tx_buffer, len, 0);
             }
         }
 
-        /* clear last DMA location */
-        tx_buffer->dma = 0;
-
         /* move us one more past the eop_desc for start of next pkt */
         tx_buffer++;
         tx_desc++;
@@ -5867,7 +5758,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
             tx_buffer = tx_ring->tx_buffer_info;
             tx_desc = IGB_TX_DESC(tx_ring, 0);
         }
-    }
+
+        /* issue prefetch for next Tx descriptor */
+        prefetch(tx_desc);
+
+        /* update budget accounting */
+        budget--;
+    } while (likely(budget));
 
     netdev_tx_completed_queue(txring_txq(tx_ring),
                               total_packets, total_bytes);
@@ -5883,12 +5780,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
     if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
         struct e1000_hw *hw = &adapter->hw;
 
-        eop_desc = tx_buffer->next_to_watch;
-
         /* Detect a transmit hang in hardware, this serializes the
          * check with the clearing of time_stamp and movement of i */
         clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
-        if (eop_desc &&
+        if (tx_buffer->next_to_watch &&
             time_after(jiffies, tx_buffer->time_stamp +
                        (adapter->tx_timeout_factor * HZ)) &&
             !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
@@ -5912,9 +5807,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
             tx_ring->next_to_use,
             tx_ring->next_to_clean,
             tx_buffer->time_stamp,
-            eop_desc,
+            tx_buffer->next_to_watch,
             jiffies,
-            eop_desc->wb.status);
+            tx_buffer->next_to_watch->wb.status);
         netif_stop_subqueue(tx_ring->netdev,
                             tx_ring->queue_index);
@@ -5994,47 +5889,6 @@ static inline void igb_rx_hash(struct igb_ring *ring,
         skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
 }
 
-#ifdef CONFIG_IGB_PTP
-static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
-                            union e1000_adv_rx_desc *rx_desc,
-                            struct sk_buff *skb)
-{
-    struct igb_adapter *adapter = q_vector->adapter;
-    struct e1000_hw *hw = &adapter->hw;
-    u64 regval;
-
-    if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
-                                   E1000_RXDADV_STAT_TS))
-        return;
-
-    /*
-     * If this bit is set, then the RX registers contain the time stamp. No
-     * other packet will be time stamped until we read these registers, so
-     * read the registers to make them available again. Because only one
-     * packet can be time stamped at a time, we know that the register
-     * values must belong to this one here and therefore we don't need to
-     * compare any of the additional attributes stored for it.
-     *
-     * If nothing went wrong, then it should have a shared tx_flags that we
-     * can turn into a skb_shared_hwtstamps.
-     */
-    if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-        u32 *stamp = (u32 *)skb->data;
-        regval = le32_to_cpu(*(stamp + 2));
-        regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
-        skb_pull(skb, IGB_TS_HDR_LEN);
-    } else {
-        if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
-            return;
-
-        regval = rd32(E1000_RXSTMPL);
-        regval |= (u64)rd32(E1000_RXSTMPH) << 32;
-    }
-
-    igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
-}
-
-#endif
 static void igb_rx_vlan(struct igb_ring *ring,
                         union e1000_adv_rx_desc *rx_desc,
                         struct sk_buff *skb)
@@ -6146,8 +6000,8 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
         }
 
 #ifdef CONFIG_IGB_PTP
-        igb_rx_hwtstamp(q_vector, rx_desc, skb);
-#endif
+        igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb);
+#endif /* CONFIG_IGB_PTP */
         igb_rx_hash(rx_ring, rx_desc, skb);
         igb_rx_checksum(rx_ring, rx_desc, skb);
         igb_rx_vlan(rx_ring, rx_desc, skb);
@@ -6341,181 +6195,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * igb_hwtstamp_ioctl - control hardware time stamping
- * @netdev:
- * @ifreq:
- * @cmd:
- *
- * Outgoing time stamping can be enabled and disabled. Play nice and
- * disable it when requested, although it shouldn't case any overhead
- * when no packet needs it. At most one packet in the queue may be
- * marked for time stamping, otherwise it would be impossible to tell
- * for sure to which packet the hardware time stamp belongs.
- *
- * Incoming time stamping has to be configured via the hardware
- * filters. Not all combinations are supported, in particular event
- * type has to be specified. Matching the kind of event packet is
Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". - * - **/ -static int igb_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * register TSYNCRXCFG must be set, therefore it is not - * possible to time stamp both Sync and Delay_Req messages - * => fall back to time stamping all packets - */ - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - default: - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. 
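For orientation, the ioctl being deleted here (and re-added in igb_ptp.c further down) implements the standard SIOCSHWTSTAMP contract. From userspace the request side looks roughly like this; the interface name and filter choice are placeholders, and error handling is trimmed:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/net_tstamp.h>
	#include <linux/sockios.h>

	/* 'fd' is any socket on which interface ioctls may be issued */
	int request_hw_tstamps(int fd)
	{
		struct hwtstamp_config cfg;
		struct ifreq ifr;

		memset(&cfg, 0, sizeof(cfg));
		cfg.tx_type = HWTSTAMP_TX_ON;
		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_SYNC;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&cfg;

		if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
			return -1;

		/* drivers may widen the filter (e.g. to _ALL or _SOME), so
		 * inspect what actually took effect */
		return cfg.rx_filter;
	}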
- */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - } - - /* enable/disable TX */ - regval = rd32(E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - wr32(E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = rd32(E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - wr32(E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - wr32(E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - wr32(E1000_ETQF(3), 0); - -#define PTP_PORT 319 - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - wr32(E1000_IMIR(3), htons(PTP_PORT)); - wr32(E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - wr32(E1000_SPQF(3), htons(PTP_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - wr32(E1000_FTQF(3), ftqf); - } else { - wr32(E1000_FTQF(3), E1000_FTQF_MASK); - } - wrfl(); - - adapter->hwtstamp_config = config; - - /* clear TX/RX time stamp registers, just to be sure */ - regval = rd32(E1000_TXSTMPH); - regval = rd32(E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -/** * igb_ioctl - * @netdev: * @ifreq: @@ -6528,8 +6207,10 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); +#ifdef CONFIG_IGB_PTP case SIOCSHWTSTAMP: - return igb_hwtstamp_ioctl(netdev, ifr, cmd); + return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); +#endif /* CONFIG_IGB_PTP */ default: return -EOPNOTSUPP; } @@ -6667,6 +6348,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index c846ea9131a..ee21445157a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -69,22 +69,22 @@ * 2^40 * 10^-9 / 60 = 18.3 minutes. 
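The 18.3-minute figure worked out above is exactly the deadline the overflow work must beat: the 40-bit counter has to be sampled at least once per wrap for the software extension to stay unambiguous. A compressed sketch of the cyclecounter/timecounter pairing behind this (kernel-internal API of this era; the register-read callback is a hypothetical stand-in):

	#include <linux/clocksource.h>	/* cyclecounter/timecounter live here in this era */
	#include <linux/ktime.h>

	static cycle_t foo_systim_read(const struct cyclecounter *cc);	/* hw read, assumed */

	static struct cyclecounter foo_cc = {
		.read	= foo_systim_read,
		.mask	= CLOCKSOURCE_MASK(40),	/* only 40 counter bits are valid */
		.mult	= 1,
		.shift	= 0,
	};
	static struct timecounter foo_tc;

	static void foo_clock_init(void)
	{
		timecounter_init(&foo_tc, &foo_cc, ktime_to_ns(ktime_get_real()));
	}

	/* Each call folds the elapsed cycles into the software time base, so
	 * calling this at least once per overflow period keeps it coherent. */
	static u64 foo_clock_read_ns(void)
	{
		return timecounter_read(&foo_tc);
	}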
 */ -#define IGB_OVERFLOW_PERIOD (HZ * 60 * 9) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) +#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) +#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) +#define IGB_NBITS_82580 40 /* * SYSTIM read access for the 82576 */ -static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; lo = rd32(E1000_SYSTIML); hi = rd32(E1000_SYSTIMH); @@ -99,12 +99,12 @@ static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) * SYSTIM read access for the 82580 */ -static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi, jk; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi, jk; /* * The timestamp latches on lowest register read. For the 82580 @@ -122,16 +122,101 @@ static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) } /* + * SYSTIM read access for I210/I211 + */ + +static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + u32 sec, nsec, jk; + + /* + * The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + jk = rd32(E1000_SYSTIMR); + nsec = rd32(E1000_SYSTIML); + sec = rd32(E1000_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igb_ptp_write_i210(struct igb_adapter *adapter, + const struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * Writing the SYSTIMR register is not necessary as it only provides + * sub-nanosecond resolution. + */ + wr32(E1000_SYSTIML, ts->tv_nsec); + wr32(E1000_SYSTIMH, ts->tv_sec); +} + +/** + * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value. + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + * + * The 'tmreg_lock' spinlock is used to protect the consistency of the + * system time value. This is needed because reading the 64 bit time + * value involves reading two (or three) 32 bit registers. The first + * read latches the value. Ditto for writing. + * + * In addition, here we have extended the system time with an overflow + * counter in software. 
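A note on the latch-on-lowest-read convention shared by all three SYSTIM readers above: a counter wider than 32 bits cannot be read atomically through 32-bit registers, so the hardware snapshots the higher words the moment the lowest one is read. The generic shape, with invented register names:

	/* The low-word read latches the rest, so both halves describe the
	 * same instant even if the counter ticks between the two reads. */
	static u64 foo_read_latched_counter(struct foo_hw *hw)
	{
		u32 lo = foo_readl(hw, FOO_CNT_LO);	/* latches FOO_CNT_HI */
		u32 hi = foo_readl(hw, FOO_CNT_HI);

		return ((u64)hi << 32) | lo;
	}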
+ **/ +static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + unsigned long flags; + u64 ns; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + ns = timecounter_cyc2time(&adapter->tc, systim); + + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); + break; + case e1000_i210: + case e1000_i211: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/* * PTP clock operations */ -static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 incvalue; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -153,13 +238,14 @@ static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 inca; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -178,11 +264,12 @@ static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) +static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) { - s64 now; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + s64 now; spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -195,12 +282,32 @@ static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } -static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + struct timespec now, then = ns_to_timespec(delta); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, &now); + now = timespec_add(now, then); + igb_ptp_write_i210(igb, (const struct timespec *)&now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; u64 ns; u32 remainder; - unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -214,11 +321,29 @@ static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) return 0; } -static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) +static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, + struct timespec *ts) { - u64 ns; + 
struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; ns = ts->tv_sec * 1000000000ULL; ns += ts->tv_nsec; @@ -232,77 +357,369 @@ static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) return 0; } -static int ptp_82576_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, + const struct timespec *ts) { - return -EOPNOTSUPP; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_write_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; } -static int ptp_82580_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } -static void igb_overflow_check(struct work_struct *work) +/** + * igb_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igb_ptp_tx_work(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, struct igb_adapter, + ptp_tx_work); + struct e1000_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + tsynctxctl = rd32(E1000_TSYNCTXCTL); + if (tsynctxctl & E1000_TSYNCTXCTL_VALID) + igb_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +static void igb_ptp_overflow_check(struct work_struct *work) { - struct timespec ts; struct igb_adapter *igb = - container_of(work, struct igb_adapter, overflow_work.work); + container_of(work, struct igb_adapter, ptp_overflow_work.work); + struct timespec ts; - igb_gettime(&igb->caps, &ts); + igb->ptp_caps.gettime(&igb->ptp_caps, &ts); pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - schedule_delayed_work(&igb->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&igb->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); +} + +/** + * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure. + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow one such packet into the queue. 
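The polling work above assumes a transmit path that parks at most one candidate skb in adapter->ptp_tx_skb. A condensed, hypothetical xmit fragment showing that convention (the skb_shinfo() flags are the standard kernel ones; the surrounding logic is illustrative):

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
	    !adapter->ptp_tx_skb) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		adapter->ptp_tx_skb = skb_get(skb);	/* hold a ref for the work */
		/* ...flag the descriptor for hardware timestamping... */
		schedule_work(&adapter->ptp_tx_work);
	}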
+ */ +void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps shhwtstamps; + u64 regval; + + regval = rd32(E1000_TXSTMPL); + regval |= (u64)rd32(E1000_TXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + u64 regval; + + if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | + E1000_RXDADV_STAT_TS)) + return; + + /* + * If this bit is set, then the RX registers contain the time stamp. No + * other packet will be time stamped until we read these registers, so + * read the registers to make them available again. Because only one + * packet can be time stamped at a time, we know that the register + * values must belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared tx_flags that we + * can turn into a skb_shared_hwtstamps. + */ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + u32 *stamp = (u32 *)skb->data; + regval = le32_to_cpu(*(stamp + 2)); + regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; + skb_pull(skb, IGB_TS_HDR_LEN); + } else { + if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; + + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; + } + + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); +} + +/** + * igb_ptp_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't cause any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". 
+ * + **/ +int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * register TSYNCRXCFG must be set, therefore it is not + * possible to time stamp both Sync and Delay_Req messages + * => fall back to time stamping all packets + */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + default: + return -ERANGE; + } + + if (hw->mac.type == e1000_82575) { + if (tsync_rx_ctl | tsync_tx_ctl) + return -EINVAL; + return 0; + } + + /* + * Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as + * long as one rx filter was configured. 
+ */ + if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { + tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + + if ((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) { + regval = rd32(E1000_RXPBS); + regval |= E1000_RXPBS_CFG_TS_EN; + wr32(E1000_RXPBS, regval); + } + } + + /* enable/disable TX */ + regval = rd32(E1000_TSYNCTXCTL); + regval &= ~E1000_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + wr32(E1000_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = rd32(E1000_TSYNCRXCTL); + regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + wr32(E1000_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + wr32(E1000_ETQF(3), + (E1000_ETQF_FILTER_ENABLE | /* enable filter */ + E1000_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + wr32(E1000_ETQF(3), 0); + +#define PTP_PORT 319 + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | E1000_FTQF_VF_BP /* VF not compared */ + | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ + | E1000_FTQF_MASK); /* mask all inputs */ + ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + wr32(E1000_IMIR(3), htons(PTP_PORT)); + wr32(E1000_IMIREXT(3), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + if (hw->mac.type == e1000_82576) { + /* enable source port check */ + wr32(E1000_SPQF(3), htons(PTP_PORT)); + ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; + } + wr32(E1000_FTQF(3), ftqf); + } else { + wr32(E1000_FTQF(3), E1000_FTQF_MASK); + } + wrfl(); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(E1000_TXSTMPL); + regval = rd32(E1000_TXSTMPH); + regval = rd32(E1000_RXSTMPL); + regval = rd32(E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; } void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; switch (hw->mac.type) { - case e1000_i210: - case e1000_i211: - case e1000_i350: + case e1000_82576: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 1000000000; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82576; + adapter->cc.mask = CLOCKSOURCE_MASK(64); + adapter->cc.mult = 1; + adapter->cc.shift = IGB_82576_TSYNC_SHIFT; + /* Dial the nominal frequency. 
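For reference, both adjfreq callbacks this file registers reduce to the same parts-per-billion scaling of a nominal increment value; a generic, hedged rendering of that arithmetic:

	#include <linux/math64.h>

	/* Scale a nominal increment by |ppb| / 1e9 and apply the sign. */
	static u32 foo_scale_increment(u32 incvalue, s32 ppb)
	{
		int neg_adj = 0;
		u64 rate;

		if (ppb < 0) {
			neg_adj = 1;
			ppb = -ppb;
		}

		rate = incvalue;
		rate *= ppb;
		rate = div_u64(rate, 1000000000);

		return neg_adj ? incvalue - (u32)rate : incvalue + (u32)rate;
	}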
*/ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; case e1000_82580: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82580"); - adapter->caps.max_adj = 62499999; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82580_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82580_enable; - adapter->cc.read = igb_82580_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; + case e1000_i350: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82580; + adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); + adapter->cc.mult = 1; + adapter->cc.shift = 0; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; - - case e1000_82576: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82576"); - adapter->caps.max_adj = 1000000000; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82576_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82576_enable; - adapter->cc.read = igb_82576_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. */ - wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + case e1000_i210: + case e1000_i211: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; + adapter->ptp_caps.gettime = igb_ptp_gettime_i210; + adapter->ptp_caps.settime = igb_ptp_settime_i210; + adapter->ptp_caps.enable = igb_ptp_enable; + /* Enable the timer functions by clearing bit 31. */ + wr32(E1000_TSAUXC, 0x0); break; - default: adapter->ptp_clock = NULL; return; @@ -310,86 +727,114 @@ void igb_ptp_init(struct igb_adapter *adapter) wrfl(); - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + /* Initialize the clock and overflow work for devices that need it. 
*/ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - INIT_DELAYED_WORK(&adapter->overflow_work, igb_overflow_check); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); - spin_lock_init(&adapter->tmreg_lock); + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); - schedule_delayed_work(&adapter->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); + } + + /* Initialize the time sync interrupts for devices that support it. */ + if (hw->mac.type >= e1000_82580) { + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); + } - adapter->ptp_clock = ptp_clock_register(&adapter->caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else + } else { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); + adapter->flags |= IGB_FLAG_PTP; + } } -void igb_ptp_remove(struct igb_adapter *adapter) +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) { switch (adapter->hw.mac.type) { - case e1000_i211: - case e1000_i210: - case e1000_i350: - case e1000_82580: case e1000_82576: - cancel_delayed_work_sync(&adapter->overflow_work); + case e1000_82580: + case e1000_i350: + cancel_delayed_work_sync(&adapter->ptp_overflow_work); + break; + case e1000_i210: + case e1000_i211: + /* No delayed work to cancel. */ break; default: return; } + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); dev_info(&adapter->pdev->dev, "removed PHC on %s\n", adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_PTP; } } /** - * igb_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. + * igb_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. + * This function handles the reset work required to re-enable the PTP device. **/ -void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) +void igb_ptp_reset(struct igb_adapter *adapter) { - u64 ns; - unsigned long flags; + struct e1000_hw *hw = &adapter->hw; + + if (!(adapter->flags & IGB_FLAG_PTP)) + return; switch (adapter->hw.mac.type) { + case e1000_82576: + /* Dial the nominal frequency. 
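One detail worth flagging in the hunk above: ptp_clock_register() now takes the parent device as a second argument, which is what ties the PHC to its PCI function in sysfs. A minimal registration sketch against that signature (callback names are placeholders):

	#include <linux/ptp_clock_kernel.h>

	static struct ptp_clock_info foo_caps = {
		.owner		= THIS_MODULE,
		.name		= "foo-phc",
		.max_adj	= 62499999,
		.n_ext_ts	= 0,
		.pps		= 0,
		.adjfreq	= foo_adjfreq,
		.adjtime	= foo_adjtime,
		.gettime	= foo_gettime,
		.settime	= foo_settime,
		.enable		= foo_enable,
	};

	/* in probe: register, and degrade gracefully without a PHC */
	priv->ptp_clock = ptp_clock_register(&foo_caps, &pdev->dev);
	if (IS_ERR(priv->ptp_clock))
		priv->ptp_clock = NULL;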
*/ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: case e1000_i210: case e1000_i211: - case e1000_i350: - case e1000_82580: - case e1000_82576: + /* Enable the timer functions and interrupts. */ + wr32(E1000_TSAUXC, 0x0); + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); break; default: + /* No work to do. */ return; } - spin_lock_irqsave(&adapter->tmreg_lock, flags); + /* Re-initialize the timer. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } } diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 5fd5d04c26c..89f40e51fc1 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_IXGBE) += ixgbe.o -ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ +ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o\ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b9623e9ea89..5bd26763554 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -78,6 +78,9 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -104,6 +107,7 @@ #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) #define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_NO_IFCS (u32)(1 << 9) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -293,16 +297,25 @@ struct ixgbe_ring_feature { * this is twice the size of a half page we need to double the page order * for FCoE enabled Rx queues. */ -#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { - return test_bit(__IXGBE_RX_FCOE, &ring->state) ? 1 : 0; +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : + IXGBE_RXBUFFER_3K; +#endif + return IXGBE_RXBUFFER_2K; } -#else -#define ixgbe_rx_pg_order(_ring) 0 + +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? 
1 : 0; #endif + return 0; +} #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) -#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ @@ -584,6 +597,9 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + struct dentry *ixgbe_dbg_adapter; +#endif /*CONFIG_DEBUG_FS*/ }; struct ixgbe_fdir_filter { @@ -712,7 +728,12 @@ extern int ixgbe_fcoe_get_hbainfo(struct net_device *netdev, struct netdev_fcoe_hbainfo *info); extern u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter); #endif /* IXGBE_FCOE */ - +#ifdef CONFIG_DEBUG_FS +extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_init(void); +extern void ixgbe_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->queue_index); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c new file mode 100644 index 00000000000..8d3a2188909 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -0,0 +1,300 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "ixgbe.h" + +static struct dentry *ixgbe_dbg_root; + +static char ixgbe_dbg_reg_ops_buf[256] = ""; + +/** + * ixgbe_dbg_reg_ops_open - prep the debugfs pokee data item when opened + * @inode: inode that was opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer where + * we can find it later in the read and write calls + **/ +static int ixgbe_dbg_reg_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_reg_ops_read - read for reg_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_reg_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_reg_ops_write - write into reg_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_reg_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_reg_ops_buf, buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_reg_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_reg_ops_buf, "write", 5) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[5], "%x %x", ®, &value); + if (cnt == 2) { + IXGBE_WRITE_REG(&adapter->hw, reg, value); + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("write: 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("write <reg> <value>\n"); + } + } else if (strncmp(ixgbe_dbg_reg_ops_buf, "read", 4) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[4], "%x", ®); + if (cnt == 1) { + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("read 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("read <reg>\n"); + } + } else { + e_dev_info("Unknown command %s\n", ixgbe_dbg_reg_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" read <reg>\n"); + e_dev_info(" write <reg> <value>\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_reg_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_reg_ops_open, + .read = ixgbe_dbg_reg_ops_read, + .write = ixgbe_dbg_reg_ops_write, +}; + +static char ixgbe_dbg_netdev_ops_buf[256] = ""; + +/** + * ixgbe_dbg_netdev_ops_open - prep the debugfs netdev_ops data item + * @inode: inode that was 
opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer + * where we can find it later in the read and write calls + **/ +static int ixgbe_dbg_netdev_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_netdev_ops_read - read for netdev_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, + char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_netdev_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_netdev_ops_write - write into netdev_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_netdev_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_netdev_ops_buf, + buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_netdev_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + e_dev_info("tx_timeout called\n"); + } else { + e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" tx_timeout\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_netdev_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_netdev_ops_open, + .read = ixgbe_dbg_netdev_ops_read, + .write = ixgbe_dbg_netdev_ops_write, +}; + +/** + * ixgbe_dbg_adapter_init - setup the debugfs directory for the adapter + * @adapter: the adapter that is starting up + **/ +void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) +{ + const char *name = pci_name(adapter->pdev); + struct dentry *pfile; + adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root); + if (adapter->ixgbe_dbg_adapter) { + pfile = debugfs_create_file("reg_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_reg_ops_fops); + if (!pfile) + e_dev_err("debugfs reg_ops for %s failed\n", name); + pfile = debugfs_create_file("netdev_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_netdev_ops_fops); + if (!pfile) + e_dev_err("debugfs netdev_ops for %s failed\n", name); + } else { + e_dev_err("debugfs entry for %s failed\n", name); + } +} + +/** + * ixgbe_dbg_adapter_exit - clear out the adapter's debugfs entries + * @pf: the pf that is stopping + **/ +void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) +{ + if (adapter->ixgbe_dbg_adapter) + 
debugfs_remove_recursive(adapter->ixgbe_dbg_adapter); + adapter->ixgbe_dbg_adapter = NULL; +} + +/** + * ixgbe_dbg_init - start up debugfs for the driver + **/ +void ixgbe_dbg_init(void) +{ + ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL); + if (ixgbe_dbg_root == NULL) + pr_err("init of debugfs failed\n"); +} + +/** + * ixgbe_dbg_exit - clean out the driver's debugfs entries + **/ +void ixgbe_dbg_exit(void) +{ + debugfs_remove_recursive(ixgbe_dbg_root); +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ee61819d608..868af693821 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1167,7 +1167,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } bi->dma = dma; - bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + bi->page_offset = 0; return true; } @@ -1320,29 +1320,6 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return max_len; } -static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - __le32 rsc_enabled; - u32 rsc_cnt; - - if (!ring_is_rsc_enabled(rx_ring)) - return; - - rsc_enabled = rx_desc->wb.lower.lo_dword.data & - cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); - - /* If this is an RSC frame rsc_cnt should be non-zero */ - if (!rsc_enabled) - return; - - rsc_cnt = le32_to_cpu(rsc_enabled); - rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; - - IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; -} - static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { @@ -1440,16 +1417,28 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, prefetch(IXGBE_RX_DESC(rx_ring, ntc)); - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; + /* update RSC append count if present */ + if (ring_is_rsc_enabled(rx_ring)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); + + rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; + IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; - /* append_cnt indicates packet is RSC, if so fetch nextp */ - if (IXGBE_CB(skb)->append_cnt) { - ntc = le32_to_cpu(rx_desc->wb.upper.status_error); - ntc &= IXGBE_RXDADV_NEXTP_MASK; - ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } } + /* if we are the last buffer then there is nothing else to do */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + /* place skb in next buffer to be received */ rx_ring->rx_buffer_info[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; @@ -1458,6 +1447,78 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, } /** + * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being adjusted + * + * This function is an ixgbe specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. 
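To exercise the two debugfs files created above from userspace (root only, given the 0600 mode), something like the following works; the debugfs mount point and PCI address are placeholders:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path =
			"/sys/kernel/debug/ixgbe/0000:01:00.0/reg_ops";
		char buf[256];
		ssize_t n;
		int fd = open(path, O_RDWR);

		if (fd < 0)
			return 1;
		/* ask the driver to read a register; it logs the value and
		 * echoes the last command back through this file */
		write(fd, "read 0x42", strlen("read 0x42"));
		lseek(fd, 0, SEEK_SET);	/* reads must start at offset 0 */
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			fputs(buf, stdout);
		}
		close(fd);
		return 0;
	}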
+ */ +static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being updated + * + * This function provides a basic DMA sync up for the first fragment of an + * skb. The reason for doing this is that the first fragment cannot be + * unmapped until we have reached the end of packet descriptor for a buffer + * chain. + */ +static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; +} + +/** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor @@ -1479,24 +1540,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; struct net_device *netdev = rx_ring->netdev; - unsigned char *va; - unsigned int pull_len; - - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; - } else { - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - frag->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - IXGBE_CB(skb)->dma = 0; /* verify that the packet does not have any known errors */ if (unlikely(ixgbe_test_staterr(rx_desc, @@ -1506,40 +1550,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return true; } - /* - * it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* - * we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. 
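One subtlety in ixgbe_pull_tail() above: the memcpy length is rounded up to a whole number of machine words because word-sized copies are cheaper, and the linear area is known to have the headroom, while only the true header length is accounted to the skb. Reduced to its essence (a hedged fragment, not the driver's exact code):

	/* copy 'len' header bytes from the frag, word-aligned for speed */
	skb_copy_to_linear_data(skb, va, ALIGN(len, sizeof(long)));
	__skb_put(skb, len);	/* but advance tail by the true length only */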
- */ - pull_len = skb_frag_size(frag); - if (pull_len > IXGBE_RX_HDR_SIZE) - pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; - - /* - * if we sucked the frag empty then we should free it, - * if there are other frags here something is screwed up in hardware - */ - if (skb_frag_size(frag) == 0) { - BUG_ON(skb_shinfo(skb)->nr_frags != 1); - skb_shinfo(skb)->nr_frags = 0; - __skb_frag_unref(frag); - skb->truesize -= ixgbe_rx_bufsz(rx_ring); - } + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE /* do not attempt to pad FCoE Frames as this will disrupt DDP */ @@ -1560,33 +1573,17 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /** - * ixgbe_can_reuse_page - determine if we can reuse a page - * @rx_buffer: pointer to rx_buffer containing the page we want to reuse - * - * Returns true if page can be reused in another Rx buffer - **/ -static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) -{ - struct page *page = rx_buffer->page; - - /* if we are only owner of page and it is local we can reuse it */ - return likely(page_count(page) == 1) && - likely(page_to_nid(page) == numa_node_id()); -} - -/** * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: rx descriptor ring to store buffers on * @old_buff: donor buffer to have page reused * - * Syncronizes page for reuse by the adapter + * Synchronizes page for reuse by the adapter **/ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *old_buff) { struct ixgbe_rx_buffer *new_buff; u16 nta = rx_ring->next_to_alloc; - u16 bufsz = ixgbe_rx_bufsz(rx_ring); new_buff = &rx_ring->rx_buffer_info[nta]; @@ -1597,17 +1594,13 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, /* transfer page from old buffer to new buffer */ new_buff->page = old_buff->page; new_buff->dma = old_buff->dma; - - /* flip page offset to other buffer and store to new_buff */ - new_buff->page_offset = old_buff->page_offset ^ bufsz; + new_buff->page_offset = old_buff->page_offset; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, bufsz, + new_buff->page_offset, + ixgbe_rx_bufsz(rx_ring), DMA_FROM_DEVICE); - - /* bump ref count on page before it is given to the stack */ - get_page(new_buff->page); } /** @@ -1617,20 +1610,159 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into * - * This function is based on skb_add_rx_frag. I would have used that - * function however it doesn't handle the truesize case correctly since we - * are allocating more memory than might be used for a single receive. + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. 
 **/ -static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb, int size) + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->page, rx_buffer->page_offset, - size); - skb->len += size; - skb->data_len += size; - skb->truesize += ixgbe_rx_bufsz(rx_ring); + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_bufsz(rx_ring); +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - + ixgbe_rx_bufsz(rx_ring); +#endif + + if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unnecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + struct ixgbe_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + /* + * Delay unmapping of the first packet. It carries the + * header information, HW may still access the header + * after the writeback. 
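The page-recycling path in ixgbe_add_rx_frag() above leans on a refcount shortcut: once the driver knows it is the page's only owner, it sets the count straight to 2 (one reference for the stack, one for the ring) rather than paying for an atomic read-modify-write. Stripped to the flip logic, assuming two 2K buffers per 4K page and invented structure names:

	/* reuse the other half of the page if we are the exclusive,
	 * node-local owner; otherwise let the page go to the stack */
	static bool foo_try_reuse_half_page(struct foo_rx_buf *buf,
					    unsigned int bufsz)
	{
		struct page *page = buf->page;

		if (page_to_nid(page) != numa_node_id())
			return false;
		if (page_count(page) != 1)
			return false;

		buf->page_offset ^= bufsz;	/* point at the other half */
		atomic_set(&page->_count, 2);	/* stack ref + ring ref, no RMW */
		return true;
	}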
Only unmap it when EOP is + * reached + */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + goto dma_sync; + + IXGBE_CB(skb)->dma = rx_buffer->dma; + } else { + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + ixgbe_dma_sync_frag(rx_ring, skb); + +dma_sync: + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + + /* pull page into skb */ + if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; } /** @@ -1653,16 +1785,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; #ifdef IXGBE_FCOE struct ixgbe_adapter *adapter = q_vector->adapter; - int ddp_bytes = 0; + int ddp_bytes; + unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); do { - struct ixgbe_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; struct sk_buff *skb; - struct page *page; - u16 ntc; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -1670,9 +1800,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, cleaned_count = 0; } - ntc = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, ntc); - rx_buffer = &rx_ring->rx_buffer_info[ntc]; + rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) break; @@ -1684,75 +1812,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ rmb(); - page = rx_buffer->page; - prefetchw(page); - - skb = rx_buffer->skb; - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + /* retrieve a buffer from the ring */ + skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - break; - } - - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. 
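The restructured receive routine above now follows the canonical budget-driven NAPI shape, with the buffer handling hidden behind the fetch helper. Its skeleton, in illustrative form with invented names:

	static bool foo_clean_rx(struct foo_ring *ring, int budget)
	{
		do {
			struct sk_buff *skb;

			/* refill in batches; one at a time is too slow */
			if (foo_desc_unused(ring) >= FOO_RX_BUFFER_WRITE)
				foo_alloc_rx_buffers(ring);

			skb = foo_fetch_rx_buffer(ring);
			if (!skb)	/* ring empty or allocation failure */
				break;

			/* multi-descriptor frames park the skb and loop */
			if (foo_is_non_eop(ring, skb))
				continue;

			napi_gro_receive(&ring->q_vector->napi, skb);
			budget--;
		} while (likely(budget));

		return !!budget;	/* true when the ring ran dry first */
	}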
Only unmap it when EOP is - * reached - */ - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - - /* pull page into skb */ - ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, - le16_to_cpu(rx_desc->wb.upper.length)); - - if (ixgbe_can_reuse_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->skb = NULL; - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; cleaned_count++; @@ -1775,6 +1840,20 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* if ddp, not passing to ULD unless for FCP_RSP or error */ if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) { ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); + /* include DDPed FCoE data */ + if (ddp_bytes > 0) { + if (!mss) { + mss = rx_ring->netdev->mtu - + sizeof(struct fcoe_hdr) - + sizeof(struct fc_frame_header) - + sizeof(struct fcoe_crc_eof); + if (mss > 512) + mss &= ~511; + } + total_rx_bytes += ddp_bytes; + total_rx_packets += DIV_ROUND_UP(ddp_bytes, + mss); + } if (!ddp_bytes) { dev_kfree_skb_any(skb); continue; @@ -1788,21 +1867,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, budget--; } while (likely(budget)); -#ifdef IXGBE_FCOE - /* include DDPed FCoE data */ - if (ddp_bytes > 0) { - unsigned int mss; - - mss = rx_ring->netdev->mtu - sizeof(struct fcoe_hdr) - - sizeof(struct fc_frame_header) - - sizeof(struct fcoe_crc_eof); - if (mss > 512) - mss &= ~511; - total_rx_bytes += ddp_bytes; - total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } - -#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -2868,11 +2932,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ -#if PAGE_SIZE > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#else srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#endif /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -2980,13 +3040,7 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ -#if (PAGE_SIZE <= 8192) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE <= 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#endif IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -3606,8 +3660,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) netif_set_gso_max_size(adapter->netdev, 32768); - hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); - #ifdef IXGBE_FCOE if (adapter->netdev->features & NETIF_F_FCOE_MTU) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -3807,6 +3859,11 @@ static void 
ixgbe_configure(struct ixgbe_adapter *adapter) #ifdef CONFIG_IXGBE_DCB ixgbe_configure_dcb(adapter); #endif + /* + * We must restore virtualization before VLANs or else + * the VLVF registers will not be populated + */ + ixgbe_configure_virtualization(adapter); ixgbe_set_rx_mode(adapter->netdev); ixgbe_restore_vlan(adapter); @@ -3838,8 +3895,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) break; } - ixgbe_configure_virtualization(adapter); - #ifdef IXGBE_FCOE /* configure FCoE L2 filters, redirection table, and Rx control */ ixgbe_configure_fcoe(adapter); @@ -4130,27 +4185,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } /** - * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers - * @rx_ring: ring to setup - * - * On many IA platforms the L1 cache has a critical stride of 4K, this - * results in each receive buffer starting in the same cache set. To help - * reduce the pressure on this cache set we can interleave the offsets so - * that only every other buffer will be in the same cache set. - **/ -static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) -{ - struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; - u16 i; - - for (i = 0; i < rx_ring->count; i += 2) { - rx_buffer[0].page_offset = 0; - rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); - rx_buffer = &rx_buffer[2]; - } -} - -/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ @@ -4195,8 +4229,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); - ixgbe_init_rx_page_offset(rx_ring); - /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); @@ -4646,8 +4678,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - ixgbe_init_rx_page_offset(rx_ring); - return 0; err: vfree(rx_ring->rx_buffer_info); @@ -5530,8 +5560,9 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) { u32 ssvpc; - /* Do not perform spoof check for 82598 */ - if (adapter->hw.mac.type == ixgbe_mac_82598EB) + /* Do not perform spoof check for 82598 or if not in IOV mode */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB || + adapter->num_vfs == 0) return; ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC); @@ -5543,7 +5574,7 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) if (!ssvpc) return; - e_warn(drv, "%d Spoofed packets detected\n", ssvpc); + e_warn(drv, "%u Spoofed packets detected\n", ssvpc); } /** @@ -5874,9 +5905,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && - !(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) - return; + if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN)) { + if (unlikely(skb->no_fcs)) + first->tx_flags |= IXGBE_TX_FLAGS_NO_IFCS; + if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) + return; + } } else { u8 l4_hdr = 0; switch (first->protocol) { @@ -5938,7 +5972,6 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ __le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); /* set HW vlan bit if vlan is present */ @@ -5958,6 +5991,10 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) #endif cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE); + /* insert frame checksum */ + if (!(tx_flags & 
IXGBE_TX_FLAGS_NO_IFCS)) + cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS); + return cmd_type; } @@ -6063,8 +6100,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, if (likely(!data_len)) break; - if (unlikely(skb->no_fcs)) - cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); i++; @@ -6854,9 +6889,9 @@ static int ixgbe_set_features(struct net_device *netdev, return 0; } -static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -6893,7 +6928,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr) + const unsigned char *addr) { struct ixgbe_adapter *adapter = netdev_priv(dev); int err = -EOPNOTSUPP; @@ -7136,11 +7171,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, goto err_ioremap; } - for (i = 1; i <= 5; i++) { - if (pci_resource_len(pdev, i) == 0) - continue; - } - netdev->netdev_ops = &ixgbe_netdev_ops; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; @@ -7419,6 +7449,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, e_err(probe, "failed to allocate sysfs resources\n"); #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_init(adapter); +#endif /* CONFIG_DEBUG_FS */ + return 0; err_register: @@ -7453,6 +7487,10 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_exit(adapter); +#endif /*CONFIG_DEBUG_FS */ + set_bit(__IXGBE_DOWN, &adapter->state); cancel_work_sync(&adapter->service_task); @@ -7708,6 +7746,10 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_init(); +#endif /* CONFIG_DEBUG_FS */ + #ifdef CONFIG_IXGBE_DCA dca_register_notify(&dca_notifier); #endif @@ -7730,6 +7772,11 @@ static void __exit ixgbe_exit_module(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&ixgbe_driver); + +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_exit(); +#endif /* CONFIG_DEBUG_FS */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 3456d561714..39881cb17a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -960,7 +960,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) /* (Re)start the overflow check */ adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED; - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 4fea8716ab6..dce48bf64d9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -346,6 +346,10 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int 
vid, u32 vf) { + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add); } @@ -414,6 +418,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) VLAN_PRIO_SHIFT)), vf); ixgbe_set_vmolr(hw, vf, false); } else { + ixgbe_set_vf_vlan(adapter, true, 0, vf); ixgbe_set_vmvir(adapter, 0, vf); ixgbe_set_vmolr(hw, vf, true); } @@ -810,9 +815,9 @@ out: return err; } -static int ixgbe_link_mbps(int internal_link_speed) +static int ixgbe_link_mbps(struct ixgbe_adapter *adapter) { - switch (internal_link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: return 100; case IXGBE_LINK_SPEED_1GB_FULL: @@ -824,27 +829,30 @@ static int ixgbe_link_mbps(int internal_link_speed) } } -static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, - int link_speed) +static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf) { - int rf_dec, rf_int; - u32 bcnrc_val; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; + struct ixgbe_hw *hw = &adapter->hw; + u32 bcnrc_val = 0; + u16 queue, queues_per_pool; + u16 tx_rate = adapter->vfinfo[vf].tx_rate; + + if (tx_rate) { + /* start with base link speed value */ + bcnrc_val = adapter->vf_rate_link_speed; - if (tx_rate != 0) { /* Calculate the rate factor values to set */ - rf_int = link_speed / tx_rate; - rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<<IXGBE_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; - - bcnrc_val = IXGBE_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int<<IXGBE_RTTBCNRC_RF_INT_SHIFT) & - IXGBE_RTTBCNRC_RF_INT_MASK); - bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK); - } else { - bcnrc_val = 0; + bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT; + bcnrc_val /= tx_rate; + + /* clear everything but the rate factor */ + bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK | + IXGBE_RTTBCNRC_RF_DEC_MASK; + + /* enable the rate scheduler */ + bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA; } - IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, 2*vf); /* vf Y uses queue 2*Y */ /* * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported @@ -861,53 +869,68 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, break; } - IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + /* determine how many queues per pool based on VMDq mask */ + queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask); + + /* write value for all Tx queues belonging to VF */ + for (queue = 0; queue < queues_per_pool; queue++) { + unsigned int reg_idx = (vf * queues_per_pool) + queue; + + IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx); + IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + } } void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) { - int actual_link_speed, i; - bool reset_rate = false; + int i; /* VF Tx rate limit was not set */ - if (adapter->vf_rate_link_speed == 0) + if (!adapter->vf_rate_link_speed) return; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if (actual_link_speed != adapter->vf_rate_link_speed) { - reset_rate = true; + if (ixgbe_link_mbps(adapter) != adapter->vf_rate_link_speed) { adapter->vf_rate_link_speed = 0; dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit rate " - "is disabled\n"); + "Link speed has been changed. 
VF Transmit rate is disabled\n"); } for (i = 0; i < adapter->num_vfs; i++) { - if (reset_rate) + if (!adapter->vf_rate_link_speed) adapter->vfinfo[i].tx_rate = 0; - ixgbe_set_vf_rate_limit(&adapter->hw, i, - adapter->vfinfo[i].tx_rate, - actual_link_speed); + ixgbe_set_vf_rate_limit(adapter, i); } } int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - int actual_link_speed; + int link_speed; + + /* verify VF is active */ + if (vf >= adapter->num_vfs) + return -EINVAL; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if ((vf >= adapter->num_vfs) || (!adapter->link_up) || - (tx_rate > actual_link_speed) || (actual_link_speed != 10000) || - ((tx_rate != 0) && (tx_rate <= 10))) - /* rate limit cannot be set to 10Mb or less in 10Gb adapters */ + /* verify link is up */ + if (!adapter->link_up) return -EINVAL; - adapter->vf_rate_link_speed = actual_link_speed; - adapter->vfinfo[vf].tx_rate = (u16)tx_rate; - ixgbe_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + /* verify we are linked at 10Gbps */ + link_speed = ixgbe_link_mbps(adapter); + if (link_speed != 10000) + return -EINVAL; + + /* rate limit cannot be less than 10Mbs or greater than link speed */ + if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed))) + return -EINVAL; + + /* store values */ + adapter->vf_rate_link_speed = link_speed; + adapter->vfinfo[vf].tx_rate = tx_rate; + + /* update hardware configuration */ + ixgbe_set_vf_rate_limit(adapter, vf); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 418af827b23..da17ccf5c09 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -272,5 +272,6 @@ struct ixgbe_adv_tx_context_desc { /* Error Codes */ #define IXGBE_ERR_INVALID_MAC_ADDR -1 #define IXGBE_ERR_RESET_FAILED -2 +#define IXGBE_ERR_INVALID_ARGUMENT -3 #endif /* _IXGBEVF_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 98cadb0c4da..383b4e1cd17 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -101,7 +101,9 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_3K 3072 +#define IXGBEVF_RXBUFFER_7K 7168 +#define IXGBEVF_RXBUFFER_15K 15360 #define IXGBEVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 @@ -259,6 +261,11 @@ enum ixbgevf_state_t { __IXGBEVF_DOWN }; +struct ixgbevf_cb { + struct sk_buff *prev; +}; +#define IXGBE_CB(skb) ((struct ixgbevf_cb *)(skb)->cb) + enum ixgbevf_boards { board_82599_vf, board_X540_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 6647383c4dd..0ee9bd4819f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -263,6 +263,8 @@ cont_loop: tx_ring->total_bytes += total_bytes; tx_ring->total_packets += total_packets; u64_stats_update_end(&tx_ring->syncp); + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; return count < tx_ring->count; } @@ -272,12 +274,10 @@ cont_loop: * @q_vector: structure containing interrupt and ring information * @skb: 
packet to send up * @status: hardware indication of status of receive - * @rx_ring: rx descriptor ring (for a specific queue) to setup * @rx_desc: rx descriptor **/ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, struct sk_buff *skb, u8 status, - struct ixgbevf_ring *ring, union ixgbe_adv_rx_desc *rx_desc) { struct ixgbevf_adapter *adapter = q_vector->adapter; @@ -433,11 +433,21 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, if (!(staterr & IXGBE_RXD_STAT_EOP)) { skb->next = next_buffer->skb; - skb->next->prev = skb; + IXGBE_CB(skb->next)->prev = skb; adapter->non_eop_descs++; goto next_desc; } + /* we should not be chaining buffers, if we did drop the skb */ + if (IXGBE_CB(skb)->prev) { + do { + struct sk_buff *this = skb; + skb = IXGBE_CB(skb)->prev; + dev_kfree_skb(this); + } while (skb); + goto next_desc; + } + /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) { dev_kfree_skb_irq(skb); @@ -461,7 +471,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } skb->protocol = eth_type_trans(skb, rx_ring->netdev); - ixgbevf_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc); + ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc); next_desc: rx_desc->wb.upper.status_error = 0; @@ -490,6 +500,8 @@ next_desc: rx_ring->total_packets += total_rx_packets; rx_ring->total_bytes += total_rx_bytes; u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; return !!budget; } @@ -716,40 +728,15 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) } } -static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) +static irqreturn_t ixgbevf_msix_other(int irq, void *data) { struct ixgbevf_adapter *adapter = data; struct ixgbe_hw *hw = &adapter->hw; - u32 msg; - bool got_ack = false; - - if (!hw->mbx.ops.check_for_ack(hw)) - got_ack = true; - if (!hw->mbx.ops.check_for_msg(hw)) { - hw->mbx.ops.read(hw, &msg, 1); + hw->mac.get_link_status = 1; - if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 1)); - - if (msg & IXGBE_VT_MSGTYPE_NACK) - pr_warn("Last Request of type %2.2x to PF Nacked\n", - msg & 0xFF); - /* - * Restore the PFSTS bit in case someone is polling for a - * return message from the PF - */ - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFSTS; - } - - /* - * checking for the ack clears the PFACK bit. 
Place - * it back in the v2p_mailbox cache so that anyone - * polling for an ack will not miss it - */ - if (got_ack) - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; + if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other); @@ -899,10 +886,10 @@ static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter) } err = request_irq(adapter->msix_entries[vector].vector, - &ixgbevf_msix_mbx, 0, netdev->name, adapter); + &ixgbevf_msix_other, 0, netdev->name, adapter); if (err) { hw_dbg(&adapter->hw, - "request_irq for msix_mbx failed: %d\n", err); + "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; } @@ -1057,15 +1044,46 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (rx_ring->rx_buf_len == MAXIMUM_ETHERNET_VLAN_SIZE) - srrctl |= IXGBEVF_RXBUFFER_2048 >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= rx_ring->rx_buf_len >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> + IXGBE_SRRCTL_BSIZEPKT_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + int i; + u16 rx_buf_len; + + /* notify the PF of our intent to use this size of frame */ + ixgbevf_rlpml_set_vf(hw, max_frame); + + /* PF will allow an extra 4 bytes past for vlan tagged frames */ + max_frame += VLAN_HLEN; + + /* + * Make best use of allocation by using all but 1K of a + * power of 2 allocation that will be used for skb->head. + */ + if ((hw->mac.type == ixgbe_mac_X540_vf) && + (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)) + rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; + else if (max_frame <= IXGBEVF_RXBUFFER_3K) + rx_buf_len = IXGBEVF_RXBUFFER_3K; + else if (max_frame <= IXGBEVF_RXBUFFER_7K) + rx_buf_len = IXGBEVF_RXBUFFER_7K; + else if (max_frame <= IXGBEVF_RXBUFFER_15K) + rx_buf_len = IXGBEVF_RXBUFFER_15K; + else + rx_buf_len = IXGBEVF_MAX_RXBUFFER; + + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i].rx_buf_len = rx_buf_len; +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1076,18 +1094,14 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) { u64 rdba; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; int i, j; u32 rdlen; - int rx_buf_len; /* PSRTYPE must be initialized in 82599 */ IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, 0); - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - else - rx_buf_len = ALIGN(max_frame, 1024); + + /* set_rx_buffer_len must be called before ring initialization */ + ixgbevf_set_rx_buffer_len(adapter); rdlen = adapter->rx_ring[0].count * sizeof(union ixgbe_adv_rx_desc); /* Setup the HW Rx Head and Tail Descriptor Pointers and @@ -1103,7 +1117,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFRDT(j), 0); adapter->rx_ring[i].head = IXGBE_VFRDH(j); adapter->rx_ring[i].tail = IXGBE_VFRDT(j); - adapter->rx_ring[i].rx_buf_len = rx_buf_len; ixgbevf_configure_srrctl(adapter, j); } @@ -1113,36 +1126,47 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter 
*adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err; + + if (!hw->mac.ops.set_vfta) + return -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* add VID to filter table */ - if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, true); + err = hw->mac.ops.set_vfta(hw, vid, 0, true); spin_unlock(&adapter->mbx_lock); + /* translate error return types so error makes sense */ + if (err == IXGBE_ERR_MBX) + return -EIO; + + if (err == IXGBE_ERR_INVALID_ARGUMENT) + return -EACCES; + set_bit(vid, adapter->active_vlans); - return 0; + return err; } static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err = -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* remove VID from filter table */ if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, false); + err = hw->mac.ops.set_vfta(hw, vid, 0, false); spin_unlock(&adapter->mbx_lock); clear_bit(vid, adapter->active_vlans); - return 0; + return err; } static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter) @@ -1308,6 +1332,25 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int api[] = { ixgbe_mbox_api_10, + ixgbe_mbox_api_unknown }; + int err = 0, idx = 0; + + spin_lock(&adapter->mbx_lock); + + while (api[idx] != ixgbe_mbox_api_unknown) { + err = ixgbevf_negotiate_api_version(hw, api[idx]); + if (!err) + break; + idx++; + } + + spin_unlock(&adapter->mbx_lock); +} + static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1315,7 +1358,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) int i, j = 0; int num_rx_rings = adapter->num_rx_queues; u32 txdctl, rxdctl; - u32 msg[2]; for (i = 0; i < adapter->num_tx_queues; i++) { j = adapter->tx_ring[i].reg_idx; @@ -1356,10 +1398,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); } - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - hw->mbx.ops.write_posted(hw, msg, 2); - spin_unlock(&adapter->mbx_lock); clear_bit(__IXGBEVF_DOWN, &adapter->state); @@ -1371,6 +1409,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) ixgbevf_save_reset_stats(adapter); ixgbevf_init_last_counter_stats(adapter); + hw->mac.get_link_status = 1; mod_timer(&adapter->watchdog_timer, jiffies); } @@ -1378,6 +1417,8 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + ixgbevf_negotiate_api(adapter); + ixgbevf_configure(adapter); ixgbevf_up_complete(adapter); @@ -1419,7 +1460,7 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter, rx_buffer_info->skb = NULL; do { struct sk_buff *this = skb; - skb = skb->prev; + skb = IXGBE_CB(skb)->prev; dev_kfree_skb(this); } while (skb); } @@ -1547,8 +1588,6 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; - WARN_ON(in_interrupt()); while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) @@ -1561,10 +1600,8 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) * watchdog task will continue to schedule reset tasks until * the PF is up and running. 
*/ - if (!hw->mac.ops.reset_hw(hw)) { - ixgbevf_down(adapter); - ixgbevf_up(adapter); - } + ixgbevf_down(adapter); + ixgbevf_up(adapter); clear_bit(__IXGBEVF_RESETTING, &adapter->state); } @@ -1867,6 +1904,22 @@ err_set_interrupt: } /** + * ixgbevf_clear_interrupt_scheme - Clear the current interrupt scheme settings + * @adapter: board private structure to clear interrupt scheme on + * + * We go through and clear interrupt specific resources and reset the structure + * to pre-load conditions + **/ +static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) +{ + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + + ixgbevf_free_q_vectors(adapter); + ixgbevf_reset_interrupt_capability(adapter); +} + +/** * ixgbevf_sw_init - Initialize general software structures * (struct ixgbevf_adapter) * @adapter: board private structure to initialize @@ -2351,6 +2404,8 @@ static int ixgbevf_open(struct net_device *netdev) } } + ixgbevf_negotiate_api(adapter); + /* allocate transmit descriptors */ err = ixgbevf_setup_all_tx_resources(adapter); if (err) @@ -2860,10 +2915,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE; - u32 msg[2]; if (adapter->hw.mac.type == ixgbe_mac_X540_vf) max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; @@ -2877,35 +2930,91 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - if (!netif_running(netdev)) { - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = max_frame; - hw->mbx.ops.write_posted(hw, msg, 2); - } - if (netif_running(netdev)) ixgbevf_reinit_locked(adapter); return 0; } -static void ixgbevf_shutdown(struct pci_dev *pdev) +static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbevf_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM + int retval = 0; +#endif netif_device_detach(netdev); if (netif_running(netdev)) { + rtnl_lock(); ixgbevf_down(adapter); ixgbevf_free_irq(adapter); ixgbevf_free_all_tx_resources(adapter); ixgbevf_free_all_rx_resources(adapter); + rtnl_unlock(); } - pci_save_state(pdev); + ixgbevf_clear_interrupt_scheme(adapter); +#ifdef CONFIG_PM + retval = pci_save_state(pdev); + if (retval) + return retval; + +#endif pci_disable_device(pdev); + + return 0; +} + +#ifdef CONFIG_PM +static int ixgbevf_resume(struct pci_dev *pdev) +{ + struct ixgbevf_adapter *adapter = pci_get_drvdata(pdev); + struct net_device *netdev = adapter->netdev; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + /* + * pci_restore_state clears dev->state_saved so call + * pci_save_state to restore it. 
+ */ + pci_save_state(pdev); + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + rtnl_lock(); + err = ixgbevf_init_interrupt_scheme(adapter); + rtnl_unlock(); + if (err) { + dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + return err; + } + + ixgbevf_reset(adapter); + + if (netif_running(netdev)) { + err = ixgbevf_open(netdev); + if (err) + return err; + } + + netif_device_attach(netdev); + + return err; +} + +#endif /* CONFIG_PM */ +static void ixgbevf_shutdown(struct pci_dev *pdev) +{ + ixgbevf_suspend(pdev, PMSG_SUSPEND); } static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, @@ -2946,7 +3055,7 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, return stats; } -static const struct net_device_ops ixgbe_netdev_ops = { +static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, .ndo_start_xmit = ixgbevf_xmit_frame, @@ -2962,7 +3071,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { static void ixgbevf_assign_netdev_ops(struct net_device *dev) { - dev->netdev_ops = &ixgbe_netdev_ops; + dev->netdev_ops = &ixgbevf_netdev_ops; ixgbevf_set_ethtool_ops(dev); dev->watchdog_timeo = 5 * HZ; } @@ -3131,6 +3240,7 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, return 0; err_register: + ixgbevf_clear_interrupt_scheme(adapter); err_sw_init: ixgbevf_reset_interrupt_capability(adapter); iounmap(hw->hw_addr); @@ -3168,6 +3278,7 @@ static void __devexit ixgbevf_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + ixgbevf_clear_interrupt_scheme(adapter); ixgbevf_reset_interrupt_capability(adapter); iounmap(adapter->hw.hw_addr); @@ -3267,6 +3378,11 @@ static struct pci_driver ixgbevf_driver = { .id_table = ixgbevf_pci_tbl, .probe = ixgbevf_probe, .remove = __devexit_p(ixgbevf_remove), +#ifdef CONFIG_PM + /* Power Management Hooks */ + .suspend = ixgbevf_suspend, + .resume = ixgbevf_resume, +#endif .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler }; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 9c955900fe6..d5028ddf4b3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -86,14 +86,17 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw) static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = IXGBE_ERR_MBX; + s32 ret_val = -IXGBE_ERR_MBX; + + if (!mbx->ops.read) + goto out; ret_val = ixgbevf_poll_for_msg(hw); /* if ack received read message, otherwise we timed out */ if (!ret_val) ret_val = mbx->ops.read(hw, msg, size); - +out: return ret_val; } @@ -109,7 +112,11 @@ static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val; + s32 ret_val = -IXGBE_ERR_MBX; + + /* exit if either we can't write or there isn't a defined timeout */ + if (!mbx->ops.write || !mbx->timeout) + goto out; /* send msg */ ret_val = mbx->ops.write(hw, msg, size); @@ -117,7 +124,7 @@ static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) /* if msg sent wait until we receive an ack */ if (!ret_val) ret_val = ixgbevf_poll_for_ack(hw); - +out: return ret_val; } diff --git 
a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index cf9131c5c11..946ce86f337 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -76,12 +76,29 @@ /* bits 23:16 are used for extra info for certain messages */ #define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) +/* definitions to support mailbox API version negotiation */ + +/* + * each element denotes a version of the API; existing numbers may not + * change; any additions must go at the end + */ +enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */ + ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ +}; + +/* mailbox API, legacy requests */ #define IXGBE_VF_RESET 0x01 /* VF requests reset */ #define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ #define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ #define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ -#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ -#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* mailbox API, version 1.0 VF requests */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ +#define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */ /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index ec89b86f7ca..0c7447e6fcc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -79,6 +79,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) /* Call adapter stop to disable tx/rx and clear interrupts */ hw->mac.ops.stop_adapter(hw); + /* reset the api version */ + hw->api_version = ixgbe_mbox_api_10; + IXGBE_WRITE_REG(hw, IXGBE_VFCTRL, IXGBE_CTRL_RST); IXGBE_WRITE_FLUSH(hw); @@ -97,7 +100,7 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] = IXGBE_VF_RESET; mbx->ops.write_posted(hw, msgbuf, 1); - msleep(10); + mdelay(10); /* set our "perm_addr" based on info provided by PF */ /* also set up the mc_filter_type which is piggy backed @@ -346,16 +349,32 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) { + struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[2]; + s32 err; msgbuf[0] = IXGBE_VF_SET_VLAN; msgbuf[1] = vlan; /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - ixgbevf_write_msg_read_ack(hw, msgbuf, 2); + err = mbx->ops.write_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; - return 0; + err = mbx->ops.read_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; + + /* remove extra bits from the message */ + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; + msgbuf[0] &= ~(0xFF << IXGBE_VT_MSGINFO_SHIFT); + + if (msgbuf[0] != (IXGBE_VF_SET_VLAN | IXGBE_VT_MSGTYPE_ACK)) + err = IXGBE_ERR_INVALID_ARGUMENT; + +mbx_err: + return err; } /** @@ -389,20 +408,23 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, bool *link_up, bool autoneg_wait_to_complete) { + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; u32 links_reg; + u32 
in_msg = 0; - if (!(hw->mbx.ops.check_for_rst(hw))) { - *link_up = false; - *speed = 0; - return -1; - } + /* If we were hit with a reset drop the link */ + if (!mbx->ops.check_for_rst(hw) || !mbx->timeout) + mac->get_link_status = true; - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!mac->get_link_status) + goto out; - if (links_reg & IXGBE_LINKS_UP) - *link_up = true; - else - *link_up = false; + /* if link status is down no point in checking to see if pf is up */ + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; switch (links_reg & IXGBE_LINKS_SPEED_82599) { case IXGBE_LINKS_SPEED_10G_82599: @@ -416,7 +438,79 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, break; } - return 0; + /* if the read failed it could just be a mailbox collision, best wait + * until we are called again and don't report an error */ + if (mbx->ops.read(hw, &in_msg, 1)) + goto out; + + if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) { + /* msg is not CTS and is a NACK; we must have lost CTS status */ + if (in_msg & IXGBE_VT_MSGTYPE_NACK) + ret_val = -1; + goto out; + } + + /* the pf is talking, if we timed out in the past we reinit */ + if (!mbx->timeout) { + ret_val = -1; + goto out; + } + + /* if we passed all the tests above then the link is up and we no + * longer need to check for link */ + mac->get_link_status = false; + +out: + *link_up = !mac->get_link_status; + return ret_val; +} + +/** + * ixgbevf_rlpml_set_vf - Set the maximum receive packet length + * @hw: pointer to the HW structure + * @max_size: value to assign to max frame size + **/ +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) +{ + u32 msgbuf[2]; + + msgbuf[0] = IXGBE_VF_SET_LPE; + msgbuf[1] = max_size; + ixgbevf_write_msg_read_ack(hw, msgbuf, 2); +} + +/** + * ixgbevf_negotiate_api_version - Negotiate supported API version + * @hw: pointer to the HW structure + * @api: integer containing requested API version + **/ +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) +{ + int err; + u32 msg[3]; + + /* Negotiate the mailbox API version */ + msg[0] = IXGBE_VF_API_NEGOTIATE; + msg[1] = api; + msg[2] = 0; + err = hw->mbx.ops.write_posted(hw, msg, 3); + + if (!err) + err = hw->mbx.ops.read_posted(hw, msg, 3); + + if (!err) { + msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; + + /* Store value and return 0 on success */ + if (msg[0] == (IXGBE_VF_API_NEGOTIATE | IXGBE_VT_MSGTYPE_ACK)) { + hw->api_version = api; + return 0; + } + + err = IXGBE_ERR_INVALID_ARGUMENT; + } + + return err; } static const struct ixgbe_mac_operations ixgbevf_mac_ops = { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 25c951daee5..47f11a584d8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -137,6 +137,8 @@ struct ixgbe_hw { u8 revision_id; bool adapter_stopped; + + int api_version; }; struct ixgbevf_hw_stats { @@ -170,5 +172,7 @@ struct ixgbevf_info { const struct ixgbe_mac_operations *mac_ops; }; +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); #endif /* __IXGBE_VF_H__ */
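
Editor's note on the VF rate-limit rework in ixgbe_sriov.c above: the patch replaces the old separate integer/decimal rate-factor computation in ixgbe_set_vf_rate_limit() with a single fixed-point divide (link speed shifted left by the rate-factor integer shift, divided by the requested tx_rate). The standalone C sketch below reproduces that arithmetic so it can be checked outside the driver. The RTTBCNRC_* field constants (14-bit fractional part in bits 13:0, 14-bit integer part in bits 27:14, RS_ENA in bit 31) are mirrored here from ixgbe_type.h as assumptions for illustration; they are not part of this patch.

#include <stdint.h>
#include <stdio.h>

/* assumed field layout of the RTTBCNRC register, mirrored from ixgbe_type.h */
#define RTTBCNRC_RS_ENA       0x80000000u              /* rate scheduler enable */
#define RTTBCNRC_RF_INT_SHIFT 14                       /* integer part starts at bit 14 */
#define RTTBCNRC_RF_DEC_MASK  0x00003FFFu              /* 14-bit fractional part */
#define RTTBCNRC_RF_INT_MASK  (RTTBCNRC_RF_DEC_MASK << RTTBCNRC_RF_INT_SHIFT)

/* compute the register value the patch writes for one VF queue */
static uint32_t rate_factor(uint32_t link_mbps, uint16_t tx_rate_mbps)
{
    uint32_t bcnrc = 0;

    if (tx_rate_mbps) {
        /* start with the base link speed value, as the patch does */
        bcnrc = link_mbps;

        /* fixed-point divide: (link_speed << 14) / tx_rate */
        bcnrc <<= RTTBCNRC_RF_INT_SHIFT;
        bcnrc /= tx_rate_mbps;

        /* clear everything but the rate factor */
        bcnrc &= RTTBCNRC_RF_INT_MASK | RTTBCNRC_RF_DEC_MASK;

        /* enable the rate scheduler */
        bcnrc |= RTTBCNRC_RS_ENA;
    }
    /* tx_rate of 0 leaves the value 0, i.e. rate limiting disabled */
    return bcnrc;
}

int main(void)
{
    /* 10G link limited to 1500 Mb/s: factor = 10000/1500 = 6.6667 */
    uint32_t v = rate_factor(10000, 1500);

    printf("RTTBCNRC = 0x%08x (integer part %u)\n",
           (unsigned)v,
           (unsigned)((v & RTTBCNRC_RF_INT_MASK) >> RTTBCNRC_RF_INT_SHIFT));
    return 0;
}

This prints 0x8001aaaa with integer part 6, i.e. 6 + 10922/16384 of the 6.6667 divisor. Note that ixgbe_ndo_set_vf_bw() only accepts tx_rate in (10, link_speed] on a 10G link, so the shifted dividend (10000 << 14, about 1.6e8) and the quotient both stay well within 32 bits and the field mask never truncates meaningful bits.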