diff options
Diffstat (limited to 'drivers/net/e1000/e1000_main.c')
-rw-r--r-- | drivers/net/e1000/e1000_main.c | 1612 |
1 files changed, 1010 insertions, 602 deletions
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 438a931fd55..31e332935e5 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -29,11 +29,71 @@ #include "e1000.h" /* Change Log - * 6.0.58 4/20/05 - * o Accepted ethtool cleanup patch from Stephen Hemminger - * 6.0.44+ 2/15/05 - * o applied Anton's patch to resolve tx hang in hardware - * o Applied Andrew Mortons patch - e1000 stops working after resume + * 6.3.9 12/16/2005 + * o incorporate fix for recycled skbs from IBM LTC + * 6.3.7 11/18/2005 + * o Honor eeprom setting for enabling/disabling Wake On Lan + * 6.3.5 11/17/2005 + * o Fix memory leak in rx ring handling for PCI Express adapters + * 6.3.4 11/8/05 + * o Patch from Jesper Juhl to remove redundant NULL checks for kfree + * 6.3.2 9/20/05 + * o Render logic that sets/resets DRV_LOAD as inline functions to + * avoid code replication. If f/w is AMT then set DRV_LOAD only when + * network interface is open. + * o Handle DRV_LOAD set/reset in cases where AMT uses VLANs. + * o Adjust PBA partioning for Jumbo frames using MTU size and not + * rx_buffer_len + * 6.3.1 9/19/05 + * o Use adapter->tx_timeout_factor in Tx Hung Detect logic + (e1000_clean_tx_irq) + * o Support for 8086:10B5 device (Quad Port) + * 6.2.14 9/15/05 + * o In AMT enabled configurations, set/reset DRV_LOAD bit on interface + * open/close + * 6.2.13 9/14/05 + * o Invoke e1000_check_mng_mode only for 8257x controllers since it + * accesses the FWSM that is not supported in other controllers + * 6.2.12 9/9/05 + * o Add support for device id E1000_DEV_ID_82546GB_QUAD_COPPER + * o set RCTL:SECRC only for controllers newer than 82543. + * o When the n/w interface comes down reset DRV_LOAD bit to notify f/w. + * This code was moved from e1000_remove to e1000_close + * 6.2.10 9/6/05 + * o Fix error in updating RDT in el1000_alloc_rx_buffers[_ps] -- one off. + * o Enable fc by default on 82573 controllers (do not read eeprom) + * o Fix rx_errors statistic not to include missed_packet_count + * o Fix rx_dropped statistic not to include missed_packet_count + (Padraig Brady) + * 6.2.9 8/30/05 + * o Remove call to update statistics from the controller ib e1000_get_stats + * 6.2.8 8/30/05 + * o Improved algorithm for rx buffer allocation/rdt update + * o Flow control watermarks relative to rx PBA size + * o Simplified 'Tx Hung' detect logic + * 6.2.7 8/17/05 + * o Report rx buffer allocation failures and tx timeout counts in stats + * 6.2.6 8/16/05 + * o Implement workaround for controller erratum -- linear non-tso packet + * following a TSO gets written back prematurely + * 6.2.5 8/15/05 + * o Set netdev->tx_queue_len based on link speed/duplex settings. + * o Fix net_stats.rx_fifo_errors <p@draigBrady.com> + * o Do not power off PHY if SoL/IDER session is active + * 6.2.4 8/10/05 + * o Fix loopback test setup/cleanup for 82571/3 controllers + * o Fix parsing of outgoing packets (e1000_transfer_dhcp_info) to treat + * all packets as raw + * o Prevent operations that will cause the PHY to be reset if SoL/IDER + * sessions are active and log a message + * 6.2.2 7/21/05 + * o used fixed size descriptors for all MTU sizes, reduces memory load + * 6.1.2 4/13/05 + * o Fixed ethtool diagnostics + * o Enabled flow control to take default eeprom settings + * o Added stats_lock around e1000_read_phy_reg commands to avoid concurrent + * calls, one from mii_ioctl and other from within update_stats while + * processing MIIREG ioctl. */ char e1000_driver_name[] = "e1000"; @@ -43,7 +103,7 @@ static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; #else #define DRIVERNAPI "-NAPI" #endif -#define DRV_VERSION "6.1.16-k2"DRIVERNAPI +#define DRV_VERSION "6.3.9-k2"DRIVERNAPI char e1000_driver_version[] = DRV_VERSION; static char e1000_copyright[] = "Copyright (c) 1999-2005 Intel Corporation."; @@ -97,7 +157,9 @@ static struct pci_device_id e1000_pci_tbl[] = { INTEL_E1000_ETHERNET_DEVICE(0x108A), INTEL_E1000_ETHERNET_DEVICE(0x108B), INTEL_E1000_ETHERNET_DEVICE(0x108C), + INTEL_E1000_ETHERNET_DEVICE(0x1099), INTEL_E1000_ETHERNET_DEVICE(0x109A), + INTEL_E1000_ETHERNET_DEVICE(0x10B5), /* required last entry */ {0,} }; @@ -171,9 +233,11 @@ static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); #endif static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); + struct e1000_rx_ring *rx_ring, + int cleaned_count); static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); + struct e1000_rx_ring *rx_ring, + int cleaned_count); static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); @@ -291,7 +355,7 @@ e1000_irq_disable(struct e1000_adapter *adapter) static inline void e1000_irq_enable(struct e1000_adapter *adapter) { - if(likely(atomic_dec_and_test(&adapter->irq_sem))) { + if (likely(atomic_dec_and_test(&adapter->irq_sem))) { E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH(&adapter->hw); } @@ -303,23 +367,91 @@ e1000_update_mng_vlan(struct e1000_adapter *adapter) struct net_device *netdev = adapter->netdev; uint16_t vid = adapter->hw.mng_cookie.vlan_id; uint16_t old_vid = adapter->mng_vlan_id; - if(adapter->vlgrp) { - if(!adapter->vlgrp->vlan_devices[vid]) { - if(adapter->hw.mng_cookie.status & + if (adapter->vlgrp) { + if (!adapter->vlgrp->vlan_devices[vid]) { + if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) { e1000_vlan_rx_add_vid(netdev, vid); adapter->mng_vlan_id = vid; } else adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; - - if((old_vid != (uint16_t)E1000_MNG_VLAN_NONE) && - (vid != old_vid) && + + if ((old_vid != (uint16_t)E1000_MNG_VLAN_NONE) && + (vid != old_vid) && !adapter->vlgrp->vlan_devices[old_vid]) e1000_vlan_rx_kill_vid(netdev, old_vid); } } } - + +/** + * e1000_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure + * + * e1000_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. For AMT version (only with 82573) i + * of the f/w this means that the netowrk i/f is closed. + * + **/ + +static inline void +e1000_release_hw_control(struct e1000_adapter *adapter) +{ + uint32_t ctrl_ext; + uint32_t swsm; + + /* Let firmware taken over control of h/w */ + switch (adapter->hw.mac_type) { + case e1000_82571: + case e1000_82572: + ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); + break; + case e1000_82573: + swsm = E1000_READ_REG(&adapter->hw, SWSM); + E1000_WRITE_REG(&adapter->hw, SWSM, + swsm & ~E1000_SWSM_DRV_LOAD); + default: + break; + } +} + +/** + * e1000_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure + * + * e1000_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. For AMT version (only with 82573) + * of the f/w this means that the netowrk i/f is open. + * + **/ + +static inline void +e1000_get_hw_control(struct e1000_adapter *adapter) +{ + uint32_t ctrl_ext; + uint32_t swsm; + /* Let firmware know the driver has taken over */ + switch (adapter->hw.mac_type) { + case e1000_82571: + case e1000_82572: + ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); + break; + case e1000_82573: + swsm = E1000_READ_REG(&adapter->hw, SWSM); + E1000_WRITE_REG(&adapter->hw, SWSM, + swsm | E1000_SWSM_DRV_LOAD); + break; + default: + break; + } +} + int e1000_up(struct e1000_adapter *adapter) { @@ -329,10 +461,10 @@ e1000_up(struct e1000_adapter *adapter) /* hardware has been reset, we need to reload some things */ /* Reset the PHY if it was previously powered down */ - if(adapter->hw.media_type == e1000_media_type_copper) { + if (adapter->hw.media_type == e1000_media_type_copper) { uint16_t mii_reg; e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); - if(mii_reg & MII_CR_POWER_DOWN) + if (mii_reg & MII_CR_POWER_DOWN) e1000_phy_reset(&adapter->hw); } @@ -343,20 +475,26 @@ e1000_up(struct e1000_adapter *adapter) e1000_configure_tx(adapter); e1000_setup_rctl(adapter); e1000_configure_rx(adapter); - for (i = 0; i < adapter->num_queues; i++) - adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i]); + /* call E1000_DESC_UNUSED which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct e1000_rx_ring *ring = &adapter->rx_ring[i]; + adapter->alloc_rx_buf(adapter, ring, + E1000_DESC_UNUSED(ring)); + } #ifdef CONFIG_PCI_MSI - if(adapter->hw.mac_type > e1000_82547_rev_2) { + if (adapter->hw.mac_type > e1000_82547_rev_2) { adapter->have_msi = TRUE; - if((err = pci_enable_msi(adapter->pdev))) { + if ((err = pci_enable_msi(adapter->pdev))) { DPRINTK(PROBE, ERR, "Unable to allocate MSI interrupt Error: %d\n", err); adapter->have_msi = FALSE; } } #endif - if((err = request_irq(adapter->pdev->irq, &e1000_intr, + if ((err = request_irq(adapter->pdev->irq, &e1000_intr, SA_SHIRQ | SA_SAMPLE_RANDOM, netdev->name, netdev))) { DPRINTK(PROBE, ERR, @@ -364,6 +502,12 @@ e1000_up(struct e1000_adapter *adapter) return err; } +#ifdef CONFIG_E1000_MQ + e1000_setup_queue_mapping(adapter); +#endif + + adapter->tx_queue_len = netdev->tx_queue_len; + mod_timer(&adapter->watchdog_timer, jiffies); #ifdef CONFIG_E1000_NAPI @@ -378,6 +522,8 @@ void e1000_down(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; + boolean_t mng_mode_enabled = (adapter->hw.mac_type >= e1000_82571) && + e1000_check_mng_mode(&adapter->hw); e1000_irq_disable(adapter); #ifdef CONFIG_E1000_MQ @@ -385,7 +531,7 @@ e1000_down(struct e1000_adapter *adapter) #endif free_irq(adapter->pdev->irq, netdev); #ifdef CONFIG_PCI_MSI - if(adapter->hw.mac_type > e1000_82547_rev_2 && + if (adapter->hw.mac_type > e1000_82547_rev_2 && adapter->have_msi == TRUE) pci_disable_msi(adapter->pdev); #endif @@ -396,6 +542,7 @@ e1000_down(struct e1000_adapter *adapter) #ifdef CONFIG_E1000_NAPI netif_poll_disable(netdev); #endif + netdev->tx_queue_len = adapter->tx_queue_len; adapter->link_speed = 0; adapter->link_duplex = 0; netif_carrier_off(netdev); @@ -405,12 +552,16 @@ e1000_down(struct e1000_adapter *adapter) e1000_clean_all_tx_rings(adapter); e1000_clean_all_rx_rings(adapter); - /* If WoL is not enabled and management mode is not IAMT - * Power down the PHY so no link is implied when interface is down */ - if(!adapter->wol && adapter->hw.mac_type >= e1000_82540 && + /* Power down the PHY so no link is implied when interface is down * + * The PHY cannot be powered down if any of the following is TRUE * + * (a) WoL is enabled + * (b) AMT is active + * (c) SoL/IDER session is active */ + if (!adapter->wol && adapter->hw.mac_type >= e1000_82540 && adapter->hw.media_type == e1000_media_type_copper && - !e1000_check_mng_mode(&adapter->hw) && - !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN)) { + !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN) && + !mng_mode_enabled && + !e1000_check_phy_reset_block(&adapter->hw)) { uint16_t mii_reg; e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); mii_reg |= MII_CR_POWER_DOWN; @@ -422,10 +573,8 @@ e1000_down(struct e1000_adapter *adapter) void e1000_reset(struct e1000_adapter *adapter) { - struct net_device *netdev = adapter->netdev; uint32_t pba, manc; uint16_t fc_high_water_mark = E1000_FC_HIGH_DIFF; - uint16_t fc_low_water_mark = E1000_FC_LOW_DIFF; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. @@ -448,19 +597,12 @@ e1000_reset(struct e1000_adapter *adapter) break; } - if((adapter->hw.mac_type != e1000_82573) && - (adapter->rx_buffer_len > E1000_RXBUFFER_8192)) { + if ((adapter->hw.mac_type != e1000_82573) && + (adapter->netdev->mtu > E1000_RXBUFFER_8192)) pba -= 8; /* allocate more FIFO for Tx */ - /* send an XOFF when there is enough space in the - * Rx FIFO to hold one extra full size Rx packet - */ - fc_high_water_mark = netdev->mtu + ENET_HEADER_SIZE + - ETHERNET_FCS_SIZE + 1; - fc_low_water_mark = fc_high_water_mark + 8; - } - if(adapter->hw.mac_type == e1000_82547) { + if (adapter->hw.mac_type == e1000_82547) { adapter->tx_fifo_head = 0; adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; adapter->tx_fifo_size = @@ -471,19 +613,21 @@ e1000_reset(struct e1000_adapter *adapter) E1000_WRITE_REG(&adapter->hw, PBA, pba); /* flow control settings */ - adapter->hw.fc_high_water = (pba << E1000_PBA_BYTES_SHIFT) - - fc_high_water_mark; - adapter->hw.fc_low_water = (pba << E1000_PBA_BYTES_SHIFT) - - fc_low_water_mark; + /* Set the FC high water mark to 90% of the FIFO size. + * Required to clear last 3 LSB */ + fc_high_water_mark = ((pba * 9216)/10) & 0xFFF8; + + adapter->hw.fc_high_water = fc_high_water_mark; + adapter->hw.fc_low_water = fc_high_water_mark - 8; adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; adapter->hw.fc_send_xon = 1; adapter->hw.fc = adapter->hw.original_fc; /* Allow time for pending master requests to run */ e1000_reset_hw(&adapter->hw); - if(adapter->hw.mac_type >= e1000_82544) + if (adapter->hw.mac_type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, WUC, 0); - if(e1000_init_hw(&adapter->hw)) + if (e1000_init_hw(&adapter->hw)) DPRINTK(PROBE, ERR, "Hardware Error\n"); e1000_update_mng_vlan(adapter); /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ @@ -517,33 +661,31 @@ e1000_probe(struct pci_dev *pdev, struct net_device *netdev; struct e1000_adapter *adapter; unsigned long mmio_start, mmio_len; - uint32_t ctrl_ext; - uint32_t swsm; static int cards_found = 0; int i, err, pci_using_dac; uint16_t eeprom_data; uint16_t eeprom_apme_mask = E1000_EEPROM_APME; - if((err = pci_enable_device(pdev))) + if ((err = pci_enable_device(pdev))) return err; - if(!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { + if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { pci_using_dac = 1; } else { - if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { + if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { E1000_ERR("No usable DMA configuration, aborting\n"); return err; } pci_using_dac = 0; } - if((err = pci_request_regions(pdev, e1000_driver_name))) + if ((err = pci_request_regions(pdev, e1000_driver_name))) return err; pci_set_master(pdev); netdev = alloc_etherdev(sizeof(struct e1000_adapter)); - if(!netdev) { + if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; } @@ -562,15 +704,15 @@ e1000_probe(struct pci_dev *pdev, mmio_len = pci_resource_len(pdev, BAR_0); adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); - if(!adapter->hw.hw_addr) { + if (!adapter->hw.hw_addr) { err = -EIO; goto err_ioremap; } - for(i = BAR_1; i <= BAR_5; i++) { - if(pci_resource_len(pdev, i) == 0) + for (i = BAR_1; i <= BAR_5; i++) { + if (pci_resource_len(pdev, i) == 0) continue; - if(pci_resource_flags(pdev, i) & IORESOURCE_IO) { + if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { adapter->hw.io_base = pci_resource_start(pdev, i); break; } @@ -607,13 +749,13 @@ e1000_probe(struct pci_dev *pdev, /* setup the private structure */ - if((err = e1000_sw_init(adapter))) + if ((err = e1000_sw_init(adapter))) goto err_sw_init; - if((err = e1000_check_phy_reset_block(&adapter->hw))) + if ((err = e1000_check_phy_reset_block(&adapter->hw))) DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); - if(adapter->hw.mac_type >= e1000_82543) { + if (adapter->hw.mac_type >= e1000_82543) { netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX | @@ -622,16 +764,16 @@ e1000_probe(struct pci_dev *pdev, } #ifdef NETIF_F_TSO - if((adapter->hw.mac_type >= e1000_82544) && + if ((adapter->hw.mac_type >= e1000_82544) && (adapter->hw.mac_type != e1000_82547)) netdev->features |= NETIF_F_TSO; #ifdef NETIF_F_TSO_IPV6 - if(adapter->hw.mac_type > e1000_82547_rev_2) + if (adapter->hw.mac_type > e1000_82547_rev_2) netdev->features |= NETIF_F_TSO_IPV6; #endif #endif - if(pci_using_dac) + if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; /* hard_start_xmit is safe against parallel locking */ @@ -639,14 +781,14 @@ e1000_probe(struct pci_dev *pdev, adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); - /* before reading the EEPROM, reset the controller to + /* before reading the EEPROM, reset the controller to * put the device in a known good starting state */ - + e1000_reset_hw(&adapter->hw); /* make sure the EEPROM is good */ - if(e1000_validate_eeprom_checksum(&adapter->hw) < 0) { + if (e1000_validate_eeprom_checksum(&adapter->hw) < 0) { DPRINTK(PROBE, ERR, "The EEPROM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; @@ -654,12 +796,12 @@ e1000_probe(struct pci_dev *pdev, /* copy the MAC address out of the EEPROM */ - if(e1000_read_mac_addr(&adapter->hw)) + if (e1000_read_mac_addr(&adapter->hw)) DPRINTK(PROBE, ERR, "EEPROM Read Error\n"); memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len); - if(!is_valid_ether_addr(netdev->perm_addr)) { + if (!is_valid_ether_addr(netdev->perm_addr)) { DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); err = -EIO; goto err_eeprom; @@ -699,7 +841,7 @@ e1000_probe(struct pci_dev *pdev, * enable the ACPI Magic Packet filter */ - switch(adapter->hw.mac_type) { + switch (adapter->hw.mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: case e1000_82543: @@ -712,8 +854,7 @@ e1000_probe(struct pci_dev *pdev, case e1000_82546: case e1000_82546_rev_3: case e1000_82571: - if((E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1) - && (adapter->hw.media_type == e1000_media_type_copper)) { + if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1){ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; @@ -724,31 +865,42 @@ e1000_probe(struct pci_dev *pdev, EEPROM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } - if(eeprom_data & eeprom_apme_mask) + if (eeprom_data & eeprom_apme_mask) adapter->wol |= E1000_WUFC_MAG; + /* print bus type/speed/width info */ + { + struct e1000_hw *hw = &adapter->hw; + DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", + ((hw->bus_type == e1000_bus_type_pcix) ? "-X" : + (hw->bus_type == e1000_bus_type_pci_express ? " Express":"")), + ((hw->bus_speed == e1000_bus_speed_2500) ? "2.5Gb/s" : + (hw->bus_speed == e1000_bus_speed_133) ? "133MHz" : + (hw->bus_speed == e1000_bus_speed_120) ? "120MHz" : + (hw->bus_speed == e1000_bus_speed_100) ? "100MHz" : + (hw->bus_speed == e1000_bus_speed_66) ? "66MHz" : "33MHz"), + ((hw->bus_width == e1000_bus_width_64) ? "64-bit" : + (hw->bus_width == e1000_bus_width_pciex_4) ? "Width x4" : + (hw->bus_width == e1000_bus_width_pciex_1) ? "Width x1" : + "32-bit")); + } + + for (i = 0; i < 6; i++) + printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + /* reset the hardware with the new settings */ e1000_reset(adapter); - /* Let firmware know the driver has taken over */ - switch(adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); - break; - case e1000_82573: - swsm = E1000_READ_REG(&adapter->hw, SWSM); - E1000_WRITE_REG(&adapter->hw, SWSM, - swsm | E1000_SWSM_DRV_LOAD); - break; - default: - break; - } + /* If the controller is 82573 and f/w is AMT, do not set + * DRV_LOAD until the interface is up. For all other cases, + * let the f/w know that the h/w is now under the control + * of the driver. */ + if (adapter->hw.mac_type != e1000_82573 || + !e1000_check_mng_mode(&adapter->hw)) + e1000_get_hw_control(adapter); strcpy(netdev->name, "eth%d"); - if((err = register_netdev(netdev))) + if ((err = register_netdev(netdev))) goto err_register; DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); @@ -782,47 +934,33 @@ e1000_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - uint32_t ctrl_ext; - uint32_t manc, swsm; + uint32_t manc; #ifdef CONFIG_E1000_NAPI int i; #endif flush_scheduled_work(); - if(adapter->hw.mac_type >= e1000_82540 && + if (adapter->hw.mac_type >= e1000_82540 && adapter->hw.media_type == e1000_media_type_copper) { manc = E1000_READ_REG(&adapter->hw, MANC); - if(manc & E1000_MANC_SMBUS_EN) { + if (manc & E1000_MANC_SMBUS_EN) { manc |= E1000_MANC_ARP_EN; E1000_WRITE_REG(&adapter->hw, MANC, manc); } } - switch(adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); - break; - case e1000_82573: - swsm = E1000_READ_REG(&adapter->hw, SWSM); - E1000_WRITE_REG(&adapter->hw, SWSM, - swsm & ~E1000_SWSM_DRV_LOAD); - break; - - default: - break; - } + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + e1000_release_hw_control(adapter); unregister_netdev(netdev); #ifdef CONFIG_E1000_NAPI - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) __dev_put(&adapter->polling_netdev[i]); #endif - if(!e1000_check_phy_reset_block(&adapter->hw)) + if (!e1000_check_phy_reset_block(&adapter->hw)) e1000_phy_hw_reset(&adapter->hw); kfree(adapter->tx_ring); @@ -881,19 +1019,19 @@ e1000_sw_init(struct e1000_adapter *adapter) /* identify the MAC */ - if(e1000_set_mac_type(hw)) { + if (e1000_set_mac_type(hw)) { DPRINTK(PROBE, ERR, "Unknown MAC Type\n"); return -EIO; } /* initialize eeprom parameters */ - if(e1000_init_eeprom_params(hw)) { + if (e1000_init_eeprom_params(hw)) { E1000_ERR("EEPROM initialization failed\n"); return -EIO; } - switch(hw->mac_type) { + switch (hw->mac_type) { default: break; case e1000_82541: @@ -912,7 +1050,7 @@ e1000_sw_init(struct e1000_adapter *adapter) /* Copper options */ - if(hw->media_type == e1000_media_type_copper) { + if (hw->media_type == e1000_media_type_copper) { hw->mdix = AUTO_ALL_MODES; hw->disable_polarity_correction = FALSE; hw->master_slave = E1000_MASTER_SLAVE; @@ -923,15 +1061,34 @@ e1000_sw_init(struct e1000_adapter *adapter) switch (hw->mac_type) { case e1000_82571: case e1000_82572: - adapter->num_queues = 2; + /* These controllers support 2 tx queues, but with a single + * qdisc implementation, multiple tx queues aren't quite as + * interesting. If we can find a logical way of mapping + * flows to a queue, then perhaps we can up the num_tx_queue + * count back to its default. Until then, we run the risk of + * terrible performance due to SACK overload. */ + adapter->num_tx_queues = 1; + adapter->num_rx_queues = 2; break; default: - adapter->num_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_rx_queues = 1; break; } - adapter->num_queues = min(adapter->num_queues, num_online_cpus()); + adapter->num_rx_queues = min(adapter->num_rx_queues, num_online_cpus()); + adapter->num_tx_queues = min(adapter->num_tx_queues, num_online_cpus()); + DPRINTK(DRV, INFO, "Multiqueue Enabled: Rx Queue count = %u %s\n", + adapter->num_rx_queues, + ((adapter->num_rx_queues == 1) + ? ((num_online_cpus() > 1) + ? "(due to unsupported feature in current adapter)" + : "(due to unsupported system configuration)") + : "")); + DPRINTK(DRV, INFO, "Multiqueue Enabled: Tx Queue count = %u\n", + adapter->num_tx_queues); #else - adapter->num_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_rx_queues = 1; #endif if (e1000_alloc_queues(adapter)) { @@ -940,17 +1097,14 @@ e1000_sw_init(struct e1000_adapter *adapter) } #ifdef CONFIG_E1000_NAPI - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_rx_queues; i++) { adapter->polling_netdev[i].priv = adapter; adapter->polling_netdev[i].poll = &e1000_clean; adapter->polling_netdev[i].weight = 64; dev_hold(&adapter->polling_netdev[i]); set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state); } -#endif - -#ifdef CONFIG_E1000_MQ - e1000_setup_queue_mapping(adapter); + spin_lock_init(&adapter->tx_queue_lock); #endif atomic_set(&adapter->irq_sem, 1); @@ -973,13 +1127,13 @@ e1000_alloc_queues(struct e1000_adapter *adapter) { int size; - size = sizeof(struct e1000_tx_ring) * adapter->num_queues; + size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; adapter->tx_ring = kmalloc(size, GFP_KERNEL); if (!adapter->tx_ring) return -ENOMEM; memset(adapter->tx_ring, 0, size); - size = sizeof(struct e1000_rx_ring) * adapter->num_queues; + size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; adapter->rx_ring = kmalloc(size, GFP_KERNEL); if (!adapter->rx_ring) { kfree(adapter->tx_ring); @@ -988,7 +1142,7 @@ e1000_alloc_queues(struct e1000_adapter *adapter) memset(adapter->rx_ring, 0, size); #ifdef CONFIG_E1000_NAPI - size = sizeof(struct net_device) * adapter->num_queues; + size = sizeof(struct net_device) * adapter->num_rx_queues; adapter->polling_netdev = kmalloc(size, GFP_KERNEL); if (!adapter->polling_netdev) { kfree(adapter->tx_ring); @@ -998,6 +1152,14 @@ e1000_alloc_queues(struct e1000_adapter *adapter) memset(adapter->polling_netdev, 0, size); #endif +#ifdef CONFIG_E1000_MQ + adapter->rx_sched_call_data.func = e1000_rx_schedule; + adapter->rx_sched_call_data.info = adapter->netdev; + + adapter->cpu_netdev = alloc_percpu(struct net_device *); + adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); +#endif + return E1000_SUCCESS; } @@ -1017,14 +1179,15 @@ e1000_setup_queue_mapping(struct e1000_adapter *adapter) lock_cpu_hotplug(); i = 0; for_each_online_cpu(cpu) { - *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = &adapter->tx_ring[i % adapter->num_queues]; + *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = &adapter->tx_ring[i % adapter->num_tx_queues]; /* This is incomplete because we'd like to assign separate * physical cpus to these netdev polling structures and * avoid saturating a subset of cpus. */ - if (i < adapter->num_queues) { + if (i < adapter->num_rx_queues) { *per_cpu_ptr(adapter->cpu_netdev, cpu) = &adapter->polling_netdev[i]; - adapter->cpu_for_queue[i] = cpu; + adapter->rx_ring[i].cpu = cpu; + cpu_set(cpu, adapter->cpumask); } else *per_cpu_ptr(adapter->cpu_netdev, cpu) = NULL; @@ -1063,14 +1226,20 @@ e1000_open(struct net_device *netdev) if ((err = e1000_setup_all_rx_resources(adapter))) goto err_setup_rx; - if((err = e1000_up(adapter))) + if ((err = e1000_up(adapter))) goto err_up; adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; - if((adapter->hw.mng_cookie.status & + if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) { e1000_update_mng_vlan(adapter); } + /* If AMT is enabled, let the firmware know that the network + * interface is now open */ + if (adapter->hw.mac_type == e1000_82573 && + e1000_check_mng_mode(&adapter->hw)) + e1000_get_hw_control(adapter); + return E1000_SUCCESS; err_up: @@ -1105,10 +1274,17 @@ e1000_close(struct net_device *netdev) e1000_free_all_tx_resources(adapter); e1000_free_all_rx_resources(adapter); - if((adapter->hw.mng_cookie.status & + if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) { e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); } + + /* If AMT is enabled, let the firmware know that the network + * interface is now closed */ + if (adapter->hw.mac_type == e1000_82573 && + e1000_check_mng_mode(&adapter->hw)) + e1000_release_hw_control(adapter); + return 0; } @@ -1153,7 +1329,7 @@ e1000_setup_tx_resources(struct e1000_adapter *adapter, size = sizeof(struct e1000_buffer) * txdr->count; txdr->buffer_info = vmalloc_node(size, pcibus_to_node(pdev->bus)); - if(!txdr->buffer_info) { + if (!txdr->buffer_info) { DPRINTK(PROBE, ERR, "Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; @@ -1166,7 +1342,7 @@ e1000_setup_tx_resources(struct e1000_adapter *adapter, E1000_ROUNDUP(txdr->size, 4096); txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); - if(!txdr->desc) { + if (!txdr->desc) { setup_tx_desc_die: vfree(txdr->buffer_info); DPRINTK(PROBE, ERR, @@ -1182,8 +1358,8 @@ setup_tx_desc_die: "at %p\n", txdr->size, txdr->desc); /* Try again, without freeing the previous */ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); - if(!txdr->desc) { /* Failed allocation, critical failure */ + if (!txdr->desc) { pci_free_consistent(pdev, txdr->size, olddesc, olddma); goto setup_tx_desc_die; } @@ -1229,7 +1405,7 @@ e1000_setup_all_tx_resources(struct e1000_adapter *adapter) { int i, err = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_tx_queues; i++) { err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); if (err) { DPRINTK(PROBE, ERR, @@ -1254,10 +1430,11 @@ e1000_configure_tx(struct e1000_adapter *adapter) uint64_t tdba; struct e1000_hw *hw = &adapter->hw; uint32_t tdlen, tctl, tipg, tarc; + uint32_t ipgr1, ipgr2; /* Setup the HW Tx Head and Tail descriptor pointers */ - switch (adapter->num_queues) { + switch (adapter->num_tx_queues) { case 2: tdba = adapter->tx_ring[1].dma; tdlen = adapter->tx_ring[1].count * @@ -1287,22 +1464,26 @@ e1000_configure_tx(struct e1000_adapter *adapter) /* Set the default values for the Tx Inter Packet Gap timer */ + if (hw->media_type == e1000_media_type_fiber || + hw->media_type == e1000_media_type_internal_serdes) + tipg = DEFAULT_82543_TIPG_IPGT_FIBER; + else + tipg = DEFAULT_82543_TIPG_IPGT_COPPER; + switch (hw->mac_type) { case e1000_82542_rev2_0: case e1000_82542_rev2_1: tipg = DEFAULT_82542_TIPG_IPGT; - tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; - tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; + ipgr1 = DEFAULT_82542_TIPG_IPGR1; + ipgr2 = DEFAULT_82542_TIPG_IPGR2; break; default: - if (hw->media_type == e1000_media_type_fiber || - hw->media_type == e1000_media_type_internal_serdes) - tipg = DEFAULT_82543_TIPG_IPGT_FIBER; - else - tipg = DEFAULT_82543_TIPG_IPGT_COPPER; - tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; - tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; + ipgr1 = DEFAULT_82543_TIPG_IPGR1; + ipgr2 = DEFAULT_82543_TIPG_IPGR2; + break; } + tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; + tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; E1000_WRITE_REG(hw, TIPG, tipg); /* Set the Tx Interrupt Delay register */ @@ -1378,7 +1559,7 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter, size = sizeof(struct e1000_ps_page) * rxdr->count; rxdr->ps_page = kmalloc(size, GFP_KERNEL); - if(!rxdr->ps_page) { + if (!rxdr->ps_page) { vfree(rxdr->buffer_info); DPRINTK(PROBE, ERR, "Unable to allocate memory for the receive descriptor ring\n"); @@ -1388,7 +1569,7 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter, size = sizeof(struct e1000_ps_page_dma) * rxdr->count; rxdr->ps_page_dma = kmalloc(size, GFP_KERNEL); - if(!rxdr->ps_page_dma) { + if (!rxdr->ps_page_dma) { vfree(rxdr->buffer_info); kfree(rxdr->ps_page); DPRINTK(PROBE, ERR, @@ -1397,7 +1578,7 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter, } memset(rxdr->ps_page_dma, 0, size); - if(adapter->hw.mac_type <= e1000_82547_rev_2) + if (adapter->hw.mac_type <= e1000_82547_rev_2) desc_len = sizeof(struct e1000_rx_desc); else desc_len = sizeof(union e1000_rx_desc_packet_split); @@ -1454,6 +1635,8 @@ setup_rx_desc_die: rxdr->next_to_clean = 0; rxdr->next_to_use = 0; + rxdr->rx_skb_top = NULL; + rxdr->rx_skb_prev = NULL; return 0; } @@ -1475,7 +1658,7 @@ e1000_setup_all_rx_resources(struct e1000_adapter *adapter) { int i, err = 0; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_rx_queues; i++) { err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]); if (err) { DPRINTK(PROBE, ERR, @@ -1498,7 +1681,7 @@ e1000_setup_rctl(struct e1000_adapter *adapter) { uint32_t rctl, rfctl; uint32_t psrctl = 0; -#ifdef CONFIG_E1000_PACKET_SPLIT +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT uint32_t pages = 0; #endif @@ -1510,7 +1693,10 @@ e1000_setup_rctl(struct e1000_adapter *adapter) E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT); - if(adapter->hw.tbi_compatibility_on == 1) + if (adapter->hw.mac_type > e1000_82543) + rctl |= E1000_RCTL_SECRC; + + if (adapter->hw.tbi_compatibility_on == 1) rctl |= E1000_RCTL_SBP; else rctl &= ~E1000_RCTL_SBP; @@ -1521,32 +1707,17 @@ e1000_setup_rctl(struct e1000_adapter *adapter) rctl |= E1000_RCTL_LPE; /* Setup buffer sizes */ - if(adapter->hw.mac_type >= e1000_82571) { + if (adapter->hw.mac_type >= e1000_82571) { /* We can now specify buffers in 1K increments. * BSIZE and BSEX are ignored in this case. */ rctl |= adapter->rx_buffer_len << 0x11; } else { rctl &= ~E1000_RCTL_SZ_4096; - rctl |= E1000_RCTL_BSEX; - switch (adapter->rx_buffer_len) { - case E1000_RXBUFFER_2048: - default: - rctl |= E1000_RCTL_SZ_2048; - rctl &= ~E1000_RCTL_BSEX; - break; - case E1000_RXBUFFER_4096: - rctl |= E1000_RCTL_SZ_4096; - break; - case E1000_RXBUFFER_8192: - rctl |= E1000_RCTL_SZ_8192; - break; - case E1000_RXBUFFER_16384: - rctl |= E1000_RCTL_SZ_16384; - break; - } + rctl &= ~E1000_RCTL_BSEX; + rctl |= E1000_RCTL_SZ_2048; } -#ifdef CONFIG_E1000_PACKET_SPLIT +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT /* 82571 and greater support packet-split where the protocol * header is placed in skb->data and the packet data is * placed in pages hanging off of skb_shinfo(skb)->nr_frags. @@ -1570,7 +1741,7 @@ e1000_setup_rctl(struct e1000_adapter *adapter) E1000_WRITE_REG(&adapter->hw, RFCTL, rfctl); rctl |= E1000_RCTL_DTYP_PS | E1000_RCTL_SECRC; - + psrctl |= adapter->rx_ps_bsize0 >> E1000_PSRCTL_BSIZE0_SHIFT; @@ -1632,22 +1803,27 @@ e1000_configure_rx(struct e1000_adapter *adapter) if (hw->mac_type >= e1000_82540) { E1000_WRITE_REG(hw, RADV, adapter->rx_abs_int_delay); - if(adapter->itr > 1) + if (adapter->itr > 1) E1000_WRITE_REG(hw, ITR, 1000000000 / (adapter->itr * 256)); } if (hw->mac_type >= e1000_82571) { - /* Reset delay timers after every interrupt */ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); + /* Reset delay timers after every interrupt */ ctrl_ext |= E1000_CTRL_EXT_CANC; +#ifdef CONFIG_E1000_NAPI + /* Auto-Mask interrupts upon ICR read. */ + ctrl_ext |= E1000_CTRL_EXT_IAME; +#endif E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); + E1000_WRITE_REG(hw, IAM, ~0); E1000_WRITE_FLUSH(hw); } /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - switch (adapter->num_queues) { + switch (adapter->num_rx_queues) { #ifdef CONFIG_E1000_MQ case 2: rdba = adapter->rx_ring[1].dma; @@ -1674,7 +1850,7 @@ e1000_configure_rx(struct e1000_adapter *adapter) } #ifdef CONFIG_E1000_MQ - if (adapter->num_queues > 1) { + if (adapter->num_rx_queues > 1) { uint32_t random[10]; get_random_bytes(&random[0], 40); @@ -1684,7 +1860,7 @@ e1000_configure_rx(struct e1000_adapter *adapter) E1000_WRITE_REG(hw, RSSIM, 0); } - switch (adapter->num_queues) { + switch (adapter->num_rx_queues) { case 2: default: reta = 0x00800080; @@ -1716,13 +1892,13 @@ e1000_configure_rx(struct e1000_adapter *adapter) /* Enable 82543 Receive Checksum Offload for TCP and UDP */ if (hw->mac_type >= e1000_82543) { rxcsum = E1000_READ_REG(hw, RXCSUM); - if(adapter->rx_csum == TRUE) { + if (adapter->rx_csum == TRUE) { rxcsum |= E1000_RXCSUM_TUOFL; /* Enable 82571 IPv4 payload checksum for UDP fragments * Must be used in conjunction with packet-split. */ - if ((hw->mac_type >= e1000_82571) && - (adapter->rx_ps_pages)) { + if ((hw->mac_type >= e1000_82571) && + (adapter->rx_ps_pages)) { rxcsum |= E1000_RXCSUM_IPPCSE; } } else { @@ -1776,7 +1952,7 @@ e1000_free_all_tx_resources(struct e1000_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_tx_queues; i++) e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); } @@ -1784,17 +1960,15 @@ static inline void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, struct e1000_buffer *buffer_info) { - if(buffer_info->dma) { + if (buffer_info->dma) { pci_unmap_page(adapter->pdev, buffer_info->dma, buffer_info->length, PCI_DMA_TODEVICE); - buffer_info->dma = 0; } - if(buffer_info->skb) { + if (buffer_info->skb) dev_kfree_skb_any(buffer_info->skb); - buffer_info->skb = NULL; - } + memset(buffer_info, 0, sizeof(struct e1000_buffer)); } /** @@ -1813,7 +1987,7 @@ e1000_clean_tx_ring(struct e1000_adapter *adapter, /* Free all the Tx ring sk_buffs */ - for(i = 0; i < tx_ring->count; i++) { + for (i = 0; i < tx_ring->count; i++) { buffer_info = &tx_ring->buffer_info[i]; e1000_unmap_and_free_tx_resource(adapter, buffer_info); } @@ -1843,7 +2017,7 @@ e1000_clean_all_tx_rings(struct e1000_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_tx_queues; i++) e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]); } @@ -1887,7 +2061,7 @@ e1000_free_all_rx_resources(struct e1000_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) e1000_free_rx_resources(adapter, &adapter->rx_ring[i]); } @@ -1909,12 +2083,9 @@ e1000_clean_rx_ring(struct e1000_adapter *adapter, unsigned int i, j; /* Free all the Rx ring sk_buffs */ - - for(i = 0; i < rx_ring->count; i++) { + for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; - if(buffer_info->skb) { - ps_page = &rx_ring->ps_page[i]; - ps_page_dma = &rx_ring->ps_page_dma[i]; + if (buffer_info->skb) { pci_unmap_single(pdev, buffer_info->dma, buffer_info->length, @@ -1922,19 +2093,30 @@ e1000_clean_rx_ring(struct e1000_adapter *adapter, dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; - - for(j = 0; j < adapter->rx_ps_pages; j++) { - if(!ps_page->ps_page[j]) break; - pci_unmap_single(pdev, - ps_page_dma->ps_page_dma[j], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ps_page_dma->ps_page_dma[j] = 0; - put_page(ps_page->ps_page[j]); - ps_page->ps_page[j] = NULL; - } } + ps_page = &rx_ring->ps_page[i]; + ps_page_dma = &rx_ring->ps_page_dma[i]; + for (j = 0; j < adapter->rx_ps_pages; j++) { + if (!ps_page->ps_page[j]) break; + pci_unmap_page(pdev, + ps_page_dma->ps_page_dma[j], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + ps_page_dma->ps_page_dma[j] = 0; + put_page(ps_page->ps_page[j]); + ps_page->ps_page[j] = NULL; + } + } + + /* there also may be some cached data in our adapter */ + if (rx_ring->rx_skb_top) { + dev_kfree_skb(rx_ring->rx_skb_top); + + /* rx_skb_prev will be wiped out by rx_skb_top */ + rx_ring->rx_skb_top = NULL; + rx_ring->rx_skb_prev = NULL; } + size = sizeof(struct e1000_buffer) * rx_ring->count; memset(rx_ring->buffer_info, 0, size); size = sizeof(struct e1000_ps_page) * rx_ring->count; @@ -1963,7 +2145,7 @@ e1000_clean_all_rx_rings(struct e1000_adapter *adapter) { int i; - for (i = 0; i < adapter->num_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]); } @@ -1984,7 +2166,7 @@ e1000_enter_82542_rst(struct e1000_adapter *adapter) E1000_WRITE_FLUSH(&adapter->hw); mdelay(5); - if(netif_running(netdev)) + if (netif_running(netdev)) e1000_clean_all_rx_rings(adapter); } @@ -2000,12 +2182,14 @@ e1000_leave_82542_rst(struct e1000_adapter *adapter) E1000_WRITE_FLUSH(&adapter->hw); mdelay(5); - if(adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE) + if (adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); - if(netif_running(netdev)) { + if (netif_running(netdev)) { + /* No need to loop, because 82542 supports only 1 queue */ + struct e1000_rx_ring *ring = &adapter->rx_ring[0]; e1000_configure_rx(adapter); - e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0]); + adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); } } @@ -2023,12 +2207,12 @@ e1000_set_mac(struct net_device *netdev, void *p) struct e1000_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; - if(!is_valid_ether_addr(addr->sa_data)) + if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; /* 82542 2.0 needs to be in reset to write receive address registers */ - if(adapter->hw.mac_type == e1000_82542_rev2_0) + if (adapter->hw.mac_type == e1000_82542_rev2_0) e1000_enter_82542_rst(adapter); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); @@ -2042,17 +2226,17 @@ e1000_set_mac(struct net_device *netdev, void *p) /* activate the work around */ adapter->hw.laa_is_present = 1; - /* Hold a copy of the LAA in RAR[14] This is done so that - * between the time RAR[0] gets clobbered and the time it - * gets fixed (in e1000_watchdog), the actual LAA is in one + /* Hold a copy of the LAA in RAR[14] This is done so that + * between the time RAR[0] gets clobbered and the time it + * gets fixed (in e1000_watchdog), the actual LAA is in one * of the RARs and no incoming packets directed to this port - * are dropped. Eventaully the LAA will be in RAR[0] and + * are dropped. Eventaully the LAA will be in RAR[0] and * RAR[14] */ - e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, + e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, E1000_RAR_ENTRIES - 1); } - if(adapter->hw.mac_type == e1000_82542_rev2_0) + if (adapter->hw.mac_type == e1000_82542_rev2_0) e1000_leave_82542_rst(adapter); return 0; @@ -2086,9 +2270,9 @@ e1000_set_multi(struct net_device *netdev) rctl = E1000_READ_REG(hw, RCTL); - if(netdev->flags & IFF_PROMISC) { + if (netdev->flags & IFF_PROMISC) { rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - } else if(netdev->flags & IFF_ALLMULTI) { + } else if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; rctl &= ~E1000_RCTL_UPE; } else { @@ -2099,7 +2283,7 @@ e1000_set_multi(struct net_device *netdev) /* 82542 2.0 needs to be in reset to write receive address registers */ - if(hw->mac_type == e1000_82542_rev2_0) + if (hw->mac_type == e1000_82542_rev2_0) e1000_enter_82542_rst(adapter); /* load the first 14 multicast address into the exact filters 1-14 @@ -2109,7 +2293,7 @@ e1000_set_multi(struct net_device *netdev) */ mc_ptr = netdev->mc_list; - for(i = 1; i < rar_entries; i++) { + for (i = 1; i < rar_entries; i++) { if (mc_ptr) { e1000_rar_set(hw, mc_ptr->dmi_addr, i); mc_ptr = mc_ptr->next; @@ -2121,17 +2305,17 @@ e1000_set_multi(struct net_device *netdev) /* clear the old settings from the multicast hash table */ - for(i = 0; i < E1000_NUM_MTA_REGISTERS; i++) + for (i = 0; i < E1000_NUM_MTA_REGISTERS; i++) E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); /* load any remaining addresses into the hash table */ - for(; mc_ptr; mc_ptr = mc_ptr->next) { + for (; mc_ptr; mc_ptr = mc_ptr->next) { hash_value = e1000_hash_mc_addr(hw, mc_ptr->dmi_addr); e1000_mta_set(hw, hash_value); } - if(hw->mac_type == e1000_82542_rev2_0) + if (hw->mac_type == e1000_82542_rev2_0) e1000_leave_82542_rst(adapter); } @@ -2157,8 +2341,8 @@ e1000_82547_tx_fifo_stall(unsigned long data) struct net_device *netdev = adapter->netdev; uint32_t tctl; - if(atomic_read(&adapter->tx_fifo_stall)) { - if((E1000_READ_REG(&adapter->hw, TDT) == + if (atomic_read(&adapter->tx_fifo_stall)) { + if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) && (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) && @@ -2204,24 +2388,24 @@ static void e1000_watchdog_task(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct e1000_tx_ring *txdr = &adapter->tx_ring[0]; + struct e1000_tx_ring *txdr = adapter->tx_ring; uint32_t link; e1000_check_for_link(&adapter->hw); if (adapter->hw.mac_type == e1000_82573) { e1000_enable_tx_pkt_filtering(&adapter->hw); - if(adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id) + if (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id) e1000_update_mng_vlan(adapter); - } + } - if((adapter->hw.media_type == e1000_media_type_internal_serdes) && + if ((adapter->hw.media_type == e1000_media_type_internal_serdes) && !(E1000_READ_REG(&adapter->hw, TXCW) & E1000_TXCW_ANE)) link = !adapter->hw.serdes_link_down; else link = E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU; - if(link) { - if(!netif_carrier_ok(netdev)) { + if (link) { + if (!netif_carrier_ok(netdev)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); @@ -2231,13 +2415,28 @@ e1000_watchdog_task(struct e1000_adapter *adapter) adapter->link_duplex == FULL_DUPLEX ? "Full Duplex" : "Half Duplex"); + /* tweak tx_queue_len according to speed/duplex */ + netdev->tx_queue_len = adapter->tx_queue_len; + adapter->tx_timeout_factor = 1; + if (adapter->link_duplex == HALF_DUPLEX) { + switch (adapter->link_speed) { + case SPEED_10: + netdev->tx_queue_len = 10; + adapter->tx_timeout_factor = 8; + break; + case SPEED_100: + netdev->tx_queue_len = 100; + break; + } + } + netif_carrier_on(netdev); netif_wake_queue(netdev); mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); adapter->smartspeed = 0; } } else { - if(netif_carrier_ok(netdev)) { + if (netif_carrier_ok(netdev)) { adapter->link_speed = 0; adapter->link_duplex = 0; DPRINTK(LINK, INFO, "NIC Link is Down\n"); @@ -2263,7 +2462,10 @@ e1000_watchdog_task(struct e1000_adapter *adapter) e1000_update_adaptive(&adapter->hw); - if (adapter->num_queues == 1 && !netif_carrier_ok(netdev)) { +#ifdef CONFIG_E1000_MQ + txdr = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id()); +#endif + if (!netif_carrier_ok(netdev)) { if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going @@ -2274,12 +2476,12 @@ e1000_watchdog_task(struct e1000_adapter *adapter) } /* Dynamic mode for Interrupt Throttle Rate (ITR) */ - if(adapter->hw.mac_type >= e1000_82540 && adapter->itr == 1) { + if (adapter->hw.mac_type >= e1000_82540 && adapter->itr == 1) { /* Symmetric Tx/Rx gets a reduced ITR=2000; Total * asymmetrical Tx or Rx gets ITR=8000; everyone * else is between 2000-8000. */ uint32_t goc = (adapter->gotcl + adapter->gorcl) / 10000; - uint32_t dif = (adapter->gotcl > adapter->gorcl ? + uint32_t dif = (adapter->gotcl > adapter->gorcl ? adapter->gotcl - adapter->gorcl : adapter->gorcl - adapter->gotcl) / 10000; uint32_t itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000; @@ -2292,7 +2494,7 @@ e1000_watchdog_task(struct e1000_adapter *adapter) /* Force detection of hung controller every watchdog period */ adapter->detect_tx_hung = TRUE; - /* With 82571 controllers, LAA may be overwritten due to controller + /* With 82571 controllers, LAA may be overwritten due to controller * reset from the other port. Set the appropriate LAA in RAR[0] */ if (adapter->hw.mac_type == e1000_82571 && adapter->hw.laa_is_present) e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0); @@ -2314,13 +2516,14 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, { #ifdef NETIF_F_TSO struct e1000_context_desc *context_desc; + struct e1000_buffer *buffer_info; unsigned int i; uint32_t cmd_length = 0; uint16_t ipcse = 0, tucse, mss; uint8_t ipcss, ipcso, tucss, tucso, hdr_len; int err; - if(skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->tso_size) { if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (err) @@ -2329,7 +2532,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); mss = skb_shinfo(skb)->tso_size; - if(skb->protocol == ntohs(ETH_P_IP)) { + if (skb->protocol == ntohs(ETH_P_IP)) { skb->nh.iph->tot_len = 0; skb->nh.iph->check = 0; skb->h.th->check = @@ -2341,7 +2544,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, cmd_length = E1000_TXD_CMD_IP; ipcse = skb->h.raw - skb->data - 1; #ifdef NETIF_F_TSO_IPV6 - } else if(skb->protocol == ntohs(ETH_P_IPV6)) { + } else if (skb->protocol == ntohs(ETH_P_IPV6)) { skb->nh.ipv6h->payload_len = 0; skb->h.th->check = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, @@ -2363,6 +2566,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, i = tx_ring->next_to_use; context_desc = E1000_CONTEXT_DESC(*tx_ring, i); + buffer_info = &tx_ring->buffer_info[i]; context_desc->lower_setup.ip_fields.ipcss = ipcss; context_desc->lower_setup.ip_fields.ipcso = ipcso; @@ -2374,14 +2578,16 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; context_desc->cmd_and_length = cpu_to_le32(cmd_length); + buffer_info->time_stamp = jiffies; + if (++i == tx_ring->count) i = 0; tx_ring->next_to_use = i; - return 1; + return TRUE; } #endif - return 0; + return FALSE; } static inline boolean_t @@ -2389,13 +2595,15 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, struct sk_buff *skb) { struct e1000_context_desc *context_desc; + struct e1000_buffer *buffer_info; unsigned int i; uint8_t css; - if(likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_HW)) { css = skb->h.raw - skb->data; i = tx_ring->next_to_use; + buffer_info = &tx_ring->buffer_info[i]; context_desc = E1000_CONTEXT_DESC(*tx_ring, i); context_desc->upper_setup.tcp_fields.tucss = css; @@ -2404,6 +2612,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); + buffer_info->time_stamp = jiffies; + if (unlikely(++i == tx_ring->count)) i = 0; tx_ring->next_to_use = i; @@ -2429,7 +2639,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, i = tx_ring->next_to_use; - while(len) { + while (len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, max_per_txd); #ifdef NETIF_F_TSO @@ -2445,7 +2655,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, /* Workaround for premature desc write-backs * in TSO mode. Append 4-byte sentinel desc */ - if(unlikely(mss && !nr_frags && size == len && size > 8)) + if (unlikely(mss && !nr_frags && size == len && size > 8)) size -= 4; #endif /* work-around for errata 10 and it applies @@ -2453,13 +2663,13 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, * The fix is to make sure that the first descriptor of a * packet is smaller than 2048 - 16 - 16 (or 2016) bytes */ - if(unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && + if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && (size > 2015) && count == 0)) size = 2015; - + /* Workaround for potential 82544 hang in PCI-X. Avoid * terminating buffers within evenly-aligned dwords. */ - if(unlikely(adapter->pcix_82544 && + if (unlikely(adapter->pcix_82544 && !((unsigned long)(skb->data + offset + size - 1) & 4) && size > 4)) size -= 4; @@ -2475,29 +2685,29 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, len -= size; offset += size; count++; - if(unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) i = 0; } - for(f = 0; f < nr_frags; f++) { + for (f = 0; f < nr_frags; f++) { struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; len = frag->size; offset = frag->page_offset; - while(len) { + while (len) { buffer_info = &tx_ring->buffer_info[i]; size = min(len, max_per_txd); #ifdef NETIF_F_TSO /* Workaround for premature desc write-backs * in TSO mode. Append 4-byte sentinel desc */ - if(unlikely(mss && f == (nr_frags-1) && size == len && size > 8)) + if (unlikely(mss && f == (nr_frags-1) && size == len && size > 8)) size -= 4; #endif /* Workaround for potential 82544 hang in PCI-X. * Avoid terminating buffers within evenly-aligned * dwords. */ - if(unlikely(adapter->pcix_82544 && + if (unlikely(adapter->pcix_82544 && !((unsigned long)(frag->page+offset+size-1) & 4) && size > 4)) size -= 4; @@ -2514,7 +2724,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, len -= size; offset += size; count++; - if(unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) i = 0; } } @@ -2534,35 +2744,35 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; unsigned int i; - if(likely(tx_flags & E1000_TX_FLAGS_TSO)) { + if (likely(tx_flags & E1000_TX_FLAGS_TSO)) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | E1000_TXD_CMD_TSE; txd_upper |= E1000_TXD_POPTS_TXSM << 8; - if(likely(tx_flags & E1000_TX_FLAGS_IPV4)) + if (likely(tx_flags & E1000_TX_FLAGS_IPV4)) txd_upper |= E1000_TXD_POPTS_IXSM << 8; } - if(likely(tx_flags & E1000_TX_FLAGS_CSUM)) { + if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) { txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; txd_upper |= E1000_TXD_POPTS_TXSM << 8; } - if(unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) { + if (unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) { txd_lower |= E1000_TXD_CMD_VLE; txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } i = tx_ring->next_to_use; - while(count--) { + while (count--) { buffer_info = &tx_ring->buffer_info[i]; tx_desc = E1000_TX_DESC(*tx_ring, i); tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length); tx_desc->upper.data = cpu_to_le32(txd_upper); - if(unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) i = 0; } tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); @@ -2597,20 +2807,20 @@ e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct sk_buff *skb) E1000_ROUNDUP(skb_fifo_len, E1000_FIFO_HDR); - if(adapter->link_duplex != HALF_DUPLEX) + if (adapter->link_duplex != HALF_DUPLEX) goto no_fifo_stall_required; - if(atomic_read(&adapter->tx_fifo_stall)) + if (atomic_read(&adapter->tx_fifo_stall)) return 1; - if(skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { + if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { atomic_set(&adapter->tx_fifo_stall, 1); return 1; } no_fifo_stall_required: adapter->tx_fifo_head += skb_fifo_len; - if(adapter->tx_fifo_head >= adapter->tx_fifo_size) + if (adapter->tx_fifo_head >= adapter->tx_fifo_size) adapter->tx_fifo_head -= adapter->tx_fifo_size; return 0; } @@ -2621,27 +2831,27 @@ e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) { struct e1000_hw *hw = &adapter->hw; uint16_t length, offset; - if(vlan_tx_tag_present(skb)) { - if(!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && + if (vlan_tx_tag_present(skb)) { + if (!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && ( adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) ) return 0; } - if ((skb->len > MINIMUM_DHCP_PACKET_SIZE) && (!skb->protocol)) { + if ((skb->len > MINIMUM_DHCP_PACKET_SIZE) && (!skb->protocol)) { struct ethhdr *eth = (struct ethhdr *) skb->data; - if((htons(ETH_P_IP) == eth->h_proto)) { - const struct iphdr *ip = + if ((htons(ETH_P_IP) == eth->h_proto)) { + const struct iphdr *ip = (struct iphdr *)((uint8_t *)skb->data+14); - if(IPPROTO_UDP == ip->protocol) { - struct udphdr *udp = - (struct udphdr *)((uint8_t *)ip + + if (IPPROTO_UDP == ip->protocol) { + struct udphdr *udp = + (struct udphdr *)((uint8_t *)ip + (ip->ihl << 2)); - if(ntohs(udp->dest) == 67) { + if (ntohs(udp->dest) == 67) { offset = (uint8_t *)udp + 8 - skb->data; length = skb->len - offset; return e1000_mng_write_dhcp_info(hw, - (uint8_t *)udp + 8, + (uint8_t *)udp + 8, length); } } @@ -2664,7 +2874,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) unsigned int nr_frags = 0; unsigned int mss = 0; int count = 0; - int tso; + int tso; unsigned int f; len -= skb->data_len; @@ -2687,16 +2897,35 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * 4 = ceil(buffer len/mss). To make sure we don't * overrun the FIFO, adjust the max buffer len if mss * drops. */ - if(mss) { + if (mss) { + uint8_t hdr_len; max_per_txd = min(mss << 2, max_per_txd); max_txd_pwr = fls(max_per_txd) - 1; + + /* TSO Workaround for 82571/2 Controllers -- if skb->data + * points to just header, pull a few bytes of payload from + * frags into skb->data */ + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + if (skb->data_len && (hdr_len == (skb->len - skb->data_len)) && + (adapter->hw.mac_type == e1000_82571 || + adapter->hw.mac_type == e1000_82572)) { + unsigned int pull_size; + pull_size = min((unsigned int)4, skb->data_len); + if (!__pskb_pull_tail(skb, pull_size)) { + printk(KERN_ERR "__pskb_pull_tail failed.\n"); + dev_kfree_skb_any(skb); + return -EFAULT; + } + len = skb->len - skb->data_len; + } } - if((mss) || (skb->ip_summed == CHECKSUM_HW)) + /* reserve a descriptor for the offload context */ + if ((mss) || (skb->ip_summed == CHECKSUM_HW)) count++; count++; #else - if(skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_HW) count++; #endif @@ -2709,45 +2938,24 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) count += TXD_USE_COUNT(len, max_txd_pwr); - if(adapter->pcix_82544) + if (adapter->pcix_82544) count++; - /* work-around for errata 10 and it applies to all controllers + /* work-around for errata 10 and it applies to all controllers * in PCI-X mode, so add one more descriptor to the count */ - if(unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && + if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && (len > 2015))) count++; nr_frags = skb_shinfo(skb)->nr_frags; - for(f = 0; f < nr_frags; f++) + for (f = 0; f < nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, max_txd_pwr); - if(adapter->pcix_82544) + if (adapter->pcix_82544) count += nr_frags; -#ifdef NETIF_F_TSO - /* TSO Workaround for 82571/2 Controllers -- if skb->data - * points to just header, pull a few bytes of payload from - * frags into skb->data */ - if (skb_shinfo(skb)->tso_size) { - uint8_t hdr_len; - hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - if (skb->data_len && (hdr_len < (skb->len - skb->data_len)) && - (adapter->hw.mac_type == e1000_82571 || - adapter->hw.mac_type == e1000_82572)) { - unsigned int pull_size; - pull_size = min((unsigned int)4, skb->data_len); - if (!__pskb_pull_tail(skb, pull_size)) { - printk(KERN_ERR "__pskb_pull_tail failed.\n"); - dev_kfree_skb_any(skb); - return -EFAULT; - } - } - } -#endif - - if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) + if (adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) e1000_transfer_dhcp_info(adapter, skb); local_irq_save(flags); @@ -2765,8 +2973,8 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - if(unlikely(adapter->hw.mac_type == e1000_82547)) { - if(unlikely(e1000_82547_fifo_workaround(adapter, skb))) { + if (unlikely(adapter->hw.mac_type == e1000_82547)) { + if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { netif_stop_queue(netdev); mod_timer(&adapter->tx_fifo_stall_timer, jiffies); spin_unlock_irqrestore(&tx_ring->tx_lock, flags); @@ -2774,13 +2982,13 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } } - if(unlikely(adapter->vlgrp && vlan_tx_tag_present(skb))) { + if (unlikely(adapter->vlgrp && vlan_tx_tag_present(skb))) { tx_flags |= E1000_TX_FLAGS_VLAN; tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); } first = tx_ring->next_to_use; - + tso = e1000_tso(adapter, tx_ring, skb); if (tso < 0) { dev_kfree_skb_any(skb); @@ -2833,6 +3041,7 @@ e1000_tx_timeout_task(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); + adapter->tx_timeout_count++; e1000_down(adapter); e1000_up(adapter); } @@ -2850,7 +3059,7 @@ e1000_get_stats(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); - e1000_update_stats(adapter); + /* only return the current stats */ return &adapter->net_stats; } @@ -2868,56 +3077,57 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) struct e1000_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; - if((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || - (max_frame > MAX_JUMBO_FRAME_SIZE)) { - DPRINTK(PROBE, ERR, "Invalid MTU setting\n"); - return -EINVAL; - } - -#define MAX_STD_JUMBO_FRAME_SIZE 9234 - /* might want this to be bigger enum check... */ - /* 82571 controllers limit jumbo frame size to 10500 bytes */ - if ((adapter->hw.mac_type == e1000_82571 || - adapter->hw.mac_type == e1000_82572) && - max_frame > MAX_STD_JUMBO_FRAME_SIZE) { - DPRINTK(PROBE, ERR, "MTU > 9216 bytes not supported " - "on 82571 and 82572 controllers.\n"); - return -EINVAL; - } - - if(adapter->hw.mac_type == e1000_82573 && - max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { - DPRINTK(PROBE, ERR, "Jumbo Frames not supported " - "on 82573\n"); + if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || + (max_frame > MAX_JUMBO_FRAME_SIZE)) { + DPRINTK(PROBE, ERR, "Invalid MTU setting\n"); return -EINVAL; } - if(adapter->hw.mac_type > e1000_82547_rev_2) { - adapter->rx_buffer_len = max_frame; - E1000_ROUNDUP(adapter->rx_buffer_len, 1024); - } else { - if(unlikely((adapter->hw.mac_type < e1000_82543) && - (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) { - DPRINTK(PROBE, ERR, "Jumbo Frames not supported " - "on 82542\n"); + /* Adapter-specific max frame size limits. */ + switch (adapter->hw.mac_type) { + case e1000_82542_rev2_0: + case e1000_82542_rev2_1: + case e1000_82573: + if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { + DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); + return -EINVAL; + } + break; + case e1000_82571: + case e1000_82572: +#define MAX_STD_JUMBO_FRAME_SIZE 9234 + if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { + DPRINTK(PROBE, ERR, "MTU > 9216 not supported.\n"); return -EINVAL; - - } else { - if(max_frame <= E1000_RXBUFFER_2048) { - adapter->rx_buffer_len = E1000_RXBUFFER_2048; - } else if(max_frame <= E1000_RXBUFFER_4096) { - adapter->rx_buffer_len = E1000_RXBUFFER_4096; - } else if(max_frame <= E1000_RXBUFFER_8192) { - adapter->rx_buffer_len = E1000_RXBUFFER_8192; - } else if(max_frame <= E1000_RXBUFFER_16384) { - adapter->rx_buffer_len = E1000_RXBUFFER_16384; - } } + break; + default: + /* Capable of supporting up to MAX_JUMBO_FRAME_SIZE limit. */ + break; } + /* since the driver code now supports splitting a packet across + * multiple descriptors, most of the fifo related limitations on + * jumbo frame traffic have gone away. + * simply use 2k descriptors for everything. + * + * NOTE: dev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN + * means we reserve 2 more, this pushes us to allocate from the next + * larger slab size + * i.e. RXBUFFER_2048 --> size-4096 slab */ + + /* recent hardware supports 1KB granularity */ + if (adapter->hw.mac_type > e1000_82547_rev_2) { + adapter->rx_buffer_len = + ((max_frame < E1000_RXBUFFER_2048) ? + max_frame : E1000_RXBUFFER_2048); + E1000_ROUNDUP(adapter->rx_buffer_len, 1024); + } else + adapter->rx_buffer_len = E1000_RXBUFFER_2048; + netdev->mtu = new_mtu; - if(netif_running(netdev)) { + if (netif_running(netdev)) { e1000_down(adapter); e1000_up(adapter); } @@ -3004,7 +3214,7 @@ e1000_update_stats(struct e1000_adapter *adapter) hw->collision_delta = E1000_READ_REG(hw, COLC); adapter->stats.colc += hw->collision_delta; - if(hw->mac_type >= e1000_82543) { + if (hw->mac_type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(hw, ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(hw, RXERRC); adapter->stats.tncrs += E1000_READ_REG(hw, TNCRS); @@ -3012,7 +3222,7 @@ e1000_update_stats(struct e1000_adapter *adapter) adapter->stats.tsctc += E1000_READ_REG(hw, TSCTC); adapter->stats.tsctfc += E1000_READ_REG(hw, TSCTFC); } - if(hw->mac_type > e1000_82547_rev_2) { + if (hw->mac_type > e1000_82547_rev_2) { adapter->stats.iac += E1000_READ_REG(hw, IAC); adapter->stats.icrxoc += E1000_READ_REG(hw, ICRXOC); adapter->stats.icrxptc += E1000_READ_REG(hw, ICRXPTC); @@ -3037,12 +3247,11 @@ e1000_update_stats(struct e1000_adapter *adapter) adapter->net_stats.rx_errors = adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.rlec + adapter->stats.mpc + - adapter->stats.cexterr; + adapter->stats.rlec + adapter->stats.cexterr; + adapter->net_stats.rx_dropped = 0; adapter->net_stats.rx_length_errors = adapter->stats.rlec; adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs; adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc; - adapter->net_stats.rx_fifo_errors = adapter->stats.mpc; adapter->net_stats.rx_missed_errors = adapter->stats.mpc; /* Tx Errors */ @@ -3057,14 +3266,14 @@ e1000_update_stats(struct e1000_adapter *adapter) /* Phy Stats */ - if(hw->media_type == e1000_media_type_copper) { - if((adapter->link_speed == SPEED_1000) && + if (hw->media_type == e1000_media_type_copper) { + if ((adapter->link_speed == SPEED_1000) && (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; adapter->phy_stats.idle_errors += phy_tmp; } - if((hw->mac_type <= e1000_82546) && + if ((hw->mac_type <= e1000_82546) && (hw->phy_type == e1000_phy_m88) && !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp)) adapter->phy_stats.receive_errors += phy_tmp; @@ -3110,32 +3319,44 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; uint32_t icr = E1000_READ_REG(hw, ICR); -#if defined(CONFIG_E1000_NAPI) && defined(CONFIG_E1000_MQ) || !defined(CONFIG_E1000_NAPI) +#ifndef CONFIG_E1000_NAPI int i; +#else + /* Interrupt Auto-Mask...upon reading ICR, + * interrupts are masked. No need for the + * IMC write, but it does mean we should + * account for it ASAP. */ + if (likely(hw->mac_type >= e1000_82571)) + atomic_inc(&adapter->irq_sem); #endif - if(unlikely(!icr)) + if (unlikely(!icr)) { +#ifdef CONFIG_E1000_NAPI + if (hw->mac_type >= e1000_82571) + e1000_irq_enable(adapter); +#endif return IRQ_NONE; /* Not our interrupt */ + } - if(unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { + if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { hw->get_link_status = 1; mod_timer(&adapter->watchdog_timer, jiffies); } #ifdef CONFIG_E1000_NAPI - atomic_inc(&adapter->irq_sem); - E1000_WRITE_REG(hw, IMC, ~0); - E1000_WRITE_FLUSH(hw); + if (unlikely(hw->mac_type < e1000_82571)) { + atomic_inc(&adapter->irq_sem); + E1000_WRITE_REG(hw, IMC, ~0); + E1000_WRITE_FLUSH(hw); + } #ifdef CONFIG_E1000_MQ if (atomic_read(&adapter->rx_sched_call_data.count) == 0) { - cpu_set(adapter->cpu_for_queue[0], - adapter->rx_sched_call_data.cpumask); - for (i = 1; i < adapter->num_queues; i++) { - cpu_set(adapter->cpu_for_queue[i], - adapter->rx_sched_call_data.cpumask); - atomic_inc(&adapter->irq_sem); - } - atomic_set(&adapter->rx_sched_call_data.count, i); + /* We must setup the cpumask once count == 0 since + * each cpu bit is cleared when the work is done. */ + adapter->rx_sched_call_data.cpumask = adapter->cpumask; + atomic_add(adapter->num_rx_queues - 1, &adapter->irq_sem); + atomic_set(&adapter->rx_sched_call_data.count, + adapter->num_rx_queues); smp_call_async_mask(&adapter->rx_sched_call_data); } else { printk("call_data.count == %u\n", atomic_read(&adapter->rx_sched_call_data.count)); @@ -3149,26 +3370,26 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) #else /* if !CONFIG_E1000_NAPI */ /* Writing IMC and IMS is needed for 82547. - Due to Hub Link bus being occupied, an interrupt - de-assertion message is not able to be sent. - When an interrupt assertion message is generated later, - two messages are re-ordered and sent out. - That causes APIC to think 82547 is in de-assertion - state, while 82547 is in assertion state, resulting - in dead lock. Writing IMC forces 82547 into - de-assertion state. - */ - if(hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2){ + * Due to Hub Link bus being occupied, an interrupt + * de-assertion message is not able to be sent. + * When an interrupt assertion message is generated later, + * two messages are re-ordered and sent out. + * That causes APIC to think 82547 is in de-assertion + * state, while 82547 is in assertion state, resulting + * in dead lock. Writing IMC forces 82547 into + * de-assertion state. + */ + if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) { atomic_inc(&adapter->irq_sem); E1000_WRITE_REG(hw, IMC, ~0); } - for(i = 0; i < E1000_MAX_INTR; i++) - if(unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & + for (i = 0; i < E1000_MAX_INTR; i++) + if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & !e1000_clean_tx_irq(adapter, adapter->tx_ring))) break; - if(hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) + if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) e1000_irq_enable(adapter); #endif /* CONFIG_E1000_NAPI */ @@ -3187,7 +3408,7 @@ e1000_clean(struct net_device *poll_dev, int *budget) { struct e1000_adapter *adapter; int work_to_do = min(*budget, poll_dev->quota); - int tx_cleaned, i = 0, work_done = 0; + int tx_cleaned = 0, i = 0, work_done = 0; /* Must NOT use netdev_priv macro here. */ adapter = poll_dev->priv; @@ -3198,19 +3419,31 @@ e1000_clean(struct net_device *poll_dev, int *budget) while (poll_dev != &adapter->polling_netdev[i]) { i++; - if (unlikely(i == adapter->num_queues)) + if (unlikely(i == adapter->num_rx_queues)) BUG(); } - tx_cleaned = e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]); + if (likely(adapter->num_tx_queues == 1)) { + /* e1000_clean is called per-cpu. This lock protects + * tx_ring[0] from being cleaned by multiple cpus + * simultaneously. A failure obtaining the lock means + * tx_ring[0] is currently being cleaned anyway. */ + if (spin_trylock(&adapter->tx_queue_lock)) { + tx_cleaned = e1000_clean_tx_irq(adapter, + &adapter->tx_ring[0]); + spin_unlock(&adapter->tx_queue_lock); + } + } else + tx_cleaned = e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]); + adapter->clean_rx(adapter, &adapter->rx_ring[i], &work_done, work_to_do); *budget -= work_done; poll_dev->quota -= work_done; - + /* If no Tx and not enough Rx work done, exit the polling mode */ - if((!tx_cleaned && (work_done == 0)) || + if ((!tx_cleaned && (work_done == 0)) || !netif_running(adapter->netdev)) { quit_polling: netif_rx_complete(poll_dev); @@ -3242,22 +3475,24 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, eop_desc = E1000_TX_DESC(*tx_ring, eop); while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { - for(cleaned = FALSE; !cleaned; ) { + for (cleaned = FALSE; !cleaned; ) { tx_desc = E1000_TX_DESC(*tx_ring, i); buffer_info = &tx_ring->buffer_info[i]; cleaned = (i == eop); +#ifdef CONFIG_E1000_MQ + tx_ring->tx_stats.bytes += buffer_info->length; +#endif e1000_unmap_and_free_tx_resource(adapter, buffer_info); + memset(tx_desc, 0, sizeof(struct e1000_tx_desc)); - tx_desc->buffer_addr = 0; - tx_desc->lower.data = 0; - tx_desc->upper.data = 0; - - if(unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) i = 0; } - tx_ring->pkt++; - +#ifdef CONFIG_E1000_MQ + tx_ring->tx_stats.packets++; +#endif + eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); } @@ -3266,7 +3501,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, spin_lock(&tx_ring->tx_lock); - if(unlikely(cleaned && netif_queue_stopped(netdev) && + if (unlikely(cleaned && netif_queue_stopped(netdev) && netif_carrier_ok(netdev))) netif_wake_queue(netdev); @@ -3276,32 +3511,31 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ adapter->detect_tx_hung = FALSE; - if (tx_ring->buffer_info[i].dma && - time_after(jiffies, tx_ring->buffer_info[i].time_stamp + HZ) + if (tx_ring->buffer_info[eop].dma && + time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + + adapter->tx_timeout_factor * HZ) && !(E1000_READ_REG(&adapter->hw, STATUS) & - E1000_STATUS_TXOFF)) { + E1000_STATUS_TXOFF)) { /* detected Tx unit hang */ - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = E1000_TX_DESC(*tx_ring, eop); DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n" + " Tx Queue <%lu>\n" " TDH <%x>\n" " TDT <%x>\n" " next_to_use <%x>\n" " next_to_clean <%x>\n" "buffer_info[next_to_clean]\n" - " dma <%llx>\n" " time_stamp <%lx>\n" " next_to_watch <%x>\n" " jiffies <%lx>\n" " next_to_watch.status <%x>\n", + (unsigned long)((tx_ring - adapter->tx_ring) / + sizeof(struct e1000_tx_ring)), readl(adapter->hw.hw_addr + tx_ring->tdh), readl(adapter->hw.hw_addr + tx_ring->tdt), tx_ring->next_to_use, - i, - (unsigned long long)tx_ring->buffer_info[i].dma, - tx_ring->buffer_info[i].time_stamp, + tx_ring->next_to_clean, + tx_ring->buffer_info[eop].time_stamp, eop, jiffies, eop_desc->upper.fields.status); @@ -3329,21 +3563,21 @@ e1000_rx_checksum(struct e1000_adapter *adapter, skb->ip_summed = CHECKSUM_NONE; /* 82543 or newer only */ - if(unlikely(adapter->hw.mac_type < e1000_82543)) return; + if (unlikely(adapter->hw.mac_type < e1000_82543)) return; /* Ignore Checksum bit is set */ - if(unlikely(status & E1000_RXD_STAT_IXSM)) return; + if (unlikely(status & E1000_RXD_STAT_IXSM)) return; /* TCP/UDP checksum error bit is set */ - if(unlikely(errors & E1000_RXD_ERR_TCPE)) { + if (unlikely(errors & E1000_RXD_ERR_TCPE)) { /* let the stack verify checksum errors */ adapter->hw_csum_err++; return; } /* TCP/UDP Checksum has not been calculated */ - if(adapter->hw.mac_type <= e1000_82547_rev_2) { - if(!(status & E1000_RXD_STAT_TCPCS)) + if (adapter->hw.mac_type <= e1000_82547_rev_2) { + if (!(status & E1000_RXD_STAT_TCPCS)) return; } else { - if(!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) + if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) return; } /* It must be a TCP or UDP packet with a valid checksum */ @@ -3379,46 +3613,87 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; - struct e1000_rx_desc *rx_desc; - struct e1000_buffer *buffer_info; - struct sk_buff *skb; + struct e1000_rx_desc *rx_desc, *next_rxd; + struct e1000_buffer *buffer_info, *next_buffer; unsigned long flags; uint32_t length; uint8_t last_byte; unsigned int i; - boolean_t cleaned = FALSE; + int cleaned_count = 0; + boolean_t cleaned = FALSE, multi_descriptor = FALSE; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); + buffer_info = &rx_ring->buffer_info[i]; - while(rx_desc->status & E1000_RXD_STAT_DD) { - buffer_info = &rx_ring->buffer_info[i]; + while (rx_desc->status & E1000_RXD_STAT_DD) { + struct sk_buff *skb, *next_skb; + u8 status; #ifdef CONFIG_E1000_NAPI - if(*work_done >= work_to_do) + if (*work_done >= work_to_do) break; (*work_done)++; #endif - cleaned = TRUE; + status = rx_desc->status; + skb = buffer_info->skb; + buffer_info->skb = NULL; + + if (++i == rx_ring->count) i = 0; + next_rxd = E1000_RX_DESC(*rx_ring, i); + next_buffer = &rx_ring->buffer_info[i]; + next_skb = next_buffer->skb; + cleaned = TRUE; + cleaned_count++; pci_unmap_single(pdev, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); - skb = buffer_info->skb; length = le16_to_cpu(rx_desc->length); - if(unlikely(!(rx_desc->status & E1000_RXD_STAT_EOP))) { - /* All receives must fit into a single buffer */ - E1000_DBG("%s: Receive packet consumed multiple" - " buffers\n", netdev->name); - dev_kfree_skb_irq(skb); + skb_put(skb, length); + + if (!(status & E1000_RXD_STAT_EOP)) { + if (!rx_ring->rx_skb_top) { + rx_ring->rx_skb_top = skb; + rx_ring->rx_skb_top->len = length; + rx_ring->rx_skb_prev = skb; + } else { + if (skb_shinfo(rx_ring->rx_skb_top)->frag_list) { + rx_ring->rx_skb_prev->next = skb; + skb->prev = rx_ring->rx_skb_prev; + } else { + skb_shinfo(rx_ring->rx_skb_top)->frag_list = skb; + } + rx_ring->rx_skb_prev = skb; + rx_ring->rx_skb_top->data_len += length; + } goto next_desc; + } else { + if (rx_ring->rx_skb_top) { + if (skb_shinfo(rx_ring->rx_skb_top) + ->frag_list) { + rx_ring->rx_skb_prev->next = skb; + skb->prev = rx_ring->rx_skb_prev; + } else + skb_shinfo(rx_ring->rx_skb_top) + ->frag_list = skb; + + rx_ring->rx_skb_top->data_len += length; + rx_ring->rx_skb_top->len += + rx_ring->rx_skb_top->data_len; + + skb = rx_ring->rx_skb_top; + multi_descriptor = TRUE; + rx_ring->rx_skb_top = NULL; + rx_ring->rx_skb_prev = NULL; + } } - if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { last_byte = *(skb->data + length - 1); - if(TBI_ACCEPT(&adapter->hw, rx_desc->status, + if (TBI_ACCEPT(&adapter->hw, status, rx_desc->errors, length, last_byte)) { spin_lock_irqsave(&adapter->stats_lock, flags); e1000_tbi_adjust_stats(&adapter->hw, @@ -3433,18 +3708,41 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, } } - /* Good Receive */ - skb_put(skb, length - ETHERNET_FCS_SIZE); + /* code added for copybreak, this should improve + * performance for small packets with large amounts + * of reassembly being done in the stack */ +#define E1000_CB_LENGTH 256 + if ((length < E1000_CB_LENGTH) && + !rx_ring->rx_skb_top && + /* or maybe (status & E1000_RXD_STAT_EOP) && */ + !multi_descriptor) { + struct sk_buff *new_skb = + dev_alloc_skb(length + NET_IP_ALIGN); + if (new_skb) { + skb_reserve(new_skb, NET_IP_ALIGN); + new_skb->dev = netdev; + memcpy(new_skb->data - NET_IP_ALIGN, + skb->data - NET_IP_ALIGN, + length + NET_IP_ALIGN); + /* save the skb in buffer_info as good */ + buffer_info->skb = skb; + skb = new_skb; + skb_put(skb, length); + } + } + + /* end copybreak code */ /* Receive Checksum Offload */ e1000_rx_checksum(adapter, - (uint32_t)(rx_desc->status) | + (uint32_t)(status) | ((uint32_t)(rx_desc->errors) << 24), rx_desc->csum, skb); + skb->protocol = eth_type_trans(skb, netdev); #ifdef CONFIG_E1000_NAPI - if(unlikely(adapter->vlgrp && - (rx_desc->status & E1000_RXD_STAT_VP))) { + if (unlikely(adapter->vlgrp && + (status & E1000_RXD_STAT_VP))) { vlan_hwaccel_receive_skb(skb, adapter->vlgrp, le16_to_cpu(rx_desc->special) & E1000_RXD_SPC_VLAN_MASK); @@ -3452,8 +3750,8 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, netif_receive_skb(skb); } #else /* CONFIG_E1000_NAPI */ - if(unlikely(adapter->vlgrp && - (rx_desc->status & E1000_RXD_STAT_VP))) { + if (unlikely(adapter->vlgrp && + (status & E1000_RXD_STAT_VP))) { vlan_hwaccel_rx(skb, adapter->vlgrp, le16_to_cpu(rx_desc->special) & E1000_RXD_SPC_VLAN_MASK); @@ -3462,17 +3760,28 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; - rx_ring->pkt++; +#ifdef CONFIG_E1000_MQ + rx_ring->rx_stats.packets++; + rx_ring->rx_stats.bytes += length; +#endif next_desc: rx_desc->status = 0; - buffer_info->skb = NULL; - if(unlikely(++i == rx_ring->count)) i = 0; - rx_desc = E1000_RX_DESC(*rx_ring, i); + /* return some buffers to hardware, one at a time is too slow */ + if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { + adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = next_rxd; + buffer_info = next_buffer; } rx_ring->next_to_clean = i; - adapter->alloc_rx_buf(adapter, rx_ring); + + cleaned_count = E1000_DESC_UNUSED(rx_ring); + if (cleaned_count) + adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); return cleaned; } @@ -3492,52 +3801,59 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring) #endif { - union e1000_rx_desc_packet_split *rx_desc; + union e1000_rx_desc_packet_split *rx_desc, *next_rxd; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; - struct e1000_buffer *buffer_info; + struct e1000_buffer *buffer_info, *next_buffer; struct e1000_ps_page *ps_page; struct e1000_ps_page_dma *ps_page_dma; - struct sk_buff *skb; + struct sk_buff *skb, *next_skb; unsigned int i, j; uint32_t length, staterr; + int cleaned_count = 0; boolean_t cleaned = FALSE; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC_PS(*rx_ring, i); staterr = le32_to_cpu(rx_desc->wb.middle.status_error); + buffer_info = &rx_ring->buffer_info[i]; - while(staterr & E1000_RXD_STAT_DD) { - buffer_info = &rx_ring->buffer_info[i]; + while (staterr & E1000_RXD_STAT_DD) { ps_page = &rx_ring->ps_page[i]; ps_page_dma = &rx_ring->ps_page_dma[i]; #ifdef CONFIG_E1000_NAPI - if(unlikely(*work_done >= work_to_do)) + if (unlikely(*work_done >= work_to_do)) break; (*work_done)++; #endif + skb = buffer_info->skb; + + if (++i == rx_ring->count) i = 0; + next_rxd = E1000_RX_DESC_PS(*rx_ring, i); + next_buffer = &rx_ring->buffer_info[i]; + next_skb = next_buffer->skb; + cleaned = TRUE; + cleaned_count++; pci_unmap_single(pdev, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); - skb = buffer_info->skb; - - if(unlikely(!(staterr & E1000_RXD_STAT_EOP))) { + if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) { E1000_DBG("%s: Packet Split buffers didn't pick up" " the full packet\n", netdev->name); dev_kfree_skb_irq(skb); goto next_desc; } - if(unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { dev_kfree_skb_irq(skb); goto next_desc; } length = le16_to_cpu(rx_desc->wb.middle.length0); - if(unlikely(!length)) { + if (unlikely(!length)) { E1000_DBG("%s: Last part of the packet spanning" " multiple descriptors\n", netdev->name); dev_kfree_skb_irq(skb); @@ -3547,8 +3863,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, /* Good Receive */ skb_put(skb, length); - for(j = 0; j < adapter->rx_ps_pages; j++) { - if(!(length = le16_to_cpu(rx_desc->wb.upper.length[j]))) + for (j = 0; j < adapter->rx_ps_pages; j++) { + if (!(length = le16_to_cpu(rx_desc->wb.upper.length[j]))) break; pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j], @@ -3568,15 +3884,11 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, rx_desc->wb.lower.hi_dword.csum_ip.csum, skb); skb->protocol = eth_type_trans(skb, netdev); - if(likely(rx_desc->wb.upper.header_status & - E1000_RXDPS_HDRSTAT_HDRSP)) { + if (likely(rx_desc->wb.upper.header_status & + E1000_RXDPS_HDRSTAT_HDRSP)) adapter->rx_hdr_split++; -#ifdef HAVE_RX_ZERO_COPY - skb_shinfo(skb)->zero_copy = TRUE; -#endif - } #ifdef CONFIG_E1000_NAPI - if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { + if (unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { vlan_hwaccel_receive_skb(skb, adapter->vlgrp, le16_to_cpu(rx_desc->wb.middle.vlan) & E1000_RXD_SPC_VLAN_MASK); @@ -3584,7 +3896,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, netif_receive_skb(skb); } #else /* CONFIG_E1000_NAPI */ - if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { + if (unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { vlan_hwaccel_rx(skb, adapter->vlgrp, le16_to_cpu(rx_desc->wb.middle.vlan) & E1000_RXD_SPC_VLAN_MASK); @@ -3593,18 +3905,31 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; - rx_ring->pkt++; +#ifdef CONFIG_E1000_MQ + rx_ring->rx_stats.packets++; + rx_ring->rx_stats.bytes += length; +#endif next_desc: rx_desc->wb.middle.status_error &= ~0xFF; buffer_info->skb = NULL; - if(unlikely(++i == rx_ring->count)) i = 0; - rx_desc = E1000_RX_DESC_PS(*rx_ring, i); + /* return some buffers to hardware, one at a time is too slow */ + if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { + adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = next_rxd; + buffer_info = next_buffer; + staterr = le32_to_cpu(rx_desc->wb.middle.status_error); } rx_ring->next_to_clean = i; - adapter->alloc_rx_buf(adapter, rx_ring); + + cleaned_count = E1000_DESC_UNUSED(rx_ring); + if (cleaned_count) + adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); return cleaned; } @@ -3616,7 +3941,8 @@ next_desc: static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring) + struct e1000_rx_ring *rx_ring, + int cleaned_count) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; @@ -3629,11 +3955,18 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; - while(!buffer_info->skb) { - skb = dev_alloc_skb(bufsz); + while (cleaned_count--) { + if (!(skb = buffer_info->skb)) + skb = dev_alloc_skb(bufsz); + else { + skb_trim(skb, 0); + goto map_skb; + } + - if(unlikely(!skb)) { + if (unlikely(!skb)) { /* Better luck next round */ + adapter->alloc_rx_buff_failed++; break; } @@ -3670,6 +4003,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, buffer_info->skb = skb; buffer_info->length = adapter->rx_buffer_len; +map_skb: buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_buffer_len, @@ -3695,20 +4029,23 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, rx_desc = E1000_RX_DESC(*rx_ring, i); rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); - if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). */ - wmb(); - writel(i, adapter->hw.hw_addr + rx_ring->rdt); - } - - if(unlikely(++i == rx_ring->count)) i = 0; + if (unlikely(++i == rx_ring->count)) + i = 0; buffer_info = &rx_ring->buffer_info[i]; } - rx_ring->next_to_use = i; + if (likely(rx_ring->next_to_use != i)) { + rx_ring->next_to_use = i; + if (unlikely(i-- == 0)) + i = (rx_ring->count - 1); + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). */ + wmb(); + writel(i, adapter->hw.hw_addr + rx_ring->rdt); + } } /** @@ -3718,7 +4055,8 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring) + struct e1000_rx_ring *rx_ring, + int cleaned_count) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; @@ -3734,16 +4072,18 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ps_page = &rx_ring->ps_page[i]; ps_page_dma = &rx_ring->ps_page_dma[i]; - while(!buffer_info->skb) { + while (cleaned_count--) { rx_desc = E1000_RX_DESC_PS(*rx_ring, i); - for(j = 0; j < PS_PAGE_BUFFERS; j++) { + for (j = 0; j < PS_PAGE_BUFFERS; j++) { if (j < adapter->rx_ps_pages) { if (likely(!ps_page->ps_page[j])) { ps_page->ps_page[j] = alloc_page(GFP_ATOMIC); - if (unlikely(!ps_page->ps_page[j])) + if (unlikely(!ps_page->ps_page[j])) { + adapter->alloc_rx_buff_failed++; goto no_buffers; + } ps_page_dma->ps_page_dma[j] = pci_map_page(pdev, ps_page->ps_page[j], @@ -3751,7 +4091,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, PCI_DMA_FROMDEVICE); } /* Refresh the desc even if buffer_addrs didn't - * change because each write-back erases + * change because each write-back erases * this info. */ rx_desc->read.buffer_addr[j+1] = @@ -3762,8 +4102,10 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); - if(unlikely(!skb)) + if (unlikely(!skb)) { + adapter->alloc_rx_buff_failed++; break; + } /* Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after @@ -3781,27 +4123,28 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma); - if(unlikely((i & ~(E1000_RX_BUFFER_WRITE - 1)) == i)) { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). */ - wmb(); - /* Hardware increments by 16 bytes, but packet split - * descriptors are 32 bytes...so we increment tail - * twice as much. - */ - writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); - } - - if(unlikely(++i == rx_ring->count)) i = 0; + if (unlikely(++i == rx_ring->count)) i = 0; buffer_info = &rx_ring->buffer_info[i]; ps_page = &rx_ring->ps_page[i]; ps_page_dma = &rx_ring->ps_page_dma[i]; } no_buffers: - rx_ring->next_to_use = i; + if (likely(rx_ring->next_to_use != i)) { + rx_ring->next_to_use = i; + if (unlikely(i-- == 0)) i = (rx_ring->count - 1); + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). */ + wmb(); + /* Hardware increments by 16 bytes, but packet split + * descriptors are 32 bytes...so we increment tail + * twice as much. + */ + writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt); + } } /** @@ -3815,24 +4158,24 @@ e1000_smartspeed(struct e1000_adapter *adapter) uint16_t phy_status; uint16_t phy_ctrl; - if((adapter->hw.phy_type != e1000_phy_igp) || !adapter->hw.autoneg || + if ((adapter->hw.phy_type != e1000_phy_igp) || !adapter->hw.autoneg || !(adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL)) return; - if(adapter->smartspeed == 0) { + if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); - if(!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; + if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); - if(!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; + if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); - if(phy_ctrl & CR_1000T_MS_ENABLE) { + if (phy_ctrl & CR_1000T_MS_ENABLE) { phy_ctrl &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); adapter->smartspeed++; - if(!e1000_phy_setup_autoneg(&adapter->hw) && + if (!e1000_phy_setup_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_ctrl)) { phy_ctrl |= (MII_CR_AUTO_NEG_EN | @@ -3842,12 +4185,12 @@ e1000_smartspeed(struct e1000_adapter *adapter) } } return; - } else if(adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { + } else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); phy_ctrl |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); - if(!e1000_phy_setup_autoneg(&adapter->hw) && + if (!e1000_phy_setup_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_ctrl)) { phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); @@ -3855,7 +4198,7 @@ e1000_smartspeed(struct e1000_adapter *adapter) } } /* Restart process after E1000_SMARTSPEED_MAX iterations */ - if(adapter->smartspeed++ == E1000_SMARTSPEED_MAX) + if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX) adapter->smartspeed = 0; } @@ -3896,7 +4239,7 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) uint16_t spddplx; unsigned long flags; - if(adapter->hw.media_type != e1000_media_type_copper) + if (adapter->hw.media_type != e1000_media_type_copper) return -EOPNOTSUPP; switch (cmd) { @@ -3904,10 +4247,10 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) data->phy_id = adapter->hw.phy_addr; break; case SIOCGMIIREG: - if(!capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) return -EPERM; spin_lock_irqsave(&adapter->stats_lock, flags); - if(e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, + if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, &data->val_out)) { spin_unlock_irqrestore(&adapter->stats_lock, flags); return -EIO; @@ -3915,23 +4258,23 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) spin_unlock_irqrestore(&adapter->stats_lock, flags); break; case SIOCSMIIREG: - if(!capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) return -EPERM; - if(data->reg_num & ~(0x1F)) + if (data->reg_num & ~(0x1F)) return -EFAULT; mii_reg = data->val_in; spin_lock_irqsave(&adapter->stats_lock, flags); - if(e1000_write_phy_reg(&adapter->hw, data->reg_num, + if (e1000_write_phy_reg(&adapter->hw, data->reg_num, mii_reg)) { spin_unlock_irqrestore(&adapter->stats_lock, flags); return -EIO; } - if(adapter->hw.phy_type == e1000_phy_m88) { + if (adapter->hw.phy_type == e1000_phy_m88) { switch (data->reg_num) { case PHY_CTRL: - if(mii_reg & MII_CR_POWER_DOWN) + if (mii_reg & MII_CR_POWER_DOWN) break; - if(mii_reg & MII_CR_AUTO_NEG_EN) { + if (mii_reg & MII_CR_AUTO_NEG_EN) { adapter->hw.autoneg = 1; adapter->hw.autoneg_advertised = 0x2F; } else { @@ -3946,14 +4289,14 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) HALF_DUPLEX; retval = e1000_set_spd_dplx(adapter, spddplx); - if(retval) { + if (retval) { spin_unlock_irqrestore( - &adapter->stats_lock, + &adapter->stats_lock, flags); return retval; } } - if(netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev)) { e1000_down(adapter); e1000_up(adapter); } else @@ -3961,7 +4304,7 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; case M88E1000_PHY_SPEC_CTRL: case M88E1000_EXT_PHY_SPEC_CTRL: - if(e1000_phy_reset(&adapter->hw)) { + if (e1000_phy_reset(&adapter->hw)) { spin_unlock_irqrestore( &adapter->stats_lock, flags); return -EIO; @@ -3971,9 +4314,9 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } else { switch (data->reg_num) { case PHY_CTRL: - if(mii_reg & MII_CR_POWER_DOWN) + if (mii_reg & MII_CR_POWER_DOWN) break; - if(netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev)) { e1000_down(adapter); e1000_up(adapter); } else @@ -3995,7 +4338,7 @@ e1000_pci_set_mwi(struct e1000_hw *hw) struct e1000_adapter *adapter = hw->back; int ret_val = pci_set_mwi(adapter->pdev); - if(ret_val) + if (ret_val) DPRINTK(PROBE, ERR, "Error in setting MWI\n"); } @@ -4044,7 +4387,7 @@ e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) e1000_irq_disable(adapter); adapter->vlgrp = grp; - if(grp) { + if (grp) { /* enable VLAN tag insert/strip */ ctrl = E1000_READ_REG(&adapter->hw, CTRL); ctrl |= E1000_CTRL_VME; @@ -4066,7 +4409,7 @@ e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) rctl = E1000_READ_REG(&adapter->hw, RCTL); rctl &= ~E1000_RCTL_VFE; E1000_WRITE_REG(&adapter->hw, RCTL, rctl); - if(adapter->mng_vlan_id != (uint16_t)E1000_MNG_VLAN_NONE) { + if (adapter->mng_vlan_id != (uint16_t)E1000_MNG_VLAN_NONE) { e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; } @@ -4080,9 +4423,10 @@ e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid) { struct e1000_adapter *adapter = netdev_priv(netdev); uint32_t vfta, index; - if((adapter->hw.mng_cookie.status & - E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && - (vid == adapter->mng_vlan_id)) + + if ((adapter->hw.mng_cookie.status & + E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && + (vid == adapter->mng_vlan_id)) return; /* add VID to filter table */ index = (vid >> 5) & 0x7F; @@ -4099,15 +4443,19 @@ e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid) e1000_irq_disable(adapter); - if(adapter->vlgrp) + if (adapter->vlgrp) adapter->vlgrp->vlan_devices[vid] = NULL; e1000_irq_enable(adapter); - if((adapter->hw.mng_cookie.status & - E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && - (vid == adapter->mng_vlan_id)) + if ((adapter->hw.mng_cookie.status & + E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && + (vid == adapter->mng_vlan_id)) { + /* release control to f/w */ + e1000_release_hw_control(adapter); return; + } + /* remove VID from filter table */ index = (vid >> 5) & 0x7F; vfta = E1000_READ_REG_ARRAY(&adapter->hw, VFTA, index); @@ -4120,10 +4468,10 @@ e1000_restore_vlan(struct e1000_adapter *adapter) { e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp); - if(adapter->vlgrp) { + if (adapter->vlgrp) { uint16_t vid; - for(vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) { - if(!adapter->vlgrp->vlan_devices[vid]) + for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) { + if (!adapter->vlgrp->vlan_devices[vid]) continue; e1000_vlan_rx_add_vid(adapter->netdev, vid); } @@ -4136,13 +4484,13 @@ e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx) adapter->hw.autoneg = 0; /* Fiber NICs only allow 1000 gbps Full duplex */ - if((adapter->hw.media_type == e1000_media_type_fiber) && + if ((adapter->hw.media_type == e1000_media_type_fiber) && spddplx != (SPEED_1000 + DUPLEX_FULL)) { DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); return -EINVAL; } - switch(spddplx) { + switch (spddplx) { case SPEED_10 + DUPLEX_HALF: adapter->hw.forced_speed_duplex = e1000_10_half; break; @@ -4168,35 +4516,92 @@ e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx) } #ifdef CONFIG_PM +/* these functions save and restore 16 or 64 dwords (64-256 bytes) of config + * space versus the 64 bytes that pci_[save|restore]_state handle + */ +#define PCIE_CONFIG_SPACE_LEN 256 +#define PCI_CONFIG_SPACE_LEN 64 +static int +e1000_pci_save_state(struct e1000_adapter *adapter) +{ + struct pci_dev *dev = adapter->pdev; + int size; + int i; + if (adapter->hw.mac_type >= e1000_82571) + size = PCIE_CONFIG_SPACE_LEN; + else + size = PCI_CONFIG_SPACE_LEN; + + WARN_ON(adapter->config_space != NULL); + + adapter->config_space = kmalloc(size, GFP_KERNEL); + if (!adapter->config_space) { + DPRINTK(PROBE, ERR, "unable to allocate %d bytes\n", size); + return -ENOMEM; + } + for (i = 0; i < (size / 4); i++) + pci_read_config_dword(dev, i * 4, &adapter->config_space[i]); + return 0; +} + +static void +e1000_pci_restore_state(struct e1000_adapter *adapter) +{ + struct pci_dev *dev = adapter->pdev; + int size; + int i; + if (adapter->config_space == NULL) + return; + if (adapter->hw.mac_type >= e1000_82571) + size = PCIE_CONFIG_SPACE_LEN; + else + size = PCI_CONFIG_SPACE_LEN; + for (i = 0; i < (size / 4); i++) + pci_write_config_dword(dev, i * 4, adapter->config_space[i]); + kfree(adapter->config_space); + adapter->config_space = NULL; + return; +} +#endif /* CONFIG_PM */ + static int e1000_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - uint32_t ctrl, ctrl_ext, rctl, manc, status, swsm; + uint32_t ctrl, ctrl_ext, rctl, manc, status; uint32_t wufc = adapter->wol; + int retval = 0; netif_device_detach(netdev); - if(netif_running(netdev)) + if (netif_running(netdev)) e1000_down(adapter); +#ifdef CONFIG_PM + /* implement our own version of pci_save_state(pdev) because pci + * express adapters have larger 256 byte config spaces */ + retval = e1000_pci_save_state(adapter); + if (retval) + return retval; +#endif + status = E1000_READ_REG(&adapter->hw, STATUS); - if(status & E1000_STATUS_LU) + if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; - if(wufc) { + if (wufc) { e1000_setup_rctl(adapter); e1000_set_multi(netdev); /* turn on all-multi mode if wake on multicast is enabled */ - if(adapter->wol & E1000_WUFC_MC) { + if (adapter->wol & E1000_WUFC_MC) { rctl = E1000_READ_REG(&adapter->hw, RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, RCTL, rctl); } - if(adapter->hw.mac_type >= e1000_82540) { + if (adapter->hw.mac_type >= e1000_82540) { ctrl = E1000_READ_REG(&adapter->hw, CTRL); /* advertise wake from D3Cold */ #define E1000_CTRL_ADVD3WUC 0x00100000 @@ -4207,7 +4612,7 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state) E1000_WRITE_REG(&adapter->hw, CTRL, ctrl); } - if(adapter->hw.media_type == e1000_media_type_fiber || + if (adapter->hw.media_type == e1000_media_type_fiber || adapter->hw.media_type == e1000_media_type_internal_serdes) { /* keep the laser running in D3 */ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); @@ -4220,96 +4625,96 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state) E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, WUFC, wufc); - pci_enable_wake(pdev, 3, 1); - pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */ + retval = pci_enable_wake(pdev, PCI_D3hot, 1); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); + retval = pci_enable_wake(pdev, PCI_D3cold, 1); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); } else { E1000_WRITE_REG(&adapter->hw, WUC, 0); E1000_WRITE_REG(&adapter->hw, WUFC, 0); - pci_enable_wake(pdev, 3, 0); - pci_enable_wake(pdev, 4, 0); /* 4 == D3 cold */ + retval = pci_enable_wake(pdev, PCI_D3hot, 0); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); + retval = pci_enable_wake(pdev, PCI_D3cold, 0); /* 4 == D3 cold */ + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); } - pci_save_state(pdev); - - if(adapter->hw.mac_type >= e1000_82540 && + if (adapter->hw.mac_type >= e1000_82540 && adapter->hw.media_type == e1000_media_type_copper) { manc = E1000_READ_REG(&adapter->hw, MANC); - if(manc & E1000_MANC_SMBUS_EN) { + if (manc & E1000_MANC_SMBUS_EN) { manc |= E1000_MANC_ARP_EN; E1000_WRITE_REG(&adapter->hw, MANC, manc); - pci_enable_wake(pdev, 3, 1); - pci_enable_wake(pdev, 4, 1); /* 4 == D3 cold */ + retval = pci_enable_wake(pdev, PCI_D3hot, 1); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); + retval = pci_enable_wake(pdev, PCI_D3cold, 1); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); } } - switch(adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); - break; - case e1000_82573: - swsm = E1000_READ_REG(&adapter->hw, SWSM); - E1000_WRITE_REG(&adapter->hw, SWSM, - swsm & ~E1000_SWSM_DRV_LOAD); - break; - default: - break; - } + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + e1000_release_hw_control(adapter); pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + retval = pci_set_power_state(pdev, pci_choose_state(pdev, state)); + if (retval) + DPRINTK(PROBE, ERR, "Error in setting power state\n"); return 0; } +#ifdef CONFIG_PM static int e1000_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - uint32_t manc, ret_val, swsm; - uint32_t ctrl_ext; + int retval; + uint32_t manc, ret_val; - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); + retval = pci_set_power_state(pdev, PCI_D0); + if (retval) + DPRINTK(PROBE, ERR, "Error in setting power state\n"); + e1000_pci_restore_state(adapter); ret_val = pci_enable_device(pdev); pci_set_master(pdev); - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_enable_wake(pdev, PCI_D3cold, 0); + retval = pci_enable_wake(pdev, PCI_D3hot, 0); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 wake\n"); + retval = pci_enable_wake(pdev, PCI_D3cold, 0); + if (retval) + DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n"); e1000_reset(adapter); E1000_WRITE_REG(&adapter->hw, WUS, ~0); - if(netif_running(netdev)) + if (netif_running(netdev)) e1000_up(adapter); netif_device_attach(netdev); - if(adapter->hw.mac_type >= e1000_82540 && + if (adapter->hw.mac_type >= e1000_82540 && adapter->hw.media_type == e1000_media_type_copper) { manc = E1000_READ_REG(&adapter->hw, MANC); manc &= ~(E1000_MANC_ARP_EN); E1000_WRITE_REG(&adapter->hw, MANC, manc); } - switch(adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); - break; - case e1000_82573: - swsm = E1000_READ_REG(&adapter->hw, SWSM); - E1000_WRITE_REG(&adapter->hw, SWSM, - swsm | E1000_SWSM_DRV_LOAD); - break; - default: - break; - } + /* If the controller is 82573 and f/w is AMT, do not set + * DRV_LOAD until the interface is up. For all other cases, + * let the f/w know that the h/w is now under the control + * of the driver. */ + if (adapter->hw.mac_type != e1000_82573 || + !e1000_check_mng_mode(&adapter->hw)) + e1000_get_hw_control(adapter); return 0; } @@ -4327,6 +4732,9 @@ e1000_netpoll(struct net_device *netdev) disable_irq(adapter->pdev->irq); e1000_intr(adapter->pdev->irq, netdev, NULL); e1000_clean_tx_irq(adapter, adapter->tx_ring); +#ifndef CONFIG_E1000_NAPI + adapter->clean_rx(adapter, adapter->rx_ring); +#endif enable_irq(adapter->pdev->irq); } #endif |