diff options
Diffstat (limited to 'drivers/net/igb/igb_main.c')
-rw-r--r-- | drivers/net/igb/igb_main.c | 1915 |
1 files changed, 1419 insertions, 496 deletions
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index a50db5398fa..ca842163dce 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007 Intel Corporation. + Copyright(c) 2007-2009 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -34,6 +34,7 @@ #include <linux/ipv6.h> #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <linux/net_tstamp.h> #include <linux/mii.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> @@ -48,12 +49,12 @@ #endif #include "igb.h" -#define DRV_VERSION "1.2.45-k2" +#define DRV_VERSION "1.3.16-k2" char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = "Copyright (c) 2008 Intel Corporation."; +static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, @@ -61,8 +62,10 @@ static const struct e1000_info *igb_info_tbl[] = { static struct pci_device_id igb_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, @@ -105,7 +108,6 @@ static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); static irqreturn_t igb_msix_rx(int irq, void *); static irqreturn_t igb_msix_tx(int irq, void *); -static int igb_clean_rx_ring_msix(struct napi_struct *, int); #ifdef CONFIG_IGB_DCA static void igb_update_rx_dca(struct igb_ring *); static void igb_update_tx_dca(struct igb_ring *); @@ -115,9 +117,6 @@ static bool igb_clean_tx_irq(struct igb_ring *); static int igb_poll(struct napi_struct *, int); static bool igb_clean_rx_irq_adv(struct igb_ring *, int *, int); static void igb_alloc_rx_buffers_adv(struct igb_ring *, int); -#ifdef CONFIG_IGB_LRO -static int igb_get_skb_hdr(struct sk_buff *skb, void **, void **, u64 *, void *); -#endif static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); static void igb_reset_task(struct work_struct *); @@ -125,6 +124,16 @@ static void igb_vlan_rx_register(struct net_device *, struct vlan_group *); static void igb_vlan_rx_add_vid(struct net_device *, u16); static void igb_vlan_rx_kill_vid(struct net_device *, u16); static void igb_restore_vlan(struct igb_adapter *); +static void igb_ping_all_vfs(struct igb_adapter *); +static void igb_msg_task(struct igb_adapter *); +static int igb_rcv_msg_from_vf(struct igb_adapter *, u32); +static inline void igb_set_rah_pool(struct e1000_hw *, int , int); +static void igb_set_mc_list_pools(struct igb_adapter *, int, u16); +static void igb_vmm_control(struct igb_adapter *); +static inline void igb_set_vmolr(struct e1000_hw *, int); +static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int); +static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *); +static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_suspend(struct pci_dev *, pm_message_t); #ifdef CONFIG_PM @@ -139,12 +148,18 @@ static struct notifier_block dca_notifier = { .priority = 0 }; #endif - #ifdef CONFIG_NET_POLL_CONTROLLER /* for netdump / net console */ static void igb_netpoll(struct net_device *); #endif +#ifdef CONFIG_PCI_IOV +static ssize_t igb_set_num_vfs(struct device *, struct device_attribute *, + const char *, size_t); +static ssize_t igb_show_num_vfs(struct device *, struct device_attribute *, + char *); +DEVICE_ATTR(num_vfs, S_IRUGO | S_IWUSR, igb_show_num_vfs, igb_set_num_vfs); +#endif static pci_ers_result_t igb_io_error_detected(struct pci_dev *, pci_channel_state_t); static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); @@ -178,6 +193,54 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +/** + * Scale the NIC clock cycle by a large factor so that + * relatively small clock corrections can be added or + * substracted at each clock tick. The drawbacks of a + * large factor are a) that the clock register overflows + * more quickly (not such a big deal) and b) that the + * increment per tick has to fit into 24 bits. + * + * Note that + * TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * + * IGB_TSYNC_SCALE + * TIMINCA += TIMINCA * adjustment [ppm] / 1e9 + * + * The base scale factor is intentionally a power of two + * so that the division in %struct timecounter can be done with + * a shift. + */ +#define IGB_TSYNC_SHIFT (19) +#define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT) + +/** + * The duration of one clock cycle of the NIC. + * + * @todo This hard-coded value is part of the specification and might change + * in future hardware revisions. Add revision check. + */ +#define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16 + +#if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24) +# error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA +#endif + +/** + * igb_read_clock - read raw cycle counter (to be used by time counter) + */ +static cycle_t igb_read_clock(const struct cyclecounter *tc) +{ + struct igb_adapter *adapter = + container_of(tc, struct igb_adapter, cycles); + struct e1000_hw *hw = &adapter->hw; + u64 stamp; + + stamp = rd32(E1000_SYSTIML); + stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL; + + return stamp; +} + #ifdef DEBUG /** * igb_get_hw_dev_name - return device name string @@ -188,9 +251,44 @@ char *igb_get_hw_dev_name(struct e1000_hw *hw) struct igb_adapter *adapter = hw->back; return adapter->netdev->name; } + +/** + * igb_get_time_str - format current NIC and system time as string + */ +static char *igb_get_time_str(struct igb_adapter *adapter, + char buffer[160]) +{ + cycle_t hw = adapter->cycles.read(&adapter->cycles); + struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock)); + struct timespec sys; + struct timespec delta; + getnstimeofday(&sys); + + delta = timespec_sub(nic, sys); + + sprintf(buffer, + "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns", + hw, + (long)nic.tv_sec, nic.tv_nsec, + (long)sys.tv_sec, sys.tv_nsec, + (long)delta.tv_sec, delta.tv_nsec); + + return buffer; +} #endif /** + * igb_desc_unused - calculate if we have unused descriptors + **/ +static int igb_desc_unused(struct igb_ring *ring) +{ + if (ring->next_to_clean > ring->next_to_use) + return ring->next_to_clean - ring->next_to_use - 1; + + return ring->count + ring->next_to_clean - ring->next_to_use - 1; +} + +/** * igb_init_module - Driver Registration Routine * * igb_init_module is the first routine called when the driver is @@ -243,6 +341,7 @@ module_exit(igb_exit_module); static void igb_cache_ring_register(struct igb_adapter *adapter) { int i; + unsigned int rbase_offset = adapter->vfs_allocated_count; switch (adapter->hw.mac.type) { case e1000_82576: @@ -252,9 +351,11 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) * and continue consuming queues in the same sequence */ for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i].reg_idx = Q_IDX_82576(i); + adapter->rx_ring[i].reg_idx = rbase_offset + + Q_IDX_82576(i); for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i].reg_idx = Q_IDX_82576(i); + adapter->tx_ring[i].reg_idx = rbase_offset + + Q_IDX_82576(i); break; case e1000_82575: default: @@ -354,7 +455,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, a vector number along with a "valid" bit. Sadly, the layout of the table is somewhat counterintuitive. */ if (rx_queue > IGB_N0_QUEUE) { - index = (rx_queue >> 1); + index = (rx_queue >> 1) + adapter->vfs_allocated_count; ivar = array_rd32(E1000_IVAR0, index); if (rx_queue & 0x1) { /* vector goes into third byte of register */ @@ -369,7 +470,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, array_wr32(E1000_IVAR0, index, ivar); } if (tx_queue > IGB_N0_QUEUE) { - index = (tx_queue >> 1); + index = (tx_queue >> 1) + adapter->vfs_allocated_count; ivar = array_rd32(E1000_IVAR0, index); if (tx_queue & 0x1) { /* vector goes into high byte of register */ @@ -407,7 +508,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) /* Turn on MSI-X capability first, or our settings * won't stick. And it will take days to debug. */ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | - E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_PBA | E1000_GPIE_EIAME | E1000_GPIE_NSICR); for (i = 0; i < adapter->num_tx_queues; i++) { @@ -506,9 +607,6 @@ static int igb_request_msix(struct igb_adapter *adapter) goto out; ring->itr_register = E1000_EITR(0) + (vector << 2); ring->itr_val = adapter->itr; - /* overwrite the poll routine for MSIX, we've already done - * netif_napi_add */ - ring->napi.poll = &igb_clean_rx_ring_msix; vector++; } @@ -546,6 +644,11 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter) int err; int numvecs, i; + /* Number of supported queues. */ + /* Having more queues than CPUs doesn't make sense. */ + adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); + adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus()); + numvecs = adapter->num_tx_queues + adapter->num_rx_queues + 1; adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), GFP_KERNEL); @@ -687,7 +790,10 @@ static void igb_irq_enable(struct igb_adapter *adapter) wr32(E1000_EIAC, adapter->eims_enable_mask); wr32(E1000_EIAM, adapter->eims_enable_mask); wr32(E1000_EIMS, adapter->eims_enable_mask); - wr32(E1000_IMS, E1000_IMS_LSC); + if (adapter->vfs_allocated_count) + wr32(E1000_MBVFIMR, 0xFF); + wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB | + E1000_IMS_DOUTSYNC)); } else { wr32(E1000_IMS, IMS_ENABLE_MASK); wr32(E1000_IAM, IMS_ENABLE_MASK); @@ -778,12 +884,12 @@ static void igb_configure(struct igb_adapter *adapter) igb_rx_fifo_flush_82575(&adapter->hw); - /* call IGB_DESC_UNUSED which always leaves + /* call igb_desc_unused which always leaves * at least 1 descriptor unused to make sure * next_to_use != next_to_clean */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = &adapter->rx_ring[i]; - igb_alloc_rx_buffers_adv(ring, IGB_DESC_UNUSED(ring)); + igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring)); } @@ -811,6 +917,10 @@ int igb_up(struct igb_adapter *adapter) if (adapter->msix_entries) igb_configure_msix(adapter); + igb_vmm_control(adapter); + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); + igb_set_vmolr(hw, adapter->vfs_allocated_count); + /* Clear any pending interrupts. */ rd32(E1000_ICR); igb_irq_enable(adapter); @@ -856,6 +966,10 @@ void igb_down(struct igb_adapter *adapter) netdev->tx_queue_len = adapter->tx_queue_len; netif_carrier_off(netdev); + + /* record the stats before reset*/ + igb_update_stats(adapter); + adapter->link_speed = 0; adapter->link_duplex = 0; @@ -886,11 +1000,14 @@ void igb_reset(struct igb_adapter *adapter) /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. */ - if (mac->type != e1000_82576) { - pba = E1000_PBA_34K; - } - else { + switch (mac->type) { + case e1000_82576: pba = E1000_PBA_64K; + break; + case e1000_82575: + default: + pba = E1000_PBA_34K; + break; } if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && @@ -912,7 +1029,7 @@ void igb_reset(struct igb_adapter *adapter) /* the tx fifo also stores 16 bytes of information about the tx * but don't include ethernet FCS because hardware appends it */ min_tx_space = (adapter->max_frame_size + - sizeof(struct e1000_tx_desc) - + sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN) * 2; min_tx_space = ALIGN(min_tx_space, 1024); min_tx_space >>= 10; @@ -956,6 +1073,20 @@ void igb_reset(struct igb_adapter *adapter) fc->send_xon = 1; fc->type = fc->original_type; + /* disable receive for all VFs and wait one second */ + if (adapter->vfs_allocated_count) { + int i; + for (i = 0 ; i < adapter->vfs_allocated_count; i++) + adapter->vf_data[i].clear_to_send = false; + + /* ping all the active vfs to let them know we are going down */ + igb_ping_all_vfs(adapter); + + /* disable transmits and receives */ + wr32(E1000_VFRE, 0); + wr32(E1000_VFTE, 0); + } + /* Allow time for pending master requests to run */ adapter->hw.mac.ops.reset_hw(&adapter->hw); wr32(E1000_WUC, 0); @@ -972,21 +1103,6 @@ void igb_reset(struct igb_adapter *adapter) igb_get_phy_info(&adapter->hw); } -/** - * igb_is_need_ioport - determine if an adapter needs ioport resources or not - * @pdev: PCI device information struct - * - * Returns true if an adapter needs ioport resources - **/ -static int igb_is_need_ioport(struct pci_dev *pdev) -{ - switch (pdev->device) { - /* Currently there are no adapters that need ioport resources */ - default: - return false; - } -} - static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -1023,24 +1139,14 @@ static int __devinit igb_probe(struct pci_dev *pdev, struct net_device *netdev; struct igb_adapter *adapter; struct e1000_hw *hw; - struct pci_dev *us_dev; const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; unsigned long mmio_start, mmio_len; - int i, err, pci_using_dac, pos; - u16 eeprom_data = 0, state = 0; + int err, pci_using_dac; + u16 eeprom_data = 0; u16 eeprom_apme_mask = IGB_EEPROM_APME; u32 part_num; - int bars, need_ioport; - /* do not allocate ioport bars when not needed */ - need_ioport = igb_is_need_ioport(pdev); - if (need_ioport) { - bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO); - err = pci_enable_device(pdev); - } else { - bars = pci_select_bars(pdev, IORESOURCE_MEM); - err = pci_enable_device_mem(pdev); - } + err = pci_enable_device_mem(pdev); if (err) return err; @@ -1062,28 +1168,9 @@ static int __devinit igb_probe(struct pci_dev *pdev, } } - /* 82575 requires that the pci-e link partner disable the L0s state */ - switch (pdev->device) { - case E1000_DEV_ID_82575EB_COPPER: - case E1000_DEV_ID_82575EB_FIBER_SERDES: - case E1000_DEV_ID_82575GB_QUAD_COPPER: - us_dev = pdev->bus->self; - pos = pci_find_capability(us_dev, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(us_dev, pos + PCI_EXP_LNKCTL, - &state); - state &= ~PCIE_LINK_STATE_L0S; - pci_write_config_word(us_dev, pos + PCI_EXP_LNKCTL, - state); - dev_info(&pdev->dev, - "Disabling ASPM L0s upstream switch port %s\n", - pci_name(us_dev)); - } - default: - break; - } - - err = pci_request_selected_regions(pdev, bars, igb_driver_name); + err = pci_request_selected_regions(pdev, pci_select_bars(pdev, + IORESOURCE_MEM), + igb_driver_name); if (err) goto err_pci_reg; @@ -1098,7 +1185,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, pci_save_state(pdev); err = -ENOMEM; - netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), IGB_MAX_TX_QUEUES); + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_ABS_MAX_TX_QUEUES); if (!netdev) goto err_alloc_etherdev; @@ -1111,15 +1199,13 @@ static int __devinit igb_probe(struct pci_dev *pdev, hw = &adapter->hw; hw->back = adapter; adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE; - adapter->bars = bars; - adapter->need_ioport = need_ioport; mmio_start = pci_resource_start(pdev, 0); mmio_len = pci_resource_len(pdev, 0); err = -EIO; - adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); - if (!adapter->hw.hw_addr) + hw->hw_addr = ioremap(mmio_start, mmio_len); + if (!hw->hw_addr) goto err_ioremap; netdev->netdev_ops = &igb_netdev_ops; @@ -1147,8 +1233,9 @@ static int __devinit igb_probe(struct pci_dev *pdev, /* Initialize skew-specific constants */ err = ei->get_invariants(hw); if (err) - goto err_hw_init; + goto err_sw_init; + /* setup the private structure */ err = igb_sw_init(adapter); if (err) goto err_sw_init; @@ -1180,27 +1267,25 @@ static int __devinit igb_probe(struct pci_dev *pdev, "PHY reset is blocked due to SOL/IDER session.\n"); netdev->features = NETIF_F_SG | - NETIF_F_HW_CSUM | + NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_IPV6_CSUM; netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; -#ifdef CONFIG_IGB_LRO - netdev->features |= NETIF_F_LRO; -#endif + netdev->features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; - netdev->vlan_features |= NETIF_F_HW_CSUM; + netdev->vlan_features |= NETIF_F_IP_CSUM; netdev->vlan_features |= NETIF_F_SG; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - netdev->features |= NETIF_F_LLTX; adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw); /* before reading the NVM, reset the controller to put the device in a @@ -1227,25 +1312,15 @@ static int __devinit igb_probe(struct pci_dev *pdev, goto err_eeprom; } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = &igb_watchdog; - adapter->watchdog_timer.data = (unsigned long) adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = &igb_update_phy_info; - adapter->phy_info_timer.data = (unsigned long) adapter; + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); - /* Initialize link & ring properties that are user-changeable */ - adapter->tx_ring->count = 256; - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i].count = adapter->tx_ring->count; - adapter->rx_ring->count = 256; - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i].count = adapter->rx_ring->count; - + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; hw->phy.autoneg_advertised = 0x2f; @@ -1253,7 +1328,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, hw->fc.original_type = e1000_fc_default; hw->fc.type = e1000_fc_default; - adapter->itr_setting = 3; + adapter->itr_setting = IGB_DEFAULT_ITR; adapter->itr = IGB_START_ITR; igb_validate_mdi_setting(hw); @@ -1264,10 +1339,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, * enable the ACPI Magic Packet filter */ - if (hw->bus.func == 0 || - hw->device_id == E1000_DEV_ID_82575EB_COPPER) - hw->nvm.ops.read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, - &eeprom_data); + if (hw->bus.func == 0) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + else if (hw->bus.func == 1) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); if (eeprom_data & eeprom_apme_mask) adapter->eeprom_wol |= E1000_WUFC_MAG; @@ -1287,6 +1362,16 @@ static int __devinit igb_probe(struct pci_dev *pdev, if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->eeprom_wol = 0; break; + case E1000_DEV_ID_82576_QUAD_COPPER: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->eeprom_wol = 0; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; } /* initialize the wol settings based on the eeprom settings */ @@ -1309,17 +1394,82 @@ static int __devinit igb_probe(struct pci_dev *pdev, if (err) goto err_register; +#ifdef CONFIG_PCI_IOV + /* since iov functionality isn't critical to base device function we + * can accept failure. If it fails we don't allow iov to be enabled */ + if (hw->mac.type == e1000_82576) { + err = pci_enable_sriov(pdev, 0); + if (!err) + err = device_create_file(&netdev->dev, + &dev_attr_num_vfs); + if (err) + dev_err(&pdev->dev, "Failed to initialize IOV\n"); + } + +#endif #ifdef CONFIG_IGB_DCA if (dca_add_requester(&pdev->dev) == 0) { adapter->flags |= IGB_FLAG_DCA_ENABLED; dev_info(&pdev->dev, "DCA enabled\n"); /* Always use CB2 mode, difference is masked * in the CB driver. */ - wr32(E1000_DCA_CTRL, 2); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); igb_setup_dca(adapter); } #endif + /* + * Initialize hardware timer: we keep it running just in case + * that some program needs it later on. + */ + memset(&adapter->cycles, 0, sizeof(adapter->cycles)); + adapter->cycles.read = igb_read_clock; + adapter->cycles.mask = CLOCKSOURCE_MASK(64); + adapter->cycles.mult = 1; + adapter->cycles.shift = IGB_TSYNC_SHIFT; + wr32(E1000_TIMINCA, + (1<<24) | + IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE); +#if 0 + /* + * Avoid rollover while we initialize by resetting the time counter. + */ + wr32(E1000_SYSTIML, 0x00000000); + wr32(E1000_SYSTIMH, 0x00000000); +#else + /* + * Set registers so that rollover occurs soon to test this. + */ + wr32(E1000_SYSTIML, 0x00000000); + wr32(E1000_SYSTIMH, 0xFF800000); +#endif + wrfl(); + timecounter_init(&adapter->clock, + &adapter->cycles, + ktime_to_ns(ktime_get_real())); + + /* + * Synchronize our NIC clock against system wall clock. NIC + * time stamp reading requires ~3us per sample, each sample + * was pretty stable even under load => only require 10 + * samples for each offset comparison. + */ + memset(&adapter->compare, 0, sizeof(adapter->compare)); + adapter->compare.source = &adapter->clock; + adapter->compare.target = ktime_get_real; + adapter->compare.num_samples = 10; + timecompare_update(&adapter->compare, 0); + +#ifdef DEBUG + { + char buffer[160]; + printk(KERN_DEBUG + "igb: %s: hw %p initialized timer\n", + igb_get_time_str(adapter, buffer), + &adapter->hw); + } +#endif + dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", @@ -1352,15 +1502,14 @@ err_eeprom: if (hw->flash_address) iounmap(hw->flash_address); - igb_remove_device(hw); igb_free_queues(adapter); err_sw_init: -err_hw_init: iounmap(hw->hw_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, bars); + pci_release_selected_regions(pdev, pci_select_bars(pdev, + IORESOURCE_MEM)); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -1380,9 +1529,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_IGB_DCA struct e1000_hw *hw = &adapter->hw; -#endif int err; /* flush_scheduled work may reschedule our watchdog task, so @@ -1398,7 +1545,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) dev_info(&pdev->dev, "DCA disabled\n"); dca_remove_requester(&pdev->dev); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - wr32(E1000_DCA_CTRL, 1); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); } #endif @@ -1411,15 +1558,29 @@ static void __devexit igb_remove(struct pci_dev *pdev) if (!igb_check_reset_block(&adapter->hw)) igb_reset_phy(&adapter->hw); - igb_remove_device(&adapter->hw); igb_reset_interrupt_capability(adapter); igb_free_queues(adapter); - iounmap(adapter->hw.hw_addr); - if (adapter->hw.flash_address) - iounmap(adapter->hw.flash_address); - pci_release_selected_regions(pdev, adapter->bars); +#ifdef CONFIG_PCI_IOV + /* reclaim resources allocated to VFs */ + if (adapter->vf_data) { + /* disable iov and allow time for transactions to clear */ + pci_disable_sriov(pdev); + msleep(500); + + kfree(adapter->vf_data); + adapter->vf_data = NULL; + wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + msleep(100); + dev_info(&pdev->dev, "IOV Disabled\n"); + } +#endif + iounmap(hw->hw_addr); + if (hw->flash_address) + iounmap(hw->flash_address); + pci_release_selected_regions(pdev, pci_select_bars(pdev, + IORESOURCE_MEM)); free_netdev(netdev); @@ -1454,11 +1615,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - /* Number of supported queues. */ - /* Having more queues than CPUs doesn't make sense. */ - adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); - adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus()); - /* This call may decrease the number of queues depending on * interrupt mode. */ igb_set_interrupt_capability(adapter); @@ -1521,6 +1677,10 @@ static int igb_open(struct net_device *netdev) * clean_rx handler before we do so. */ igb_configure(adapter); + igb_vmm_control(adapter); + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); + igb_set_vmolr(hw, adapter->vfs_allocated_count); + err = igb_request_irq(adapter); if (err) goto err_req_irq; @@ -1596,7 +1756,6 @@ static int igb_close(struct net_device *netdev) * * Return 0 on success, negative on failure **/ - int igb_setup_tx_resources(struct igb_adapter *adapter, struct igb_ring *tx_ring) { @@ -1610,7 +1769,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter, memset(tx_ring->buffer_info, 0, size); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); + tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, @@ -1657,7 +1816,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) for (i = 0; i < IGB_MAX_TX_QUEUES; i++) { r_idx = i % adapter->num_tx_queues; adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx]; - } + } return err; } @@ -1676,13 +1835,13 @@ static void igb_configure_tx(struct igb_adapter *adapter) int i, j; for (i = 0; i < adapter->num_tx_queues; i++) { - struct igb_ring *ring = &(adapter->tx_ring[i]); + struct igb_ring *ring = &adapter->tx_ring[i]; j = ring->reg_idx; wr32(E1000_TDLEN(j), - ring->count * sizeof(struct e1000_tx_desc)); + ring->count * sizeof(union e1000_adv_tx_desc)); tdba = ring->dma; wr32(E1000_TDBAL(j), - tdba & 0x00000000ffffffffULL); + tdba & 0x00000000ffffffffULL); wr32(E1000_TDBAH(j), tdba >> 32); ring->head = E1000_TDH(j); @@ -1702,12 +1861,11 @@ static void igb_configure_tx(struct igb_adapter *adapter) wr32(E1000_DCA_TXCTRL(j), txctrl); } - - - /* Use the default values for the Tx Inter Packet Gap (IPG) timer */ + /* disable queue 0 to prevent tail bump w/o re-configuration */ + if (adapter->vfs_allocated_count) + wr32(E1000_TXDCTL(0), 0); /* Program the Transmit Control Register */ - tctl = rd32(E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | @@ -1731,21 +1889,12 @@ static void igb_configure_tx(struct igb_adapter *adapter) * * Returns 0 on success, negative on failure **/ - int igb_setup_rx_resources(struct igb_adapter *adapter, struct igb_ring *rx_ring) { struct pci_dev *pdev = adapter->pdev; int size, desc_len; -#ifdef CONFIG_IGB_LRO - size = sizeof(struct net_lro_desc) * MAX_LRO_DESCRIPTORS; - rx_ring->lro_mgr.lro_arr = vmalloc(size); - if (!rx_ring->lro_mgr.lro_arr) - goto err; - memset(rx_ring->lro_mgr.lro_arr, 0, size); -#endif - size = sizeof(struct igb_buffer) * rx_ring->count; rx_ring->buffer_info = vmalloc(size); if (!rx_ring->buffer_info) @@ -1772,10 +1921,6 @@ int igb_setup_rx_resources(struct igb_adapter *adapter, return 0; err: -#ifdef CONFIG_IGB_LRO - vfree(rx_ring->lro_mgr.lro_arr); - rx_ring->lro_mgr.lro_arr = NULL; -#endif vfree(rx_ring->buffer_info); dev_err(&adapter->pdev->dev, "Unable to allocate memory for " "the receive descriptor ring\n"); @@ -1824,13 +1969,13 @@ static void igb_setup_rctl(struct igb_adapter *adapter) rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* * enable stripping of CRC. It's unlikely this will break BMC * redirection as it did with e1000. Newer features require * that the HW strips the CRC. - */ + */ rctl |= E1000_RCTL_SECRC; /* @@ -1874,6 +2019,30 @@ static void igb_setup_rctl(struct igb_adapter *adapter) srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; } + /* Attention!!! For SR-IOV PF driver operations you must enable + * queue drop for all VF and PF queues to prevent head of line blocking + * if an un-trusted VF does not provide descriptors to hardware. + */ + if (adapter->vfs_allocated_count) { + u32 vmolr; + + j = adapter->rx_ring[0].reg_idx; + + /* set all queue drop enable bits */ + wr32(E1000_QDE, ALL_QUEUES); + srrctl |= E1000_SRRCTL_DROP_EN; + + /* disable queue 0 to prevent tail write w/o re-config */ + wr32(E1000_RXDCTL(0), 0); + + vmolr = rd32(E1000_VMOLR(j)); + if (rctl & E1000_RCTL_LPE) + vmolr |= E1000_VMOLR_LPE; + if (adapter->num_rx_queues > 0) + vmolr |= E1000_VMOLR_RSSE; + wr32(E1000_VMOLR(j), vmolr); + } + for (i = 0; i < adapter->num_rx_queues; i++) { j = adapter->rx_ring[i].reg_idx; wr32(E1000_SRRCTL(j), srrctl); @@ -1883,6 +2052,54 @@ static void igb_setup_rctl(struct igb_adapter *adapter) } /** + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure + * + * Configure maximum receivable packet size. + **/ +static void igb_rlpml_set(struct igb_adapter *adapter) +{ + u32 max_frame_size = adapter->max_frame_size; + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + + if (adapter->vlgrp) + max_frame_size += VLAN_TAG_SIZE; + + /* if vfs are enabled we set RLPML to the largest possible request + * size and set the VMOLR RLPML to the size we need */ + if (pf_id) { + igb_set_vf_rlpml(adapter, max_frame_size, pf_id); + max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE; + } + + wr32(E1000_RLPML, max_frame_size); +} + +/** + * igb_configure_vt_default_pool - Configure VT default pool + * @adapter: board private structure + * + * Configure the default pool + **/ +static void igb_configure_vt_default_pool(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + u32 vtctl; + + /* not in sr-iov mode - do nothing */ + if (!pf_id) + return; + + vtctl = rd32(E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | + E1000_VT_CTL_DISABLE_DEF_POOL); + vtctl |= pf_id << E1000_VT_CTL_DEFAULT_POOL_SHIFT; + wr32(E1000_VT_CTL, vtctl); +} + +/** * igb_configure_rx - Configure receive Unit after Reset * @adapter: board private structure * @@ -1894,7 +2111,7 @@ static void igb_configure_rx(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum; u32 rxdctl; - int i, j; + int i; /* disable receives while setting up the descriptors */ rctl = rd32(E1000_RCTL); @@ -1908,14 +2125,14 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *ring = &(adapter->rx_ring[i]); - j = ring->reg_idx; + struct igb_ring *ring = &adapter->rx_ring[i]; + int j = ring->reg_idx; rdba = ring->dma; wr32(E1000_RDBAL(j), - rdba & 0x00000000ffffffffULL); + rdba & 0x00000000ffffffffULL); wr32(E1000_RDBAH(j), rdba >> 32); wr32(E1000_RDLEN(j), - ring->count * sizeof(union e1000_adv_rx_desc)); + ring->count * sizeof(union e1000_adv_rx_desc)); ring->head = E1000_RDH(j); ring->tail = E1000_RDT(j); @@ -1929,16 +2146,6 @@ static void igb_configure_rx(struct igb_adapter *adapter) rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; wr32(E1000_RXDCTL(j), rxdctl); -#ifdef CONFIG_IGB_LRO - /* Intitial LRO Settings */ - ring->lro_mgr.max_aggr = MAX_LRO_AGGR; - ring->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS; - ring->lro_mgr.get_skb_header = igb_get_skb_hdr; - ring->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - ring->lro_mgr.dev = adapter->netdev; - ring->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; - ring->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; -#endif } if (adapter->num_rx_queues > 1) { @@ -1963,7 +2170,10 @@ static void igb_configure_rx(struct igb_adapter *adapter) writel(reta.dword, hw->hw_addr + E1000_RETA(0) + (j & ~3)); } - mrqc = E1000_MRQC_ENABLE_RSS_4Q; + if (adapter->vfs_allocated_count) + mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + else + mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* Fill out hash function seeds */ for (j = 0; j < 10; j++) @@ -1988,27 +2198,23 @@ static void igb_configure_rx(struct igb_adapter *adapter) rxcsum |= E1000_RXCSUM_PCSD; wr32(E1000_RXCSUM, rxcsum); } else { + /* Enable multi-queue for sr-iov */ + if (adapter->vfs_allocated_count) + wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ); /* Enable Receive Checksum Offload for TCP and UDP */ rxcsum = rd32(E1000_RXCSUM); - if (adapter->rx_csum) { - rxcsum |= E1000_RXCSUM_TUOFL; + if (adapter->rx_csum) + rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE; + else + rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE); - /* Enable IPv4 payload checksum for UDP fragments - * Must be used in conjunction with packet-split. */ - if (adapter->rx_ps_hdr_size) - rxcsum |= E1000_RXCSUM_IPPCSE; - } else { - rxcsum &= ~E1000_RXCSUM_TUOFL; - /* don't need to clear IPPCSE as it defaults to 0 */ - } wr32(E1000_RXCSUM, rxcsum); } - if (adapter->vlgrp) - wr32(E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); - else - wr32(E1000_RLPML, adapter->max_frame_size); + /* Set the default pool for the PF's first queue */ + igb_configure_vt_default_pool(adapter); + + igb_rlpml_set(adapter); /* Enable Receives */ wr32(E1000_RCTL, rctl); @@ -2051,14 +2257,10 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter, struct igb_buffer *buffer_info) { - if (buffer_info->dma) { - pci_unmap_page(adapter->pdev, - buffer_info->dma, - buffer_info->length, - PCI_DMA_TODEVICE); - buffer_info->dma = 0; - } + buffer_info->dma = 0; if (buffer_info->skb) { + skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb, + DMA_TO_DEVICE); dev_kfree_skb_any(buffer_info->skb); buffer_info->skb = NULL; } @@ -2127,11 +2329,6 @@ void igb_free_rx_resources(struct igb_ring *rx_ring) vfree(rx_ring->buffer_info); rx_ring->buffer_info = NULL; -#ifdef CONFIG_IGB_LRO - vfree(rx_ring->lro_mgr.lro_arr); - rx_ring->lro_mgr.lro_arr = NULL; -#endif - pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; @@ -2231,15 +2428,18 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) static int igb_set_mac(struct net_device *netdev, void *p) { struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + hw->mac.ops.rar_set(hw, hw->mac.addr, 0); - adapter->hw.mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0); return 0; } @@ -2259,7 +2459,7 @@ static void igb_set_multi(struct net_device *netdev) struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct dev_mc_list *mc_ptr; - u8 *mta_list; + u8 *mta_list = NULL; u32 rctl; int i; @@ -2280,17 +2480,15 @@ static void igb_set_multi(struct net_device *netdev) } wr32(E1000_RCTL, rctl); - if (!netdev->mc_count) { - /* nothing to program, so clear mc list */ - igb_update_mc_addr_list_82575(hw, NULL, 0, 1, - mac->rar_entry_count); - return; + if (netdev->mc_count) { + mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC); + if (!mta_list) { + dev_err(&adapter->pdev->dev, + "failed to allocate multicast filter list\n"); + return; + } } - mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC); - if (!mta_list) - return; - /* The shared function expects a packed array of only addresses. */ mc_ptr = netdev->mc_list; @@ -2300,8 +2498,13 @@ static void igb_set_multi(struct net_device *netdev) memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN); mc_ptr = mc_ptr->next; } - igb_update_mc_addr_list_82575(hw, mta_list, i, 1, - mac->rar_entry_count); + igb_update_mc_addr_list(hw, mta_list, i, + adapter->vfs_allocated_count + 1, + mac->rar_entry_count); + + igb_set_mc_list_pools(adapter, i, mac->rar_entry_count); + igb_restore_vf_multicasts(adapter); + kfree(mta_list); } @@ -2314,6 +2517,46 @@ static void igb_update_phy_info(unsigned long data) } /** + * igb_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info + **/ +static bool igb_has_link(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + bool link_active = false; + s32 ret_val = 0; + + /* get_link_status is set on LSC (link status) interrupt or + * rx sequence error interrupt. get_link_status will stay + * false until the e1000_check_for_link establishes link + * for copper adapters ONLY + */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (hw->mac.get_link_status) { + ret_val = hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; + } else { + link_active = true; + } + break; + case e1000_media_type_fiber: + ret_val = hw->mac.ops.check_for_link(hw); + link_active = !!(rd32(E1000_STATUS) & E1000_STATUS_LU); + break; + case e1000_media_type_internal_serdes: + ret_val = hw->mac.ops.check_for_link(hw); + link_active = hw->mac.serdes_has_link; + break; + default: + case e1000_media_type_unknown: + break; + } + + return link_active; +} + +/** * igb_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ @@ -2329,34 +2572,16 @@ static void igb_watchdog_task(struct work_struct *work) struct igb_adapter *adapter = container_of(work, struct igb_adapter, watchdog_task); struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; struct igb_ring *tx_ring = adapter->tx_ring; - struct e1000_mac_info *mac = &adapter->hw.mac; u32 link; u32 eics = 0; - s32 ret_val; int i; - if ((netif_carrier_ok(netdev)) && - (rd32(E1000_STATUS) & E1000_STATUS_LU)) + link = igb_has_link(adapter); + if ((netif_carrier_ok(netdev)) && link) goto link_up; - ret_val = hw->mac.ops.check_for_link(&adapter->hw); - if ((ret_val == E1000_ERR_PHY) && - (hw->phy.type == e1000_phy_igp_3) && - (rd32(E1000_CTRL) & - E1000_PHY_CTRL_GBE_DISABLE)) - dev_info(&adapter->pdev->dev, - "Gigabit has been disabled, downgrading speed\n"); - - if ((hw->phy.media_type == e1000_media_type_internal_serdes) && - !(rd32(E1000_TXCW) & E1000_TXCW_ANE)) - link = mac->serdes_has_link; - else - link = rd32(E1000_STATUS) & - E1000_STATUS_LU; - if (link) { if (!netif_carrier_ok(netdev)) { u32 ctrl; @@ -2395,6 +2620,9 @@ static void igb_watchdog_task(struct work_struct *work) netif_carrier_on(netdev); netif_tx_wake_all_queues(netdev); + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -2408,6 +2636,10 @@ static void igb_watchdog_task(struct work_struct *work) netdev->name); netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); + + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -2417,9 +2649,9 @@ static void igb_watchdog_task(struct work_struct *work) link_up: igb_update_stats(adapter); - mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; + hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; adapter->tpt_old = adapter->stats.tpt; - mac->collision_delta = adapter->stats.colc - adapter->colc_old; + hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old; adapter->colc_old = adapter->stats.colc; adapter->gorc = adapter->stats.gorc - adapter->gorc_old; @@ -2430,7 +2662,7 @@ link_up: igb_update_adaptive(&adapter->hw); if (!netif_carrier_ok(netdev)) { - if (IGB_DESC_UNUSED(tx_ring) + 1 < tx_ring->count) { + if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. @@ -2576,7 +2808,7 @@ static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting, if (bytes > 25000) { if (packets > 35) retval = low_latency; - } else if (bytes < 6000) { + } else if (bytes < 1500) { retval = low_latency; } break; @@ -2608,15 +2840,13 @@ static void igb_set_itr(struct igb_adapter *adapter) adapter->tx_itr, adapter->tx_ring->total_packets, adapter->tx_ring->total_bytes); - current_itr = max(adapter->rx_itr, adapter->tx_itr); } else { current_itr = adapter->rx_itr; } /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (adapter->itr_setting == 3 && - current_itr == lowest_latency) + if (adapter->itr_setting == 3 && current_itr == lowest_latency) current_itr = low_latency; switch (current_itr) { @@ -2668,6 +2898,7 @@ set_itr_now: #define IGB_TX_FLAGS_VLAN 0x00000002 #define IGB_TX_FLAGS_TSO 0x00000004 #define IGB_TX_FLAGS_IPV4 0x00000008 +#define IGB_TX_FLAGS_TSTAMP 0x00000010 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 @@ -2733,7 +2964,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter, mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT); - /* Context index must be unique per ring. */ + /* For 82575, context index must be unique per ring. */ if (adapter->flags & IGB_FLAG_NEED_CTX_IDX) mss_l4len_idx |= tx_ring->queue_index << 4; @@ -2778,13 +3009,24 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + __be16 protocol; + + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + const struct vlan_ethhdr *vhdr = + (const struct vlan_ethhdr*)skb->data; + + protocol = vhdr->h_vlan_encapsulated_proto; + } else { + protocol = skb->protocol; + } + + switch (protocol) { + case cpu_to_be16(ETH_P_IP): tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; if (ip_hdr(skb)->protocol == IPPROTO_TCP) tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; break; - case __constant_htons(ETH_P_IPV6): + case cpu_to_be16(ETH_P_IPV6): /* XXX what about other V6 headers?? */ if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; @@ -2803,6 +3045,8 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, if (adapter->flags & IGB_FLAG_NEED_CTX_IDX) context_desc->mss_l4len_idx = cpu_to_le32(tx_ring->queue_index << 4); + else + context_desc->mss_l4len_idx = 0; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; @@ -2815,8 +3059,6 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, return true; } - - return false; } @@ -2831,25 +3073,33 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, unsigned int len = skb_headlen(skb); unsigned int count = 0, i; unsigned int f; + dma_addr_t *map; i = tx_ring->next_to_use; + if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) { + dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); + return 0; + } + + map = skb_shinfo(skb)->dma_maps; + buffer_info = &tx_ring->buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; /* set time_stamp *before* dma to help avoid a possible race */ buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len, - PCI_DMA_TODEVICE); + buffer_info->dma = map[count]; count++; - i++; - if (i == tx_ring->count) - i = 0; for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { struct skb_frag_struct *frag; + i++; + if (i == tx_ring->count) + i = 0; + frag = &skb_shinfo(skb)->frags[f]; len = frag->size; @@ -2858,19 +3108,10 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter, buffer_info->length = len; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - buffer_info->dma = pci_map_page(adapter->pdev, - frag->page, - frag->page_offset, - len, - PCI_DMA_TODEVICE); - + buffer_info->dma = map[count]; count++; - i++; - if (i == tx_ring->count) - i = 0; } - i = ((i == 0) ? tx_ring->count - 1 : i - 1); tx_ring->buffer_info[i].skb = skb; tx_ring->buffer_info[first].next_to_watch = i; @@ -2893,6 +3134,9 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter, if (tx_flags & IGB_TX_FLAGS_VLAN) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + if (tx_flags & IGB_TX_FLAGS_TSTAMP) + cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP; + if (tx_flags & IGB_TX_FLAGS_TSO) { cmd_type_len |= E1000_ADVTXD_DCMD_TSE; @@ -2955,7 +3199,7 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, /* We need to check again in a case another CPU has just * made room available. */ - if (IGB_DESC_UNUSED(tx_ring) < size) + if (igb_desc_unused(tx_ring) < size) return -EBUSY; /* A reprieve! */ @@ -2967,13 +3211,11 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, static int igb_maybe_stop_tx(struct net_device *netdev, struct igb_ring *tx_ring, int size) { - if (IGB_DESC_UNUSED(tx_ring) >= size) + if (igb_desc_unused(tx_ring) >= size) return 0; return __igb_maybe_stop_tx(netdev, tx_ring, size); } -#define TXD_USE_COUNT(S) (((S) >> (IGB_MAX_TXD_PWR)) + 1) - static int igb_xmit_frame_ring_adv(struct sk_buff *skb, struct net_device *netdev, struct igb_ring *tx_ring) @@ -2981,11 +3223,10 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, struct igb_adapter *adapter = netdev_priv(netdev); unsigned int first; unsigned int tx_flags = 0; - unsigned int len; u8 hdr_len = 0; + int count = 0; int tso = 0; - - len = skb_headlen(skb); + union skb_shared_tx *shtx; if (test_bit(__IGB_DOWN, &adapter->state)) { dev_kfree_skb_any(skb); @@ -3006,7 +3247,21 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, /* this is a hard error */ return NETDEV_TX_BUSY; } - skb_orphan(skb); + + /* + * TODO: check that there currently is no other packet with + * time stamping in the queue + * + * When doing time stamping, keep the connection to the socket + * a while longer: it is still needed by skb_hwtstamp_tx(), + * called either in igb_tx_hwtstamp() or by our caller when + * doing software time stamping. + */ + shtx = skb_tx(skb); + if (unlikely(shtx->hardware)) { + shtx->in_progress = 1; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + } if (adapter->vlgrp && vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; @@ -3017,7 +3272,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, tx_flags |= IGB_TX_FLAGS_IPV4; first = tx_ring->next_to_use; - tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags, &hdr_len) : 0; @@ -3028,18 +3282,27 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, if (tso) tx_flags |= IGB_TX_FLAGS_TSO; - else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags)) - if (skb->ip_summed == CHECKSUM_PARTIAL) - tx_flags |= IGB_TX_FLAGS_CSUM; + else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags) && + (skb->ip_summed == CHECKSUM_PARTIAL)) + tx_flags |= IGB_TX_FLAGS_CSUM; - igb_tx_queue_adv(adapter, tx_ring, tx_flags, - igb_tx_map_adv(adapter, tx_ring, skb, first), - skb->len, hdr_len); - - netdev->trans_start = jiffies; - - /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4); + /* + * count reflects descriptors mapped, if 0 then mapping error + * has occured and we need to rewind the descriptor queue + */ + count = igb_tx_map_adv(adapter, tx_ring, skb, first); + + if (count) { + igb_tx_queue_adv(adapter, tx_ring, tx_flags, count, + skb->len, hdr_len); + netdev->trans_start = jiffies; + /* Make sure there is space in the ring for the next send. */ + igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4); + } else { + dev_kfree_skb_any(skb); + tx_ring->buffer_info[first].time_stamp = 0; + tx_ring->next_to_use = first; + } return NETDEV_TX_OK; } @@ -3050,7 +3313,7 @@ static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev) struct igb_ring *tx_ring; int r_idx = 0; - r_idx = skb->queue_mapping & (IGB_MAX_TX_QUEUES - 1); + r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1); tx_ring = adapter->multi_tx_table[r_idx]; /* This goes back to the question of how to logically map a tx queue @@ -3072,8 +3335,8 @@ static void igb_tx_timeout(struct net_device *netdev) /* Do the reset outside of interrupt context */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); - wr32(E1000_EICS, adapter->eims_enable_mask & - ~(E1000_EIMS_TCP_TIMER | E1000_EIMS_OTHER)); + wr32(E1000_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); } static void igb_reset_task(struct work_struct *work) @@ -3091,8 +3354,7 @@ static void igb_reset_task(struct work_struct *work) * Returns the address of the device statistics structure. * The statistics are actually updated from the timer callback. **/ -static struct net_device_stats * -igb_get_stats(struct net_device *netdev) +static struct net_device_stats *igb_get_stats(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -3118,7 +3380,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } -#define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n"); return -EINVAL; @@ -3126,6 +3387,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) msleep(1); + /* igb_down has a dependency on max_frame_size */ adapter->max_frame_size = max_frame; if (netif_running(netdev)) @@ -3151,6 +3413,12 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) #else adapter->rx_buffer_len = PAGE_SIZE / 2; #endif + + /* if sr-iov is enabled we need to force buffer size to 1K or larger */ + if (adapter->vfs_allocated_count && + (adapter->rx_buffer_len < IGB_RXBUFFER_1024)) + adapter->rx_buffer_len = IGB_RXBUFFER_1024; + /* adjust allocation if LPE protects us, and we aren't using SBP */ if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE)) @@ -3295,8 +3563,7 @@ void igb_update_stats(struct igb_adapter *adapter) /* Phy Stats */ if (hw->phy.media_type == e1000_media_type_copper) { if ((adapter->link_speed == SPEED_1000) && - (!igb_read_phy_reg(hw, PHY_1000T_STATUS, - &phy_tmp))) { + (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; adapter->phy_stats.idle_errors += phy_tmp; } @@ -3308,7 +3575,6 @@ void igb_update_stats(struct igb_adapter *adapter) adapter->stats.mgpdc += rd32(E1000_MGTPDC); } - static irqreturn_t igb_msix_other(int irq, void *data) { struct net_device *netdev = data; @@ -3317,15 +3583,24 @@ static irqreturn_t igb_msix_other(int irq, void *data) u32 icr = rd32(E1000_ICR); /* reading ICR causes bit 31 of EICR to be cleared */ - if (!(icr & E1000_ICR_LSC)) - goto no_link_interrupt; - hw->mac.get_link_status = 1; - /* guard against interrupt when we're going down */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - -no_link_interrupt: - wr32(E1000_IMS, E1000_IMS_LSC); + + if(icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + /* Check for a mailbox event */ + if (icr & E1000_ICR_VMMB) + igb_msg_task(adapter); + + if (icr & E1000_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB); wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; @@ -3341,6 +3616,7 @@ static irqreturn_t igb_msix_tx(int irq, void *data) if (adapter->flags & IGB_FLAG_DCA_ENABLED) igb_update_tx_dca(tx_ring); #endif + tx_ring->total_bytes = 0; tx_ring->total_packets = 0; @@ -3361,13 +3637,11 @@ static void igb_write_itr(struct igb_ring *ring) if ((ring->adapter->itr_setting & 3) && ring->set_itr) { switch (hw->mac.type) { case e1000_82576: - wr32(ring->itr_register, - ring->itr_val | + wr32(ring->itr_register, ring->itr_val | 0x80000000); break; default: - wr32(ring->itr_register, - ring->itr_val | + wr32(ring->itr_register, ring->itr_val | (ring->itr_val << 16)); break; } @@ -3385,8 +3659,8 @@ static irqreturn_t igb_msix_rx(int irq, void *data) igb_write_itr(rx_ring); - if (netif_rx_schedule_prep(&rx_ring->napi)) - __netif_rx_schedule(&rx_ring->napi); + if (napi_schedule_prep(&rx_ring->napi)) + __napi_schedule(&rx_ring->napi); #ifdef CONFIG_IGB_DCA if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED) @@ -3408,11 +3682,11 @@ static void igb_update_rx_dca(struct igb_ring *rx_ring) dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); if (hw->mac.type == e1000_82576) { dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; - dca_rxctrl |= dca_get_tag(cpu) << + dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << E1000_DCA_RXCTRL_CPUID_SHIFT; } else { dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; - dca_rxctrl |= dca_get_tag(cpu); + dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); } dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; @@ -3435,11 +3709,11 @@ static void igb_update_tx_dca(struct igb_ring *tx_ring) dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); if (hw->mac.type == e1000_82576) { dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; - dca_txctrl |= dca_get_tag(cpu) << + dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << E1000_DCA_TXCTRL_CPUID_SHIFT; } else { dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; - dca_txctrl |= dca_get_tag(cpu); + dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); } dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; wr32(E1000_DCA_TXCTRL(q), dca_txctrl); @@ -3479,7 +3753,7 @@ static int __igb_notify_dca(struct device *dev, void *data) break; /* Always use CB2 mode, difference is masked * in the CB driver. */ - wr32(E1000_DCA_CTRL, 2); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); if (dca_add_requester(dev) == 0) { adapter->flags |= IGB_FLAG_DCA_ENABLED; dev_info(&adapter->pdev->dev, "DCA enabled\n"); @@ -3494,7 +3768,7 @@ static int __igb_notify_dca(struct device *dev, void *data) dca_remove_requester(dev); dev_info(&adapter->pdev->dev, "DCA disabled\n"); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; - wr32(E1000_DCA_CTRL, 1); + wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); } break; } @@ -3514,6 +3788,322 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, } #endif /* CONFIG_IGB_DCA */ +static void igb_ping_all_vfs(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ping; + int i; + + for (i = 0 ; i < adapter->vfs_allocated_count; i++) { + ping = E1000_PF_CONTROL_MSG; + if (adapter->vf_data[i].clear_to_send) + ping |= E1000_VT_MSGTYPE_CTS; + igb_write_mbx(hw, &ping, 1, i); + } +} + +static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; + + /* only up to 30 hash values supported */ + if (n > 30) + n = 30; + + /* salt away the number of multi cast addresses assigned + * to this VF for later use to restore when the PF multi cast + * list changes + */ + vf_data->num_vf_mc_hashes = n; + + /* VFs are limited to using the MTA hash table for their multicast + * addresses */ + for (i = 0; i < n; i++) + vf_data->vf_mc_hashes[i] = hash_list[i];; + + /* Flush and reset the mta with the new values */ + igb_set_multi(adapter->netdev); + + return 0; +} + +static void igb_restore_vf_multicasts(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data; + int i, j; + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + vf_data = &adapter->vf_data[i]; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(hw, vf_data->vf_mc_hashes[j]); + } +} + +static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pool_mask, reg, vid; + int i; + + pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + + /* remove the vf from the pool */ + reg &= ~pool_mask; + + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK) && + (reg & E1000_VLVF_VLANID_ENABLE)) { + reg = 0; + vid = reg & E1000_VLVF_VLANID_MASK; + igb_vfta_set(hw, vid, false); + } + + wr32(E1000_VLVF(i), reg); + } +} + +static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg, i; + + /* It is an error to call this function when VFs are not enabled */ + if (!adapter->vfs_allocated_count) + return -1; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (add) { + if (i == E1000_VLVF_ARRAY_SIZE) { + /* Did not find a matching VLAN ID entry that was + * enabled. Search for a free filter entry, i.e. + * one without the enable bit set + */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + if (!(reg & E1000_VLVF_VLANID_ENABLE)) + break; + } + } + if (i < E1000_VLVF_ARRAY_SIZE) { + /* Found an enabled/available entry */ + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* if !enabled we need to set this up in vfta */ + if (!(reg & E1000_VLVF_VLANID_ENABLE)) { + /* add VID to filter table, if bit already set + * PF must have added it outside of table */ + if (igb_vfta_set(hw, vid, true)) + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); + reg |= E1000_VLVF_VLANID_ENABLE; + } + reg &= ~E1000_VLVF_VLANID_MASK; + reg |= vid; + + wr32(E1000_VLVF(i), reg); + return 0; + } + } else { + if (i < E1000_VLVF_ARRAY_SIZE) { + /* remove vf from the pool */ + reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK)) { + reg = 0; + igb_vfta_set(hw, vid, false); + } + wr32(E1000_VLVF(i), reg); + return 0; + } + } + return -1; +} + +static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + + return igb_vlvf_set(adapter, vid, add, vf); +} + +static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + /* disable mailbox functionality for vf */ + adapter->vf_data[vf].clear_to_send = false; + + /* reset offloads to defaults */ + igb_set_vmolr(hw, vf); + + /* reset vlans for device */ + igb_clear_vf_vfta(adapter, vf); + + /* reset multicast table array for vf */ + adapter->vf_data[vf].num_vf_mc_hashes = 0; + + /* Flush and reset the mta with the new values */ + igb_set_multi(adapter->netdev); +} + +static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + u32 reg, msgbuf[3]; + u8 *addr = (u8 *)(&msgbuf[1]); + + /* process all the same items cleared in a function level reset */ + igb_vf_reset_event(adapter, vf); + + /* set vf mac address */ + igb_rar_set(hw, vf_mac, vf + 1); + igb_set_rah_pool(hw, vf, vf + 1); + + /* enable transmit and receive for vf */ + reg = rd32(E1000_VFTE); + wr32(E1000_VFTE, reg | (1 << vf)); + reg = rd32(E1000_VFRE); + wr32(E1000_VFRE, reg | (1 << vf)); + + /* enable mailbox functionality for vf */ + adapter->vf_data[vf].clear_to_send = true; + + /* reply to reset with ack and vf mac address */ + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, 6); + igb_write_mbx(hw, msgbuf, 3, vf); +} + +static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) +{ + unsigned char *addr = (char *)&msg[1]; + int err = -1; + + if (is_valid_ether_addr(addr)) + err = igb_set_vf_mac(adapter, vf, addr); + + return err; + +} + +static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 msg = E1000_VT_MSGTYPE_NACK; + + /* if device isn't clear to send it shouldn't be reading either */ + if (!adapter->vf_data[vf].clear_to_send) + igb_write_mbx(hw, &msg, 1, vf); +} + + +static void igb_msg_task(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vf; + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + /* process any reset requests */ + if (!igb_check_for_rst(hw, vf)) { + adapter->vf_data[vf].clear_to_send = false; + igb_vf_reset_event(adapter, vf); + } + + /* process any messages pending */ + if (!igb_check_for_msg(hw, vf)) + igb_rcv_msg_from_vf(adapter, vf); + + /* process any acks */ + if (!igb_check_for_ack(hw, vf)) + igb_rcv_ack_from_vf(adapter, vf); + + } +} + +static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) +{ + u32 mbx_size = E1000_VFMAILBOX_SIZE; + u32 msgbuf[mbx_size]; + struct e1000_hw *hw = &adapter->hw; + s32 retval; + + retval = igb_read_mbx(hw, msgbuf, mbx_size, vf); + + if (retval) + dev_err(&adapter->pdev->dev, + "Error receiving message from VF\n"); + + /* this is a message we already processed, do nothing */ + if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) + return retval; + + /* + * until the vf completes a reset it should not be + * allowed to start any configuration. + */ + + if (msgbuf[0] == E1000_VF_RESET) { + igb_vf_reset_msg(adapter, vf); + + return retval; + } + + if (!adapter->vf_data[vf].clear_to_send) { + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + igb_write_mbx(hw, msgbuf, 1, vf); + return retval; + } + + switch ((msgbuf[0] & 0xFFFF)) { + case E1000_VF_SET_MAC_ADDR: + retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); + break; + case E1000_VF_SET_MULTICAST: + retval = igb_set_vf_multicasts(adapter, msgbuf, vf); + break; + case E1000_VF_SET_LPE: + retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); + break; + case E1000_VF_SET_VLAN: + retval = igb_set_vf_vlan(adapter, msgbuf, vf); + break; + default: + dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); + retval = -1; + break; + } + + /* notify the VF of the results of what it sent us */ + if (retval) + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + else + msgbuf[0] |= E1000_VT_MSGTYPE_ACK; + + msgbuf[0] |= E1000_VT_MSGTYPE_CTS; + + igb_write_mbx(hw, msgbuf, 1, vf); + + return retval; +} + /** * igb_intr_msi - Interrupt Handler * @irq: interrupt number @@ -3529,19 +4119,24 @@ static irqreturn_t igb_intr_msi(int irq, void *data) igb_write_itr(adapter->rx_ring); + if(icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - netif_rx_schedule(&adapter->rx_ring[0].napi); + napi_schedule(&adapter->rx_ring[0].napi); return IRQ_HANDLED; } /** - * igb_intr - Interrupt Handler + * igb_intr - Legacy Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure **/ @@ -3553,7 +4148,6 @@ static irqreturn_t igb_intr(int irq, void *data) /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No * need for the IMC write */ u32 icr = rd32(E1000_ICR); - u32 eicr = 0; if (!icr) return IRQ_NONE; /* Not our interrupt */ @@ -3564,7 +4158,10 @@ static irqreturn_t igb_intr(int irq, void *data) if (!(icr & E1000_ICR_INT_ASSERTED)) return IRQ_NONE; - eicr = rd32(E1000_EICR); + if(icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; @@ -3573,11 +4170,31 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - netif_rx_schedule(&adapter->rx_ring[0].napi); + napi_schedule(&adapter->rx_ring[0].napi); return IRQ_HANDLED; } +static inline void igb_rx_irq_enable(struct igb_ring *rx_ring) +{ + struct igb_adapter *adapter = rx_ring->adapter; + struct e1000_hw *hw = &adapter->hw; + + if (adapter->itr_setting & 3) { + if (adapter->num_rx_queues == 1) + igb_set_itr(adapter); + else + igb_update_ring_itr(rx_ring); + } + + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(E1000_EIMS, rx_ring->eims_value); + else + igb_irq_enable(adapter); + } +} + /** * igb_poll - NAPI Rx polling callback * @napi: napi polling structure @@ -3586,70 +4203,64 @@ static irqreturn_t igb_intr(int irq, void *data) static int igb_poll(struct napi_struct *napi, int budget) { struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi); - struct igb_adapter *adapter = rx_ring->adapter; - struct net_device *netdev = adapter->netdev; - int tx_clean_complete, work_done = 0; + int work_done = 0; - /* this poll routine only supports one tx and one rx queue */ #ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_tx_dca(&adapter->tx_ring[0]); + if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_rx_dca(rx_ring); #endif - tx_clean_complete = igb_clean_tx_irq(&adapter->tx_ring[0]); + igb_clean_rx_irq_adv(rx_ring, &work_done, budget); + if (rx_ring->buddy) { #ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_rx_dca(&adapter->rx_ring[0]); + if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_tx_dca(rx_ring->buddy); #endif - igb_clean_rx_irq_adv(&adapter->rx_ring[0], &work_done, budget); + if (!igb_clean_tx_irq(rx_ring->buddy)) + work_done = budget; + } - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((tx_clean_complete && (work_done < budget)) || - !netif_running(netdev)) { - if (adapter->itr_setting & 3) - igb_set_itr(adapter); - netif_rx_complete(napi); - if (!test_bit(__IGB_DOWN, &adapter->state)) - igb_irq_enable(adapter); - return 0; + /* If not enough Rx work done, exit the polling mode */ + if (work_done < budget) { + napi_complete(napi); + igb_rx_irq_enable(rx_ring); } - return 1; + return work_done; } -static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget) +/** + * igb_hwtstamp - utility function which checks for TX time stamp + * @adapter: board private structure + * @skb: packet that was just sent + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb) { - struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi); - struct igb_adapter *adapter = rx_ring->adapter; + union skb_shared_tx *shtx = skb_tx(skb); struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int work_done = 0; - -#ifdef CONFIG_IGB_DCA - if (adapter->flags & IGB_FLAG_DCA_ENABLED) - igb_update_rx_dca(rx_ring); -#endif - igb_clean_rx_irq_adv(rx_ring, &work_done, budget); - - - /* If not enough Rx work done, exit the polling mode */ - if ((work_done == 0) || !netif_running(netdev)) { - netif_rx_complete(napi); - if (adapter->itr_setting & 3) { - if (adapter->num_rx_queues == 1) - igb_set_itr(adapter); - else - igb_update_ring_itr(rx_ring); + if (unlikely(shtx->hardware)) { + u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID; + if (valid) { + u64 regval = rd32(E1000_TXSTMPL); + u64 ns; + struct skb_shared_hwtstamps shhwtstamps; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + regval |= (u64)rd32(E1000_TXSTMPH) << 32; + ns = timecounter_cyc2time(&adapter->clock, + regval); + timecompare_update(&adapter->compare, ns); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + shhwtstamps.syststamp = + timecompare_transform(&adapter->compare, ns); + skb_tstamp_tx(skb, &shhwtstamps); } - - if (!test_bit(__IGB_DOWN, &adapter->state)) - wr32(E1000_EIMS, rx_ring->eims_value); - - return 0; } - - return 1; } /** @@ -3690,6 +4301,8 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) skb->len; total_packets += segs; total_bytes += bytecount; + + igb_tx_hwtstamp(adapter, skb); } igb_unmap_and_free_tx_resource(adapter, buffer_info); @@ -3699,7 +4312,6 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) if (i == tx_ring->count) i = 0; } - eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop); } @@ -3708,7 +4320,7 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) if (unlikely(count && netif_carrier_ok(netdev) && - IGB_DESC_UNUSED(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -3764,44 +4376,11 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring) return (count < tx_ring->count); } -#ifdef CONFIG_IGB_LRO - /** - * igb_get_skb_hdr - helper function for LRO header processing - * @skb: pointer to sk_buff to be added to LRO packet - * @iphdr: pointer to ip header structure - * @tcph: pointer to tcp header structure - * @hdr_flags: pointer to header flags - * @priv: pointer to the receive descriptor for the current sk_buff - **/ -static int igb_get_skb_hdr(struct sk_buff *skb, void **iphdr, void **tcph, - u64 *hdr_flags, void *priv) -{ - union e1000_adv_rx_desc *rx_desc = priv; - u16 pkt_type = rx_desc->wb.lower.lo_dword.pkt_info & - (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP); - - /* Verify that this is a valid IPv4 TCP packet */ - if (pkt_type != (E1000_RXDADV_PKTTYPE_IPV4 | - E1000_RXDADV_PKTTYPE_TCP)) - return -1; - - /* Set network headers */ - skb_reset_network_header(skb); - skb_set_transport_header(skb, ip_hdrlen(skb)); - *iphdr = ip_hdr(skb); - *tcph = tcp_hdr(skb); - *hdr_flags = LRO_IPV4 | LRO_TCP; - - return 0; - -} -#endif /* CONFIG_IGB_LRO */ - /** * igb_receive_skb - helper function to handle rx indications - * @ring: pointer to receive ring receving this packet + * @ring: pointer to receive ring receving this packet * @status: descriptor status field as written by hardware - * @vlan: descriptor vlan field as written by hardware (no le/be conversion) + * @rx_desc: receive descriptor containing vlan and type information. * @skb: pointer to sk_buff to be indicated to stack **/ static void igb_receive_skb(struct igb_ring *ring, u8 status, @@ -3811,31 +4390,23 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status, struct igb_adapter * adapter = ring->adapter; bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP)); -#ifdef CONFIG_IGB_LRO - if (adapter->netdev->features & NETIF_F_LRO && - skb->ip_summed == CHECKSUM_UNNECESSARY) { + skb_record_rx_queue(skb, ring->queue_index); + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (vlan_extracted) - lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb, - adapter->vlgrp, - le16_to_cpu(rx_desc->wb.upper.vlan), - rx_desc); + vlan_gro_receive(&ring->napi, adapter->vlgrp, + le16_to_cpu(rx_desc->wb.upper.vlan), + skb); else - lro_receive_skb(&ring->lro_mgr,skb, rx_desc); - ring->lro_used = 1; + napi_gro_receive(&ring->napi, skb); } else { -#endif if (vlan_extracted) vlan_hwaccel_receive_skb(skb, adapter->vlgrp, le16_to_cpu(rx_desc->wb.upper.vlan)); else - netif_receive_skb(skb); -#ifdef CONFIG_IGB_LRO } -#endif } - static inline void igb_rx_checksum_adv(struct igb_adapter *adapter, u32 status_err, struct sk_buff *skb) { @@ -3863,17 +4434,19 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, { struct igb_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc , *next_rxd; struct igb_buffer *buffer_info , *next_buffer; struct sk_buff *skb; - unsigned int i; - u32 length, hlen, staterr; bool cleaned = false; int cleaned_count = 0; unsigned int total_bytes = 0, total_packets = 0; + unsigned int i; + u32 length, hlen, staterr; i = rx_ring->next_to_clean; + buffer_info = &rx_ring->buffer_info[i]; rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); staterr = le32_to_cpu(rx_desc->wb.upper.status_error); @@ -3881,25 +4454,22 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, if (*work_done >= budget) break; (*work_done)++; - buffer_info = &rx_ring->buffer_info[i]; - /* HW will not DMA in data larger than the given buffer, even - * if it parses the (NFS, of course) header to be larger. In - * that case, it fills the header buffer and spills the rest - * into the page. - */ - hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & - E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > adapter->rx_ps_hdr_size) - hlen = adapter->rx_ps_hdr_size; + skb = buffer_info->skb; + prefetch(skb->data - NET_IP_ALIGN); + buffer_info->skb = NULL; + + i++; + if (i == rx_ring->count) + i = 0; + next_rxd = E1000_RX_DESC_ADV(*rx_ring, i); + prefetch(next_rxd); + next_buffer = &rx_ring->buffer_info[i]; length = le16_to_cpu(rx_desc->wb.upper.length); cleaned = true; cleaned_count++; - skb = buffer_info->skb; - prefetch(skb->data - NET_IP_ALIGN); - buffer_info->skb = NULL; if (!adapter->rx_ps_hdr_size) { pci_unmap_single(pdev, buffer_info->dma, adapter->rx_buffer_len + @@ -3909,10 +4479,19 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, goto send_up; } + /* HW will not DMA in data larger than the given buffer, even + * if it parses the (NFS, of course) header to be larger. In + * that case, it fills the header buffer and spills the rest + * into the page. + */ + hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & + E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; + if (hlen > adapter->rx_ps_hdr_size) + hlen = adapter->rx_ps_hdr_size; + if (!skb_shinfo(skb)->nr_frags) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_ps_hdr_size + - NET_IP_ALIGN, + adapter->rx_ps_hdr_size + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); skb_put(skb, hlen); } @@ -3938,13 +4517,6 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, skb->truesize += length; } -send_up: - i++; - if (i == rx_ring->count) - i = 0; - next_rxd = E1000_RX_DESC_ADV(*rx_ring, i); - prefetch(next_rxd); - next_buffer = &rx_ring->buffer_info[i]; if (!(staterr & E1000_RXD_STAT_EOP)) { buffer_info->skb = next_buffer->skb; @@ -3953,6 +4525,47 @@ send_up: next_buffer->dma = 0; goto next_desc; } +send_up: + /* + * If this bit is set, then the RX registers contain + * the time stamp. No other packet will be time + * stamped until we read these registers, so read the + * registers to make them available again. Because + * only one packet can be time stamped at a time, we + * know that the register values must belong to this + * one here and therefore we don't need to compare + * any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a + * skb_shared_tx that we can turn into a + * skb_shared_hwtstamps. + * + * TODO: can time stamping be triggered (thus locking + * the registers) without the packet reaching this point + * here? In that case RX time stamping would get stuck. + * + * TODO: in "time stamp all packets" mode this bit is + * not set. Need a global flag for this mode and then + * always read the registers. Cannot be done without + * a race condition. + */ + if (unlikely(staterr & E1000_RXD_STAT_TS)) { + u64 regval; + u64 ns; + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + + WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID), + "igb: no RX time stamp available for time stamped packet"); + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; + ns = timecounter_cyc2time(&adapter->clock, regval); + timecompare_update(&adapter->compare, ns); + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ns_to_ktime(ns); + shhwtstamps->syststamp = + timecompare_transform(&adapter->compare, ns); + } if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { dev_kfree_skb_irq(skb); @@ -3980,19 +4593,11 @@ next_desc: /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); } rx_ring->next_to_clean = i; - cleaned_count = IGB_DESC_UNUSED(rx_ring); - -#ifdef CONFIG_IGB_LRO - if (rx_ring->lro_used) { - lro_flush_all(&rx_ring->lro_mgr); - rx_ring->lro_used = 0; - } -#endif + cleaned_count = igb_desc_unused(rx_ring); if (cleaned_count) igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); @@ -4006,7 +4611,6 @@ next_desc: return cleaned; } - /** * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split * @adapter: address of board private structure @@ -4021,10 +4625,17 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, struct igb_buffer *buffer_info; struct sk_buff *skb; unsigned int i; + int bufsz; i = rx_ring->next_to_use; buffer_info = &rx_ring->buffer_info[i]; + if (adapter->rx_ps_hdr_size) + bufsz = adapter->rx_ps_hdr_size; + else + bufsz = adapter->rx_buffer_len; + bufsz += NET_IP_ALIGN; + while (cleaned_count--) { rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); @@ -4040,23 +4651,14 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, buffer_info->page_offset ^= PAGE_SIZE / 2; } buffer_info->page_dma = - pci_map_page(pdev, - buffer_info->page, + pci_map_page(pdev, buffer_info->page, buffer_info->page_offset, PAGE_SIZE / 2, PCI_DMA_FROMDEVICE); } if (!buffer_info->skb) { - int bufsz; - - if (adapter->rx_ps_hdr_size) - bufsz = adapter->rx_ps_hdr_size; - else - bufsz = adapter->rx_buffer_len; - bufsz += NET_IP_ALIGN; skb = netdev_alloc_skb(netdev, bufsz); - if (!skb) { adapter->alloc_rx_buff_failed++; goto no_buffers; @@ -4072,7 +4674,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, buffer_info->dma = pci_map_single(pdev, skb->data, bufsz, PCI_DMA_FROMDEVICE); - } /* Refresh the desc even if buffer_addrs didn't change because * each write-back erases this info. */ @@ -4142,6 +4743,163 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** + * igb_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + **/ +static int igb_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_ctl_type = 0; + u32 tsync_rx_cfg = 0; + int is_l4 = 0; + int is_l2 = 0; + short port = 319; /* PTP */ + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl_bit = 0; + break; + case HWTSTAMP_TX_ON: + tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl_bit = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * register TSYNCRXCFG must be set, therefore it is not + * possible to time stamp both Sync and Delay_Req messages + * => fall back to time stamping all packets + */ + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = 1; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = 1; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; + is_l2 = 1; + is_l4 = 1; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = 1; + is_l4 = 1; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = 1; + break; + default: + return -ERANGE; + } + + /* enable/disable TX */ + regval = rd32(E1000_TSYNCTXCTL); + regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit; + wr32(E1000_TSYNCTXCTL, regval); + + /* enable/disable RX, define which PTP packets are time stamped */ + regval = rd32(E1000_TSYNCRXCTL); + regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit; + regval = (regval & ~0xE) | tsync_rx_ctl_type; + wr32(E1000_TSYNCRXCTL, regval); + wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* + * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 + * (Ethertype to filter on) + * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter) + * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping) + */ + wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0); + + /* L4 Queue Filter[0]: only filter by source and destination port */ + wr32(E1000_SPQF0, htons(port)); + wr32(E1000_IMIREXT(0), is_l4 ? + ((1<<12) | (1<<19) /* bypass size and control flags */) : 0); + wr32(E1000_IMIR(0), is_l4 ? + (htons(port) + | (0<<16) /* immediate interrupt disabled */ + | 0 /* (1<<17) bit cleared: do not bypass + destination port check */) + : 0); + wr32(E1000_FTQF0, is_l4 ? + (0x11 /* UDP */ + | (1<<15) /* VF not compared */ + | (1<<27) /* Enable Timestamping */ + | (7<<28) /* only source port filter enabled, + source/target address and protocol + masked */) + : ((1<<15) | (15<<28) /* all mask bits set = filter not + enabled */)); + + wrfl(); + + adapter->hwtstamp_config = config; + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(E1000_TXSTMPH); + regval = rd32(E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +/** * igb_ioctl - * @netdev: * @ifreq: @@ -4154,6 +4912,8 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); + case SIOCSHWTSTAMP: + return igb_hwtstamp_ioctl(netdev, ifr, cmd); default: return -EOPNOTSUPP; } @@ -4180,8 +4940,6 @@ static void igb_vlan_rx_register(struct net_device *netdev, rctl &= ~E1000_RCTL_CFIEN; wr32(E1000_RCTL, rctl); igb_update_mng_vlan(adapter); - wr32(E1000_RLPML, - adapter->max_frame_size + VLAN_TAG_SIZE); } else { /* disable VLAN tag insert/strip */ ctrl = rd32(E1000_CTRL); @@ -4192,10 +4950,10 @@ static void igb_vlan_rx_register(struct net_device *netdev, igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; } - wr32(E1000_RLPML, - adapter->max_frame_size); } + igb_rlpml_set(adapter); + if (!test_bit(__IGB_DOWN, &adapter->state)) igb_irq_enable(adapter); } @@ -4204,24 +4962,25 @@ static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 vfta, index; + int pf_id = adapter->vfs_allocated_count; - if ((adapter->hw.mng_cookie.status & + if ((hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && (vid == adapter->mng_vlan_id)) return; - /* add VID to filter table */ - index = (vid >> 5) & 0x7F; - vfta = array_rd32(E1000_VFTA, index); - vfta |= (1 << (vid & 0x1F)); - igb_write_vfta(&adapter->hw, index, vfta); + + /* add vid to vlvf if sr-iov is enabled, + * if that fails add directly to filter table */ + if (igb_vlvf_set(adapter, vid, true, pf_id)) + igb_vfta_set(hw, vid, true); + } static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 vfta, index; + int pf_id = adapter->vfs_allocated_count; igb_irq_disable(adapter); vlan_group_set_device(adapter->vlgrp, vid, NULL); @@ -4237,11 +4996,10 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) return; } - /* remove VID from filter table */ - index = (vid >> 5) & 0x7F; - vfta = array_rd32(E1000_VFTA, index); - vfta &= ~(1 << (vid & 0x1F)); - igb_write_vfta(&adapter->hw, index, vfta); + /* remove vid from vlvf if sr-iov is enabled, + * if not in vlvf remove from vfta */ + if (igb_vlvf_set(adapter, vid, false, pf_id)) + igb_vfta_set(hw, vid, false); } static void igb_restore_vlan(struct igb_adapter *adapter) @@ -4298,7 +5056,6 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx) return 0; } - static int igb_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -4390,10 +5147,7 @@ static int igb_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (adapter->need_ioport) - err = pci_enable_device(pdev); - else - err = pci_enable_device_mem(pdev); + err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, "igb: Cannot enable PCI device from suspend\n"); @@ -4414,6 +5168,11 @@ static int igb_resume(struct pci_dev *pdev) /* e1000_power_up_phy(adapter); */ igb_reset(adapter); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + wr32(E1000_WUS, ~0); if (netif_running(netdev)) { @@ -4424,10 +5183,6 @@ static int igb_resume(struct pci_dev *pdev) netif_device_attach(netdev); - /* let the f/w know that the h/w is now under the control of the - * driver. */ - igb_get_hw_control(adapter); - return 0; } #endif @@ -4446,22 +5201,27 @@ static void igb_shutdown(struct pci_dev *pdev) static void igb_netpoll(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; int i; - int work_done = 0; - - igb_irq_disable(adapter); - adapter->flags |= IGB_FLAG_IN_NETPOLL; - for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_irq(&adapter->tx_ring[i]); + if (!adapter->msix_entries) { + igb_irq_disable(adapter); + napi_schedule(&adapter->rx_ring[0].napi); + return; + } - for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_irq_adv(&adapter->rx_ring[i], - &work_done, - adapter->rx_ring[i].napi.weight); + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *tx_ring = &adapter->tx_ring[i]; + wr32(E1000_EIMC, tx_ring->eims_value); + igb_clean_tx_irq(tx_ring); + wr32(E1000_EIMS, tx_ring->eims_value); + } - adapter->flags &= ~IGB_FLAG_IN_NETPOLL; - igb_irq_enable(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *rx_ring = &adapter->rx_ring[i]; + wr32(E1000_EIMC, rx_ring->eims_value); + napi_schedule(&rx_ring->napi); + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ @@ -4504,12 +5264,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) pci_ers_result_t result; int err; - if (adapter->need_ioport) - err = pci_enable_device(pdev); - else - err = pci_enable_device_mem(pdev); - - if (err) { + if (pci_enable_device_mem(pdev)) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; @@ -4562,4 +5317,172 @@ static void igb_io_resume(struct pci_dev *pdev) igb_get_hw_control(adapter); } +static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn) +{ + u32 reg_data; + + reg_data = rd32(E1000_VMOLR(vfn)); + reg_data |= E1000_VMOLR_BAM | /* Accept broadcast */ + E1000_VMOLR_ROPE | /* Accept packets matched in UTA */ + E1000_VMOLR_ROMPE | /* Accept packets matched in MTA */ + E1000_VMOLR_AUPE | /* Accept untagged packets */ + E1000_VMOLR_STRVLAN; /* Strip vlan tags */ + wr32(E1000_VMOLR(vfn), reg_data); +} + +static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, + int vfn) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + vmolr = rd32(E1000_VMOLR(vfn)); + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= size | E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + + return 0; +} + +static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry) +{ + u32 reg_data; + + reg_data = rd32(E1000_RAH(entry)); + reg_data &= ~E1000_RAH_POOL_MASK; + reg_data |= E1000_RAH_POOL_1 << pool;; + wr32(E1000_RAH(entry), reg_data); +} + +static void igb_set_mc_list_pools(struct igb_adapter *adapter, + int entry_count, u16 total_rar_filters) +{ + struct e1000_hw *hw = &adapter->hw; + int i = adapter->vfs_allocated_count + 1; + + if ((i + entry_count) < total_rar_filters) + total_rar_filters = i + entry_count; + + for (; i < total_rar_filters; i++) + igb_set_rah_pool(hw, adapter->vfs_allocated_count, i); +} + +static int igb_set_vf_mac(struct igb_adapter *adapter, + int vf, unsigned char *mac_addr) +{ + struct e1000_hw *hw = &adapter->hw; + int rar_entry = vf + 1; /* VF MAC addresses start at entry 1 */ + + igb_rar_set(hw, mac_addr, rar_entry); + + memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); + + igb_set_rah_pool(hw, vf, rar_entry); + + return 0; +} + +static void igb_vmm_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg_data; + + if (!adapter->vfs_allocated_count) + return; + + /* VF's need PF reset indication before they + * can send/receive mail */ + reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + wr32(E1000_CTRL_EXT, reg_data); + + igb_vmdq_set_loopback_pf(hw, true); + igb_vmdq_set_replication_pf(hw, true); +} + +#ifdef CONFIG_PCI_IOV +static ssize_t igb_show_num_vfs(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct igb_adapter *adapter = netdev_priv(to_net_dev(dev)); + + return sprintf(buf, "%d\n", adapter->vfs_allocated_count); +} + +static ssize_t igb_set_num_vfs(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *netdev = to_net_dev(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + unsigned int num_vfs, i; + unsigned char mac_addr[ETH_ALEN]; + int err; + + sscanf(buf, "%u", &num_vfs); + + if (num_vfs > 7) + num_vfs = 7; + + /* value unchanged do nothing */ + if (num_vfs == adapter->vfs_allocated_count) + return count; + + if (netdev->flags & IFF_UP) + igb_close(netdev); + + igb_reset_interrupt_capability(adapter); + igb_free_queues(adapter); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + adapter->vfs_allocated_count = 0; + + /* reclaim resources allocated to VFs since we are changing count */ + if (adapter->vf_data) { + /* disable iov and allow time for transactions to clear */ + pci_disable_sriov(pdev); + msleep(500); + + kfree(adapter->vf_data); + adapter->vf_data = NULL; + wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + msleep(100); + dev_info(&pdev->dev, "IOV Disabled\n"); + } + + if (num_vfs) { + adapter->vf_data = kcalloc(num_vfs, + sizeof(struct vf_data_storage), + GFP_KERNEL); + if (!adapter->vf_data) { + dev_err(&pdev->dev, "Could not allocate VF private " + "data - IOV enable failed\n"); + } else { + err = pci_enable_sriov(pdev, num_vfs); + if (!err) { + adapter->vfs_allocated_count = num_vfs; + dev_info(&pdev->dev, "%d vfs allocated\n", num_vfs); + for (i = 0; i < adapter->vfs_allocated_count; i++) { + random_ether_addr(mac_addr); + igb_set_vf_mac(adapter, i, mac_addr); + } + } else { + kfree(adapter->vf_data); + adapter->vf_data = NULL; + } + } + } + + igb_set_interrupt_capability(adapter); + igb_alloc_queues(adapter); + igb_reset(adapter); + + if (netdev->flags & IFF_UP) + igb_open(netdev); + + return count; +} +#endif /* CONFIG_PCI_IOV */ /* igb_main.c */ |