diff options
Diffstat (limited to 'drivers/net/ethernet/sfc')
-rw-r--r-- | drivers/net/ethernet/sfc/Kconfig | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/Makefile | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/bitfield.h | 22 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 254 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ethtool.c | 16 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/falcon_boards.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/filter.c | 108 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/filter.h | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.c | 49 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.h | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi_pcol.h | 29 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mtd.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 90 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.c | 10 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.h | 36 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ptp.c | 1481 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/rx.c | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/selftest.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/siena.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/siena_sriov.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/tx.c | 627 |
22 files changed, 2153 insertions, 638 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index fb3cbc27063..25906c1d1b1 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -34,3 +34,10 @@ config SFC_SRIOV This enables support for the SFC9000 I/O Virtualization features, allowing accelerated network performance in virtualized environments. +config SFC_PTP + bool "Solarflare SFC9000-family PTP support" + depends on SFC && PTP_1588_CLOCK && !(SFC=y && PTP_1588_CLOCK=m) + default y + ---help--- + This enables support for the Precision Time Protocol (PTP) + on SFC9000-family NICs diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index ea1f8db5731..e11f2ecf69d 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -5,5 +5,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \ mcdi.o mcdi_phy.o mcdi_mon.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o +sfc-$(CONFIG_SFC_PTP) += ptp.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index b26a954c27f..5400a33f254 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -120,10 +120,10 @@ typedef union efx_oword { * [0,high-low), with garbage in bits [high-low+1,...). */ #define EFX_EXTRACT_NATIVE(native_element, min, max, low, high) \ - (((low > max) || (high < min)) ? 0 : \ - ((low > min) ? \ - ((native_element) >> (low - min)) : \ - ((native_element) << (min - low)))) + ((low) > (max) || (high) < (min) ? 0 : \ + (low) > (min) ? \ + (native_element) >> ((low) - (min)) : \ + (native_element) << ((min) - (low))) /* * Extract bit field portion [low,high) from the 64-bit little-endian @@ -142,27 +142,27 @@ typedef union efx_oword { #define EFX_EXTRACT_OWORD64(oword, low, high) \ ((EFX_EXTRACT64((oword).u64[0], 0, 63, low, high) | \ EFX_EXTRACT64((oword).u64[1], 64, 127, low, high)) & \ - EFX_MASK64(high + 1 - low)) + EFX_MASK64((high) + 1 - (low))) #define EFX_EXTRACT_QWORD64(qword, low, high) \ (EFX_EXTRACT64((qword).u64[0], 0, 63, low, high) & \ - EFX_MASK64(high + 1 - low)) + EFX_MASK64((high) + 1 - (low))) #define EFX_EXTRACT_OWORD32(oword, low, high) \ ((EFX_EXTRACT32((oword).u32[0], 0, 31, low, high) | \ EFX_EXTRACT32((oword).u32[1], 32, 63, low, high) | \ EFX_EXTRACT32((oword).u32[2], 64, 95, low, high) | \ EFX_EXTRACT32((oword).u32[3], 96, 127, low, high)) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_EXTRACT_QWORD32(qword, low, high) \ ((EFX_EXTRACT32((qword).u32[0], 0, 31, low, high) | \ EFX_EXTRACT32((qword).u32[1], 32, 63, low, high)) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_EXTRACT_DWORD(dword, low, high) \ (EFX_EXTRACT32((dword).u32[0], 0, 31, low, high) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_OWORD_FIELD64(oword, field) \ EFX_EXTRACT_OWORD64(oword, EFX_LOW_BIT(field), \ @@ -442,10 +442,10 @@ typedef union efx_oword { cpu_to_le32(EFX_INSERT_NATIVE(min, max, low, high, value)) #define EFX_INPLACE_MASK64(min, max, low, high) \ - EFX_INSERT64(min, max, low, high, EFX_MASK64(high + 1 - low)) + EFX_INSERT64(min, max, low, high, EFX_MASK64((high) + 1 - (low))) #define EFX_INPLACE_MASK32(min, max, low, high) \ - EFX_INSERT32(min, max, low, high, EFX_MASK32(high + 1 - low)) + EFX_INSERT32(min, max, low, high, EFX_MASK32((high) + 1 - (low))) #define EFX_SET_OWORD64(oword, low, high, value) do { \ (oword).u64[0] = (((oword).u64[0] \ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 65a8d49106a..4f86d0cd516 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ - if ((efx->state == STATE_RUNNING) || \ + if ((efx->state == STATE_READY) || \ (efx->state == STATE_DISABLED)) \ ASSERT_RTNL(); \ } while (0) +static int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + /************************************************************************** * * Event queue processing @@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_buffer_order = get_order(efx->rx_buffer_len + sizeof(struct efx_rx_page_state)); + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + /* Initialise the channels */ efx_for_each_channel(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) @@ -714,6 +734,7 @@ static void efx_remove_channel(struct efx_channel *channel) efx_for_each_possible_channel_tx_queue(tx_queue, channel) efx_remove_tx_queue(tx_queue); efx_remove_eventq(channel); + channel->type->post_remove(channel); } static void efx_remove_channels(struct efx_nic *efx) @@ -730,7 +751,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; - int rc = 0; + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; /* Not all channels should be reallocated. We must avoid * reallocating their buffer table entries. @@ -828,6 +853,7 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) static const struct efx_channel_type efx_default_channel_type = { .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, .get_name = efx_get_channel_name, .copy = efx_copy_channel, .keep_eventq = false, @@ -838,6 +864,10 @@ int efx_channel_dummy_op_int(struct efx_channel *channel) return 0; } +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + /************************************************************************** * * Port handling @@ -1365,6 +1395,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + BUG_ON(efx->state == STATE_DISABLED); + if (efx->legacy_irq) efx->legacy_irq_enabled = true; efx_nic_enable_interrupts(efx); @@ -1382,6 +1414,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + if (efx->state == STATE_DISABLED) + return; + efx_mcdi_mode_poll(efx); efx_nic_disable_interrupts(efx); @@ -1422,10 +1457,16 @@ static void efx_set_channels(struct efx_nic *efx) efx->tx_channel_offset = separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0; - /* We need to adjust the TX queue numbers if we have separate + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate * RX-only and TX-only channels. */ efx_for_each_channel(channel, efx) { + if (channel->channel < efx->n_rx_channels) + channel->rx_queue.core_index = channel->channel; + else + channel->rx_queue.core_index = -1; + efx_for_each_channel_tx_queue(tx_queue, channel) tx_queue->queue -= (efx->tx_channel_offset * EFX_TXQ_TYPES); @@ -1533,22 +1574,21 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* Called after previous invocation(s) of efx_stop_all, restarts the port, - * kernel transmit queues and NAPI processing, and ensures that the port is - * scheduled to be reconfigured. This function is safe to call multiple - * times when the NIC is in any state. +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. */ static void efx_start_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); /* Check that it is appropriate to restart the interface. All * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled) - return; - if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) - return; - if (!netif_running(efx->net_dev)) + if (efx->port_enabled || !netif_running(efx->net_dev)) return; efx_start_port(efx); @@ -1582,11 +1622,11 @@ static void efx_flush_all(struct efx_nic *efx) cancel_work_sync(&efx->mac_work); } -/* Quiesce hardware and software without bringing the link down. - * Safe to call multiple times, when the nic and interface is in any - * state. The caller is guaranteed to subsequently be in a position - * to modify any hardware and software state they see fit without - * taking locks. */ +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ static void efx_stop_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); @@ -1739,7 +1779,8 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) struct efx_nic *efx = netdev_priv(net_dev); struct mii_ioctl_data *data = if_mii(ifr); - EFX_ASSERT_RESET_SERIALISED(efx); + if (cmd == SIOCSHWTSTAMP) + return efx_ptp_ioctl(efx, ifr, cmd); /* Convert phy_id from older PRTAD/DEVAD format */ if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && @@ -1820,13 +1861,14 @@ static void efx_netpoll(struct net_device *net_dev) static int efx_net_open(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); - EFX_ASSERT_RESET_SERIALISED(efx); + int rc; netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", raw_smp_processor_id()); - if (efx->state == STATE_DISABLED) - return -EIO; + rc = efx_check_disabled(efx); + if (rc) + return rc; if (efx->phy_mode & PHY_MODE_SPECIAL) return -EBUSY; if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) @@ -1852,10 +1894,8 @@ static int efx_net_stop(struct net_device *net_dev) netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", raw_smp_processor_id()); - if (efx->state != STATE_DISABLED) { - /* Stop the device and flush all the channels */ - efx_stop_all(efx); - } + /* Stop the device and flush all the channels */ + efx_stop_all(efx); return 0; } @@ -1915,9 +1955,11 @@ static void efx_watchdog(struct net_device *net_dev) static int efx_change_mtu(struct net_device *net_dev, int new_mtu) { struct efx_nic *efx = netdev_priv(net_dev); + int rc; - EFX_ASSERT_RESET_SERIALISED(efx); - + rc = efx_check_disabled(efx); + if (rc) + return rc; if (new_mtu > EFX_MAX_MTU) return -EINVAL; @@ -1926,8 +1968,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); mutex_lock(&efx->mac_lock); - /* Reconfigure the MAC before enabling the dma queues so that - * the RX buffers don't overflow */ net_dev->mtu = new_mtu; efx->type->reconfigure_mac(efx); mutex_unlock(&efx->mac_lock); @@ -1942,8 +1982,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) struct sockaddr *addr = data; char *new_addr = addr->sa_data; - EFX_ASSERT_RESET_SERIALISED(efx); - if (!is_valid_ether_addr(new_addr)) { netif_err(efx, drv, efx->net_dev, "invalid ethernet MAC address requested: %pM\n", @@ -1981,14 +2019,14 @@ static void efx_set_rx_mode(struct net_device *net_dev) netdev_for_each_mc_addr(ha, net_dev) { crc = ether_crc_le(ETH_ALEN, ha->addr); bit = crc & (EFX_MCAST_HASH_ENTRIES - 1); - set_bit_le(bit, mc_hash->byte); + __set_bit_le(bit, mc_hash); } /* Broadcast packets go through the multicast hash filter. * ether_crc_le() of the broadcast address is 0xbe2612ff * so we always add bit 0xff to the mask. */ - set_bit_le(0xff, mc_hash->byte); + __set_bit_le(0xff, mc_hash); } if (efx->port_enabled) @@ -2079,11 +2117,27 @@ static int efx_register_netdev(struct efx_nic *efx) rtnl_lock(); + /* Enable resets to be scheduled and check whether any were + * already requested. If so, the NIC is probably hosed so we + * abort. + */ + efx->state = STATE_READY; + smp_mb(); /* ensure we change state before checking reset_pending */ + if (efx->reset_pending) { + netif_err(efx, probe, efx->net_dev, + "aborting probe due to scheduled reset\n"); + rc = -EIO; + goto fail_locked; + } + rc = dev_alloc_name(net_dev, net_dev->name); if (rc < 0) goto fail_locked; efx_update_name(efx); + /* Always start with carrier off; PHY events will detect the link */ + netif_carrier_off(net_dev); + rc = register_netdevice(net_dev); if (rc) goto fail_locked; @@ -2094,9 +2148,6 @@ static int efx_register_netdev(struct efx_nic *efx) efx_init_tx_queue_core_txq(tx_queue); } - /* Always start with carrier off; PHY events will detect the link */ - netif_carrier_off(net_dev); - rtnl_unlock(); rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); @@ -2108,14 +2159,14 @@ static int efx_register_netdev(struct efx_nic *efx) return 0; +fail_registered: + rtnl_lock(); + unregister_netdevice(net_dev); fail_locked: + efx->state = STATE_UNINIT; rtnl_unlock(); netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); return rc; - -fail_registered: - unregister_netdev(net_dev); - return rc; } static void efx_unregister_netdev(struct efx_nic *efx) @@ -2138,7 +2189,11 @@ static void efx_unregister_netdev(struct efx_nic *efx) strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); - unregister_netdev(efx->net_dev); + + rtnl_lock(); + unregister_netdevice(efx->net_dev); + efx->state = STATE_UNINIT; + rtnl_unlock(); } /************************************************************************** @@ -2154,9 +2209,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) EFX_ASSERT_RESET_SERIALISED(efx); efx_stop_all(efx); - mutex_lock(&efx->mac_lock); - efx_stop_interrupts(efx, false); + + mutex_lock(&efx->mac_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) efx->phy_op->fini(efx); efx->type->fini(efx); @@ -2276,16 +2331,15 @@ static void efx_reset_work(struct work_struct *data) if (!pending) return; - /* If we're not RUNNING then don't reset. Leave the reset_pending - * flags set so that efx_pci_probe_main will be retried */ - if (efx->state != STATE_RUNNING) { - netif_info(efx, drv, efx->net_dev, - "scheduled reset quenched. NIC not RUNNING\n"); - return; - } - rtnl_lock(); - (void)efx_reset(efx, fls(pending) - 1); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, fls(pending) - 1); + rtnl_unlock(); } @@ -2311,6 +2365,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) } set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (ACCESS_ONCE(efx->state) != STATE_READY) + return; /* efx_process_channel() will no longer read events once a * reset is scheduled. So switch back to poll'd MCDI completions. */ @@ -2376,13 +2437,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = { /* This zeroes out and then fills in the invariants in a struct * efx_nic (including all sub-structures). */ -static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, +static int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, struct net_device *net_dev) { int i; /* Initialise common structures */ - memset(efx, 0, sizeof(*efx)); spin_lock_init(&efx->biu_lock); #ifdef CONFIG_SFC_MTD INIT_LIST_HEAD(&efx->mtd_list); @@ -2392,7 +2452,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); efx->pci_dev = pci_dev; efx->msg_enable = debug; - efx->state = STATE_INIT; + efx->state = STATE_UNINIT; strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); efx->net_dev = net_dev; @@ -2409,8 +2469,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, goto fail; } - efx->type = type; - EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); /* Higher numbered interrupt modes are less capable! */ @@ -2455,6 +2513,12 @@ static void efx_fini_struct(struct efx_nic *efx) */ static void efx_pci_remove_main(struct efx_nic *efx) { + /* Flush reset_work. It can no longer be scheduled since we + * are not READY. + */ + BUG_ON(efx->state == STATE_READY); + cancel_work_sync(&efx->reset_work); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); efx->net_dev->rx_cpu_rmap = NULL; @@ -2480,24 +2544,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) /* Mark the NIC as fini, then stop the interface */ rtnl_lock(); - efx->state = STATE_FINI; dev_close(efx->net_dev); - - /* Allow any queued efx_resets() to complete */ + efx_stop_interrupts(efx, false); rtnl_unlock(); - efx_stop_interrupts(efx, false); efx_sriov_fini(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); - /* Wait for any scheduled resets to complete. No more will be - * scheduled from this point because efx_stop_all() has been - * called, we are no longer registered with driverlink, and - * the net_device's have been removed. */ - cancel_work_sync(&efx->reset_work); - efx_pci_remove_main(efx); efx_fini_io(efx); @@ -2617,7 +2672,6 @@ static int efx_pci_probe_main(struct efx_nic *efx) static int __devinit efx_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *entry) { - const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data; struct net_device *net_dev; struct efx_nic *efx; int rc; @@ -2627,10 +2681,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, EFX_MAX_RX_QUEUES); if (!net_dev) return -ENOMEM; - net_dev->features |= (type->offload_features | NETIF_F_SG | + efx = netdev_priv(net_dev); + efx->type = (const struct efx_nic_type *) entry->driver_data; + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | @@ -2638,10 +2694,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, NETIF_F_RXCSUM); /* All offloads can be toggled */ net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA; - efx = netdev_priv(net_dev); pci_set_drvdata(pci_dev, efx); SET_NETDEV_DEV(net_dev, &pci_dev->dev); - rc = efx_init_struct(efx, type, pci_dev, net_dev); + rc = efx_init_struct(efx, pci_dev, net_dev); if (rc) goto fail1; @@ -2656,28 +2711,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, goto fail2; rc = efx_pci_probe_main(efx); - - /* Serialise against efx_reset(). No more resets will be - * scheduled since efx_stop_all() has been called, and we have - * not and never have been registered. - */ - cancel_work_sync(&efx->reset_work); - if (rc) goto fail3; - /* If there was a scheduled reset during probe, the NIC is - * probably hosed anyway. - */ - if (efx->reset_pending) { - rc = -EIO; - goto fail4; - } - - /* Switch to the running state before we expose the device to the OS, - * so that dev_open()|efx_start_all() will actually start the device */ - efx->state = STATE_RUNNING; - rc = efx_register_netdev(efx); if (rc) goto fail4; @@ -2717,12 +2753,18 @@ static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_FINI; + rtnl_lock(); - netif_device_detach(efx->net_dev); + if (efx->state != STATE_DISABLED) { + efx->state = STATE_UNINIT; - efx_stop_all(efx); - efx_stop_interrupts(efx, false); + netif_device_detach(efx->net_dev); + + efx_stop_all(efx); + efx_stop_interrupts(efx, false); + } + + rtnl_unlock(); return 0; } @@ -2731,21 +2773,25 @@ static int efx_pm_thaw(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_INIT; + rtnl_lock(); - efx_start_interrupts(efx, false); + if (efx->state != STATE_DISABLED) { + efx_start_interrupts(efx, false); - mutex_lock(&efx->mac_lock); - efx->phy_op->reconfigure(efx); - mutex_unlock(&efx->mac_lock); + mutex_lock(&efx->mac_lock); + efx->phy_op->reconfigure(efx); + mutex_unlock(&efx->mac_lock); - efx_start_all(efx); + efx_start_all(efx); - netif_device_attach(efx->net_dev); + netif_device_attach(efx->net_dev); - efx->state = STATE_RUNNING; + efx->state = STATE_READY; - efx->type->resume_wol(efx); + efx->type->resume_wol(efx); + } + + rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 70755c97251..f11170bc48b 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -102,6 +102,7 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {} /* Channels */ extern int efx_channel_dummy_op_int(struct efx_channel *channel); +extern void efx_channel_dummy_op_void(struct efx_channel *channel); extern void efx_process_channel_now(struct efx_channel *channel); extern int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 5faedd855b7..90f078eff8e 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -337,7 +337,8 @@ static int efx_fill_loopback_test(struct efx_nic *efx, unsigned int test_index, struct ethtool_string *strings, u64 *data) { - struct efx_channel *channel = efx_get_channel(efx, 0); + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); struct efx_tx_queue *tx_queue; efx_for_each_channel_tx_queue(tx_queue, channel) { @@ -529,9 +530,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev, if (!efx_tests) goto fail; - - ASSERT_RTNL(); - if (efx->state != STATE_RUNNING) { + if (efx->state != STATE_READY) { rc = -EIO; goto fail1; } @@ -962,9 +961,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, int rc; /* Check that user wants us to choose the location */ - if (rule->location != RX_CLS_LOC_ANY && - rule->location != RX_CLS_LOC_FIRST && - rule->location != RX_CLS_LOC_LAST) + if (rule->location != RX_CLS_LOC_ANY) return -EINVAL; /* Range-check ring_cookie */ @@ -978,9 +975,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, rule->m_ext.data[1])) return -EINVAL; - efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, - (rule->location == RX_CLS_LOC_FIRST) ? - EFX_FILTER_FLAG_RX_OVERRIDE_IP : 0, + efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, (rule->ring_cookie == RX_CLS_FLOW_DISC) ? 0xfff : rule->ring_cookie); @@ -1176,6 +1171,7 @@ const struct ethtool_ops efx_ethtool_ops = { .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_indir = efx_ethtool_get_rxfh_indir, .set_rxfh_indir = efx_ethtool_set_rxfh_indir, + .get_ts_info = efx_ptp_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, }; diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 8687a6c3db0..ec1e99d0dca 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev, new_mode = PHY_MODE_SPECIAL; if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) { err = 0; - } else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) { + } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) { err = -EBUSY; } else { /* Reset the PHY, reconfigure the MAC and enable/disable diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index c3fd61f0a95..8af42cd1fed 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -162,20 +162,12 @@ static void efx_filter_push_rx_config(struct efx_nic *efx) !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & EFX_FILTER_FLAG_RX_RSS)); EFX_SET_OWORD_FIELD( - filter_ctl, FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE, - !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & - EFX_FILTER_FLAG_RX_OVERRIDE_IP)); - EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID, table->spec[EFX_FILTER_INDEX_MC_DEF].dmaq_id); EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & EFX_FILTER_FLAG_RX_RSS)); - EFX_SET_OWORD_FIELD( - filter_ctl, FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE, - !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & - EFX_FILTER_FLAG_RX_OVERRIDE_IP)); } efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); @@ -480,14 +472,12 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) case EFX_FILTER_TABLE_RX_MAC: { bool is_wild = spec->type == EFX_FILTER_MAC_WILD; - EFX_POPULATE_OWORD_8( + EFX_POPULATE_OWORD_7( *filter, FRF_CZ_RMFT_RSS_EN, !!(spec->flags & EFX_FILTER_FLAG_RX_RSS), FRF_CZ_RMFT_SCATTER_EN, !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER), - FRF_CZ_RMFT_IP_OVERRIDE, - !!(spec->flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP), FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id, FRF_CZ_RMFT_WILDCARD_MATCH, is_wild, FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2], @@ -567,49 +557,62 @@ static int efx_filter_search(struct efx_filter_table *table, } /* - * Construct/deconstruct external filter IDs. These must be ordered - * by matching priority, for RX NFC semantics. + * Construct/deconstruct external filter IDs. At least the RX filter + * IDs must be ordered by matching priority, for RX NFC semantics. * - * Each RX MAC filter entry has a flag for whether it can override an - * RX IP filter that also matches. So we assign locations for MAC - * filters with overriding behaviour, then for IP filters, then for - * MAC filters without overriding behaviour. + * Deconstruction needs to be robust against invalid IDs so that + * efx_filter_remove_id_safe() and efx_filter_get_filter_safe() can + * accept user-provided IDs. */ -#define EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP 0 -#define EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP 1 -#define EFX_FILTER_MATCH_PRI_NORMAL_BASE 2 +#define EFX_FILTER_MATCH_PRI_COUNT 5 + +static const u8 efx_filter_type_match_pri[EFX_FILTER_TYPE_COUNT] = { + [EFX_FILTER_TCP_FULL] = 0, + [EFX_FILTER_UDP_FULL] = 0, + [EFX_FILTER_TCP_WILD] = 1, + [EFX_FILTER_UDP_WILD] = 1, + [EFX_FILTER_MAC_FULL] = 2, + [EFX_FILTER_MAC_WILD] = 3, + [EFX_FILTER_UC_DEF] = 4, + [EFX_FILTER_MC_DEF] = 4, +}; + +static const enum efx_filter_table_id efx_filter_range_table[] = { + EFX_FILTER_TABLE_RX_IP, /* RX match pri 0 */ + EFX_FILTER_TABLE_RX_IP, + EFX_FILTER_TABLE_RX_MAC, + EFX_FILTER_TABLE_RX_MAC, + EFX_FILTER_TABLE_RX_DEF, /* RX match pri 4 */ + EFX_FILTER_TABLE_COUNT, /* TX match pri 0; invalid */ + EFX_FILTER_TABLE_COUNT, /* invalid */ + EFX_FILTER_TABLE_TX_MAC, + EFX_FILTER_TABLE_TX_MAC, /* TX match pri 3 */ +}; #define EFX_FILTER_INDEX_WIDTH 13 #define EFX_FILTER_INDEX_MASK ((1 << EFX_FILTER_INDEX_WIDTH) - 1) -static inline u32 efx_filter_make_id(enum efx_filter_table_id table_id, - unsigned int index, u8 flags) +static inline u32 +efx_filter_make_id(const struct efx_filter_spec *spec, unsigned int index) { - unsigned int match_pri = EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id; + unsigned int range; - if (flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP) { - if (table_id == EFX_FILTER_TABLE_RX_MAC) - match_pri = EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP; - else if (table_id == EFX_FILTER_TABLE_RX_DEF) - match_pri = EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP; - } + range = efx_filter_type_match_pri[spec->type]; + if (!(spec->flags & EFX_FILTER_FLAG_RX)) + range += EFX_FILTER_MATCH_PRI_COUNT; - return match_pri << EFX_FILTER_INDEX_WIDTH | index; + return range << EFX_FILTER_INDEX_WIDTH | index; } static inline enum efx_filter_table_id efx_filter_id_table_id(u32 id) { - unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; + unsigned int range = id >> EFX_FILTER_INDEX_WIDTH; - switch (match_pri) { - case EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP: - return EFX_FILTER_TABLE_RX_MAC; - case EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP: - return EFX_FILTER_TABLE_RX_DEF; - default: - return match_pri - EFX_FILTER_MATCH_PRI_NORMAL_BASE; - } + if (range < ARRAY_SIZE(efx_filter_range_table)) + return efx_filter_range_table[range]; + else + return EFX_FILTER_TABLE_COUNT; /* invalid */ } static inline unsigned int efx_filter_id_index(u32 id) @@ -619,12 +622,9 @@ static inline unsigned int efx_filter_id_index(u32 id) static inline u8 efx_filter_id_flags(u32 id) { - unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; + unsigned int range = id >> EFX_FILTER_INDEX_WIDTH; - if (match_pri < EFX_FILTER_MATCH_PRI_NORMAL_BASE) - return EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_OVERRIDE_IP; - else if (match_pri <= - EFX_FILTER_MATCH_PRI_NORMAL_BASE + EFX_FILTER_TABLE_RX_DEF) + if (range < EFX_FILTER_MATCH_PRI_COUNT) return EFX_FILTER_FLAG_RX; else return EFX_FILTER_FLAG_TX; @@ -633,14 +633,15 @@ static inline u8 efx_filter_id_flags(u32 id) u32 efx_filter_get_rx_id_limit(struct efx_nic *efx) { struct efx_filter_state *state = efx->filter_state; - unsigned int table_id = EFX_FILTER_TABLE_RX_DEF; + unsigned int range = EFX_FILTER_MATCH_PRI_COUNT - 1; + enum efx_filter_table_id table_id; do { + table_id = efx_filter_range_table[range]; if (state->table[table_id].size != 0) - return ((EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id) - << EFX_FILTER_INDEX_WIDTH) + + return range << EFX_FILTER_INDEX_WIDTH | state->table[table_id].size; - } while (table_id--); + } while (range--); return 0; } @@ -718,7 +719,7 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, netif_vdbg(efx, hw, efx->net_dev, "%s: filter type %d index %d rxq %u set", __func__, spec->type, filter_idx, spec->dmaq_id); - rc = efx_filter_make_id(table->id, filter_idx, spec->flags); + rc = efx_filter_make_id(spec, filter_idx); out: spin_unlock_bh(&state->lock); @@ -781,8 +782,7 @@ int efx_filter_remove_id_safe(struct efx_nic *efx, spin_lock_bh(&state->lock); if (test_bit(filter_idx, table->used_bitmap) && - spec->priority == priority && - !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { + spec->priority == priority) { efx_filter_table_clear_entry(efx, table, filter_idx); if (table->used == 0) efx_filter_table_reset_search_depth(table); @@ -833,8 +833,7 @@ int efx_filter_get_filter_safe(struct efx_nic *efx, spin_lock_bh(&state->lock); if (test_bit(filter_idx, table->used_bitmap) && - spec->priority == priority && - !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { + spec->priority == priority) { *spec_buf = *spec; rc = 0; } else { @@ -927,8 +926,7 @@ s32 efx_filter_get_rx_ids(struct efx_nic *efx, goto out; } buf[count++] = efx_filter_make_id( - table_id, filter_idx, - table->spec[filter_idx].flags); + &table->spec[filter_idx], filter_idx); } } } diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h index 3c77802aed6..5cb54723b82 100644 --- a/drivers/net/ethernet/sfc/filter.h +++ b/drivers/net/ethernet/sfc/filter.h @@ -61,16 +61,12 @@ enum efx_filter_priority { * according to the indirection table. * @EFX_FILTER_FLAG_RX_SCATTER: Enable DMA scatter on the receiving * queue. - * @EFX_FILTER_FLAG_RX_OVERRIDE_IP: Enables a MAC filter to override - * any IP filter that matches the same packet. By default, IP - * filters take precedence. * @EFX_FILTER_FLAG_RX: Filter is for RX * @EFX_FILTER_FLAG_TX: Filter is for TX */ enum efx_filter_flags { EFX_FILTER_FLAG_RX_RSS = 0x01, EFX_FILTER_FLAG_RX_SCATTER = 0x02, - EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04, EFX_FILTER_FLAG_RX = 0x08, EFX_FILTER_FLAG_TX = 0x10, }; @@ -88,8 +84,7 @@ enum efx_filter_flags { * * The @priority field is used by software to determine whether a new * filter may replace an old one. The hardware priority of a filter - * depends on the filter type and %EFX_FILTER_FLAG_RX_OVERRIDE_IP - * flag. + * depends on the filter type. */ struct efx_filter_spec { u8 type:4; diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index fc5e7bbcbc9..aea43cbd052 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -320,14 +320,20 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, efx_mcdi_complete(mcdi); } -/* Issue the given command by writing the data into the shared memory PDU, - * ring the doorbell and wait for completion. Copyout the result. */ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen, size_t *outlen_actual) { + efx_mcdi_rpc_start(efx, cmd, inbuf, inlen); + return efx_mcdi_rpc_finish(efx, cmd, inlen, + outbuf, outlen, outlen_actual); +} + +void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, + size_t inlen) +{ struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - int rc; + BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0); efx_mcdi_acquire(mcdi); @@ -338,6 +344,15 @@ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, spin_unlock_bh(&mcdi->iface_lock); efx_mcdi_copyin(efx, cmd, inbuf, inlen); +} + +int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + u8 *outbuf, size_t outlen, size_t *outlen_actual) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + int rc; + + BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0); if (mcdi->mode == MCDI_MODE_POLL) rc = efx_mcdi_poll(efx); @@ -563,6 +578,11 @@ void efx_mcdi_process_event(struct efx_channel *channel, case MCDI_EVENT_CODE_FLR: efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF)); break; + case MCDI_EVENT_CODE_PTP_RX: + case MCDI_EVENT_CODE_PTP_FAULT: + case MCDI_EVENT_CODE_PTP_PPS: + efx_ptp_event(efx, event); + break; default: netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n", @@ -641,9 +661,8 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, u16 *fw_subtype_list, u32 *capabilities) { uint8_t outbuf[MC_CMD_GET_BOARD_CFG_OUT_LENMIN]; - size_t outlen; + size_t outlen, offset, i; int port_num = efx_port_num(efx); - int offset; int rc; BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_IN_LEN != 0); @@ -663,11 +682,18 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, : MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0_OFST; if (mac_address) memcpy(mac_address, outbuf + offset, ETH_ALEN); - if (fw_subtype_list) - memcpy(fw_subtype_list, - outbuf + MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST, - MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM * - sizeof(fw_subtype_list[0])); + if (fw_subtype_list) { + /* Byte-swap and truncate or zero-pad as necessary */ + offset = MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST; + for (i = 0; + i < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM; + i++) { + fw_subtype_list[i] = + (offset + 2 <= outlen) ? + le16_to_cpup((__le16 *)(outbuf + offset)) : 0; + offset += 2; + } + } if (capabilities) { if (port_num) *capabilities = MCDI_DWORD(outbuf, @@ -1169,6 +1195,9 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx) __le32 *qid; int rc, count; + BUILD_BUG_ON(EFX_MAX_CHANNELS > + MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM); + qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL); if (qid == NULL) return -ENOMEM; diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 0bdf3e33183..3ba2e5b5a9c 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -71,6 +71,12 @@ extern int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen, size_t *outlen_actual); +extern void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, + const u8 *inbuf, size_t inlen); +extern int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + u8 *outbuf, size_t outlen, + size_t *outlen_actual); + extern int efx_mcdi_poll_reboot(struct efx_nic *efx); extern void efx_mcdi_mode_poll(struct efx_nic *efx); extern void efx_mcdi_mode_event(struct efx_nic *efx); @@ -107,11 +113,13 @@ extern void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); #define MCDI_EVENT_FIELD(_ev, _field) \ EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field) #define MCDI_ARRAY_FIELD(_buf, _field1, _type, _index, _field2) \ - EFX_DWORD_FIELD( \ + EFX_EXTRACT_DWORD( \ *((efx_dword_t *) \ (MCDI_ARRAY_PTR(_buf, _field1, _type, _index) + \ (MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _OFST & ~3))), \ - MC_CMD_ ## _type ## _TYPEDEF_ ## _field2) + MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f, \ + (MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f) + \ + MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _WIDTH - 1) extern void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len); extern int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index db4beed9766..9d426d0457b 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -289,6 +289,7 @@ #define MCDI_EVENT_CODE_TX_FLUSH 0xc /* enum */ #define MCDI_EVENT_CODE_PTP_RX 0xd /* enum */ #define MCDI_EVENT_CODE_PTP_FAULT 0xe /* enum */ +#define MCDI_EVENT_CODE_PTP_PPS 0xf /* enum */ #define MCDI_EVENT_CMDDONE_DATA_OFST 0 #define MCDI_EVENT_CMDDONE_DATA_LBN 0 #define MCDI_EVENT_CMDDONE_DATA_WIDTH 32 @@ -491,12 +492,12 @@ /* MC_CMD_GET_FPGAREG_OUT msgresponse */ #define MC_CMD_GET_FPGAREG_OUT_LENMIN 1 -#define MC_CMD_GET_FPGAREG_OUT_LENMAX 255 +#define MC_CMD_GET_FPGAREG_OUT_LENMAX 252 #define MC_CMD_GET_FPGAREG_OUT_LEN(num) (0+1*(num)) #define MC_CMD_GET_FPGAREG_OUT_BUFFER_OFST 0 #define MC_CMD_GET_FPGAREG_OUT_BUFFER_LEN 1 #define MC_CMD_GET_FPGAREG_OUT_BUFFER_MINNUM 1 -#define MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 255 +#define MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 252 /***********************************/ @@ -507,13 +508,13 @@ /* MC_CMD_PUT_FPGAREG_IN msgrequest */ #define MC_CMD_PUT_FPGAREG_IN_LENMIN 5 -#define MC_CMD_PUT_FPGAREG_IN_LENMAX 255 +#define MC_CMD_PUT_FPGAREG_IN_LENMAX 252 #define MC_CMD_PUT_FPGAREG_IN_LEN(num) (4+1*(num)) #define MC_CMD_PUT_FPGAREG_IN_ADDR_OFST 0 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_OFST 4 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_LEN 1 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_MINNUM 1 -#define MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 251 +#define MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 248 /* MC_CMD_PUT_FPGAREG_OUT msgresponse */ #define MC_CMD_PUT_FPGAREG_OUT_LEN 0 @@ -560,7 +561,7 @@ /* MC_CMD_PTP_IN_TRANSMIT msgrequest */ #define MC_CMD_PTP_IN_TRANSMIT_LENMIN 13 -#define MC_CMD_PTP_IN_TRANSMIT_LENMAX 255 +#define MC_CMD_PTP_IN_TRANSMIT_LENMAX 252 #define MC_CMD_PTP_IN_TRANSMIT_LEN(num) (12+1*(num)) /* MC_CMD_PTP_IN_CMD_OFST 0 */ /* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */ @@ -568,7 +569,7 @@ #define MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST 12 #define MC_CMD_PTP_IN_TRANSMIT_PACKET_LEN 1 #define MC_CMD_PTP_IN_TRANSMIT_PACKET_MINNUM 1 -#define MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 243 +#define MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 240 /* MC_CMD_PTP_IN_READ_NIC_TIME msgrequest */ #define MC_CMD_PTP_IN_READ_NIC_TIME_LEN 8 @@ -1145,7 +1146,7 @@ /* MC_CMD_PUTS_IN msgrequest */ #define MC_CMD_PUTS_IN_LENMIN 13 -#define MC_CMD_PUTS_IN_LENMAX 255 +#define MC_CMD_PUTS_IN_LENMAX 252 #define MC_CMD_PUTS_IN_LEN(num) (12+1*(num)) #define MC_CMD_PUTS_IN_DEST_OFST 0 #define MC_CMD_PUTS_IN_UART_LBN 0 @@ -1157,7 +1158,7 @@ #define MC_CMD_PUTS_IN_STRING_OFST 12 #define MC_CMD_PUTS_IN_STRING_LEN 1 #define MC_CMD_PUTS_IN_STRING_MINNUM 1 -#define MC_CMD_PUTS_IN_STRING_MAXNUM 243 +#define MC_CMD_PUTS_IN_STRING_MAXNUM 240 /* MC_CMD_PUTS_OUT msgresponse */ #define MC_CMD_PUTS_OUT_LEN 0 @@ -1947,12 +1948,12 @@ /* MC_CMD_NVRAM_READ_OUT msgresponse */ #define MC_CMD_NVRAM_READ_OUT_LENMIN 1 -#define MC_CMD_NVRAM_READ_OUT_LENMAX 255 +#define MC_CMD_NVRAM_READ_OUT_LENMAX 252 #define MC_CMD_NVRAM_READ_OUT_LEN(num) (0+1*(num)) #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_OFST 0 #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_LEN 1 #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MINNUM 1 -#define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 255 +#define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 252 /***********************************/ @@ -1963,7 +1964,7 @@ /* MC_CMD_NVRAM_WRITE_IN msgrequest */ #define MC_CMD_NVRAM_WRITE_IN_LENMIN 13 -#define MC_CMD_NVRAM_WRITE_IN_LENMAX 255 +#define MC_CMD_NVRAM_WRITE_IN_LENMAX 252 #define MC_CMD_NVRAM_WRITE_IN_LEN(num) (12+1*(num)) #define MC_CMD_NVRAM_WRITE_IN_TYPE_OFST 0 /* Enum values, see field(s): */ @@ -1973,7 +1974,7 @@ #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_OFST 12 #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN 1 #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MINNUM 1 -#define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 243 +#define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 240 /* MC_CMD_NVRAM_WRITE_OUT msgresponse */ #define MC_CMD_NVRAM_WRITE_OUT_LEN 0 @@ -2305,13 +2306,13 @@ /* MC_CMD_GET_PHY_MEDIA_INFO_OUT msgresponse */ #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMIN 5 -#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 255 +#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 252 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LEN(num) (4+1*(num)) #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATALEN_OFST 0 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST 4 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_LEN 1 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MINNUM 1 -#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 251 +#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 248 /***********************************/ diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c index 758148379b0..08f825b71ac 100644 --- a/drivers/net/ethernet/sfc/mtd.c +++ b/drivers/net/ethernet/sfc/mtd.c @@ -585,6 +585,7 @@ static const struct siena_nvram_type_info siena_nvram_types[] = { [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1] = { 1, "sfc_exp_rom_cfg" }, [MC_CMD_NVRAM_TYPE_PHY_PORT0] = { 0, "sfc_phy_fw" }, [MC_CMD_NVRAM_TYPE_PHY_PORT1] = { 1, "sfc_phy_fw" }, + [MC_CMD_NVRAM_TYPE_FPGA] = { 0, "sfc_fpga" }, }; static int siena_mtd_probe_partition(struct efx_nic *efx, @@ -598,7 +599,8 @@ static int siena_mtd_probe_partition(struct efx_nic *efx, bool protected; int rc; - if (type >= ARRAY_SIZE(siena_nvram_types)) + if (type >= ARRAY_SIZE(siena_nvram_types) || + siena_nvram_types[type].name == NULL) return -ENODEV; info = &siena_nvram_types[type]; @@ -627,7 +629,8 @@ static int siena_mtd_get_fw_subtypes(struct efx_nic *efx, struct efx_mtd *efx_mtd) { struct efx_mtd_partition *part; - uint16_t fw_subtype_list[MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM]; + uint16_t fw_subtype_list[ + MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM]; int rc; rc = efx_mcdi_get_board_cfg(efx, NULL, fw_subtype_list, NULL); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cd9c0a98969..576a3109116 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -37,7 +37,7 @@ * **************************************************************************/ -#define EFX_DRIVER_VERSION "3.1" +#define EFX_DRIVER_VERSION "3.2" #ifdef DEBUG #define EFX_BUG_ON_PARANOID(x) BUG_ON(x) @@ -56,7 +56,8 @@ #define EFX_MAX_CHANNELS 32U #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS #define EFX_EXTRA_CHANNEL_IOV 0 -#define EFX_MAX_EXTRA_CHANNELS 1U +#define EFX_EXTRA_CHANNEL_PTP 1 +#define EFX_MAX_EXTRA_CHANNELS 2U /* Checksum generation is a per-queue option in hardware, so each * queue visible to the networking core is backed by two hardware TX @@ -68,6 +69,9 @@ #define EFX_TXQ_TYPES 4 #define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) +/* Forward declare Precision Time Protocol (PTP) support structure. */ +struct efx_ptp_data; + struct efx_self_tests; /** @@ -91,29 +95,31 @@ struct efx_special_buffer { }; /** - * struct efx_tx_buffer - An Efx TX buffer - * @skb: The associated socket buffer. - * Set only on the final fragment of a packet; %NULL for all other - * fragments. When this fragment completes, then we can free this - * skb. - * @tsoh: The associated TSO header structure, or %NULL if this - * buffer is not a TSO header. + * struct efx_tx_buffer - buffer state for a TX descriptor + * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be + * freed when descriptor completes + * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be + * freed when descriptor completes. * @dma_addr: DMA address of the fragment. + * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. * This field is zero when the queue slot is empty. - * @continuation: True if this fragment is not the end of a packet. - * @unmap_single: True if dma_unmap_single should be used. * @unmap_len: Length of this fragment to unmap */ struct efx_tx_buffer { - const struct sk_buff *skb; - struct efx_tso_header *tsoh; + union { + const struct sk_buff *skb; + void *heap_buf; + }; dma_addr_t dma_addr; + unsigned short flags; unsigned short len; - bool continuation; - bool unmap_single; unsigned short unmap_len; }; +#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ +#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ +#define EFX_TX_BUF_HEAP 4 /* buffer was allocated with kmalloc() */ +#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ /** * struct efx_tx_queue - An Efx TX queue @@ -133,6 +139,7 @@ struct efx_tx_buffer { * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring + * @tsoh_page: Array of pages of TSO header buffers * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. * @initialised: Has hardware queue been initialised? @@ -156,9 +163,6 @@ struct efx_tx_buffer { * variable indicates that the queue is full. This is to * avoid cache-line ping-pong between the xmit path and the * completion path. - * @tso_headers_free: A list of TSO headers allocated for this TX queue - * that are not in use, and so available for new TSO sends. The list - * is protected by the TX queue lock. * @tso_bursts: Number of times TSO xmit invoked by kernel * @tso_long_headers: Number of packets with headers too long for standard * blocks @@ -175,6 +179,7 @@ struct efx_tx_queue { struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; + struct efx_buffer *tsoh_page; struct efx_special_buffer txd; unsigned int ptr_mask; bool initialised; @@ -187,7 +192,6 @@ struct efx_tx_queue { unsigned int insert_count ____cacheline_aligned_in_smp; unsigned int write_count; unsigned int old_read_count; - struct efx_tso_header *tso_headers_free; unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; @@ -242,6 +246,8 @@ struct efx_rx_page_state { /** * struct efx_rx_queue - An Efx RX queue * @efx: The associated Efx NIC + * @core_index: Index of network core RX queue. Will be >= 0 iff this + * is associated with a real RX queue. * @buffer: The software buffer ring * @rxd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. @@ -263,6 +269,7 @@ struct efx_rx_page_state { */ struct efx_rx_queue { struct efx_nic *efx; + int core_index; struct efx_rx_buffer *buffer; struct efx_special_buffer rxd; unsigned int ptr_mask; @@ -390,14 +397,17 @@ struct efx_channel { * @get_name: Generate the channel's name (used for its IRQ handler) * @copy: Copy the channel state prior to reallocation. May be %NULL if * reallocation is not supported. + * @receive_skb: Handle an skb ready to be passed to netif_receive_skb() * @keep_eventq: Flag for whether event queue should be kept initialised * while the device is stopped */ struct efx_channel_type { void (*handle_no_channel)(struct efx_nic *); int (*pre_probe)(struct efx_channel *); + void (*post_remove)(struct efx_channel *); void (*get_name)(struct efx_channel *, char *buf, size_t len); struct efx_channel *(*copy)(const struct efx_channel *); + void (*receive_skb)(struct efx_channel *, struct sk_buff *); bool keep_eventq; }; @@ -430,11 +440,9 @@ enum efx_int_mode { #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) enum nic_state { - STATE_INIT = 0, - STATE_RUNNING = 1, - STATE_FINI = 2, - STATE_DISABLED = 3, - STATE_MAX, + STATE_UNINIT = 0, /* device being probed/removed or is frozen */ + STATE_READY = 1, /* hardware ready and netdev registered */ + STATE_DISABLED = 2, /* device disabled due to hardware errors */ }; /* @@ -654,7 +662,7 @@ struct vfdi_status; * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags - * @state: Device state flag. Serialised by the rtnl_lock. + * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets * @tx_queue: TX DMA queues * @rx_queue: RX DMA queues @@ -664,6 +672,8 @@ struct vfdi_status; * should be allocated for this NIC * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. + * @txq_stop_thresh: TX queue fill level at or above which we stop it. + * @txq_wake_thresh: TX queue fill level at or below which we wake it. * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches * @sram_lim_qw: Qword address limit of SRAM @@ -730,6 +740,7 @@ struct vfdi_status; * %local_addr_list. Protected by %local_lock. * @local_lock: Mutex protecting %local_addr_list and %local_page_list. * @peer_work: Work item to broadcast peer addresses to VMs. + * @ptp_data: PTP state data * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt. This @@ -774,6 +785,9 @@ struct efx_nic { unsigned rxq_entries; unsigned txq_entries; + unsigned int txq_stop_thresh; + unsigned int txq_wake_thresh; + unsigned tx_dc_base; unsigned rx_dc_base; unsigned sram_lim_qw; @@ -854,6 +868,10 @@ struct efx_nic { struct work_struct peer_work; #endif +#ifdef CONFIG_SFC_PTP + struct efx_ptp_data *ptp_data; +#endif + /* The following fields may be written more often */ struct delayed_work monitor_work ____cacheline_aligned_in_smp; @@ -1044,7 +1062,7 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue) static inline bool efx_channel_has_rx_queue(struct efx_channel *channel) { - return channel->channel < channel->efx->n_rx_channels; + return channel->rx_queue.core_index >= 0; } static inline struct efx_rx_queue * @@ -1083,18 +1101,6 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, return &rx_queue->buffer[index]; } -/* Set bit in a little-endian bitfield */ -static inline void set_bit_le(unsigned nr, unsigned char *addr) -{ - addr[nr / 8] |= (1 << (nr % 8)); -} - -/* Clear bit in a little-endian bitfield */ -static inline void clear_bit_le(unsigned nr, unsigned char *addr) -{ - addr[nr / 8] &= ~(1 << (nr % 8)); -} - /** * EFX_MAX_FRAME_LEN - calculate maximum frame length @@ -1116,5 +1122,13 @@ static inline void clear_bit_le(unsigned nr, unsigned char *addr) #define EFX_MAX_FRAME_LEN(mtu) \ ((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16) +static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb) +{ + return skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; +} +static inline void efx_xmit_hwtstamp_pending(struct sk_buff *skb) +{ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} #endif /* EFX_NET_DRIVER_H */ diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 326d799762d..aab7cacb2e3 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) /************************************************************************** * * Generic buffer handling - * These buffers are used for interrupt status and MAC stats + * These buffers are used for interrupt status, MAC stats, etc. * **************************************************************************/ @@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) ++tx_queue->write_count; /* Create TX descriptor ring entry */ + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); EFX_POPULATE_QWORD_4(*txd, - FSF_AZ_TX_KER_CONT, buffer->continuation, + FSF_AZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, FSF_AZ_TX_KER_BYTE_COUNT, buffer->len, FSF_AZ_TX_KER_BUF_REGION, 0, FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr); @@ -470,9 +472,9 @@ void efx_nic_init_tx(struct efx_tx_queue *tx_queue) efx_reado(efx, ®, FR_AA_TX_CHKSM_CFG); if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) - clear_bit_le(tx_queue->queue, (void *)®); + __clear_bit_le(tx_queue->queue, ®); else - set_bit_le(tx_queue->queue, (void *)®); + __set_bit_le(tx_queue->queue, ®); efx_writeo(efx, ®, FR_AA_TX_CHKSM_CFG); } diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index bab5cd9f574..438cef11f72 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -11,6 +11,7 @@ #ifndef EFX_NIC_H #define EFX_NIC_H +#include <linux/net_tstamp.h> #include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "efx.h" @@ -250,6 +251,41 @@ extern int efx_sriov_get_vf_config(struct net_device *dev, int vf, extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, bool spoofchk); +struct ethtool_ts_info; +#ifdef CONFIG_SFC_PTP +extern void efx_ptp_probe(struct efx_nic *efx); +extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd); +extern int efx_ptp_get_ts_info(struct net_device *net_dev, + struct ethtool_ts_info *ts_info); +extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev); +#else +static inline void efx_ptp_probe(struct efx_nic *efx) {} +static inline int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd) +{ + return -EOPNOTSUPP; +} +static inline int efx_ptp_get_ts_info(struct net_device *net_dev, + struct ethtool_ts_info *ts_info) +{ + ts_info->so_timestamping = (SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE); + ts_info->phc_index = -1; + + return 0; +} +static inline bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return false; +} +static inline int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return NETDEV_TX_OK; +} +static inline void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) {} +#endif + extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; extern const struct efx_nic_type siena_a0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c new file mode 100644 index 00000000000..0767043f44a --- /dev/null +++ b/drivers/net/ethernet/sfc/ptp.c @@ -0,0 +1,1481 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/* Theory of operation: + * + * PTP support is assisted by firmware running on the MC, which provides + * the hardware timestamping capabilities. Both transmitted and received + * PTP event packets are queued onto internal queues for subsequent processing; + * this is because the MC operations are relatively long and would block + * block NAPI/interrupt operation. + * + * Receive event processing: + * The event contains the packet's UUID and sequence number, together + * with the hardware timestamp. The PTP receive packet queue is searched + * for this UUID/sequence number and, if found, put on a pending queue. + * Packets not matching are delivered without timestamps (MCDI events will + * always arrive after the actual packet). + * It is important for the operation of the PTP protocol that the ordering + * of packets between the event and general port is maintained. + * + * Work queue processing: + * If work waiting, synchronise host/hardware time + * + * Transmit: send packet through MC, which returns the transmission time + * that is converted to an appropriate timestamp. + * + * Receive: the packet's reception time is converted to an appropriate + * timestamp. + */ +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/time.h> +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/net_tstamp.h> +#include <linux/pps_kernel.h> +#include <linux/ptp_clock_kernel.h> +#include "net_driver.h" +#include "efx.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "io.h" +#include "regs.h" +#include "nic.h" + +/* Maximum number of events expected to make up a PTP event */ +#define MAX_EVENT_FRAGS 3 + +/* Maximum delay, ms, to begin synchronisation */ +#define MAX_SYNCHRONISE_WAIT_MS 2 + +/* How long, at most, to spend synchronising */ +#define SYNCHRONISE_PERIOD_NS 250000 + +/* How often to update the shared memory time */ +#define SYNCHRONISATION_GRANULARITY_NS 200 + +/* Minimum permitted length of a (corrected) synchronisation time */ +#define MIN_SYNCHRONISATION_NS 120 + +/* Maximum permitted length of a (corrected) synchronisation time */ +#define MAX_SYNCHRONISATION_NS 1000 + +/* How many (MC) receive events that can be queued */ +#define MAX_RECEIVE_EVENTS 8 + +/* Length of (modified) moving average. */ +#define AVERAGE_LENGTH 16 + +/* How long an unmatched event or packet can be held */ +#define PKT_EVENT_LIFETIME_MS 10 + +/* Offsets into PTP packet for identification. These offsets are from the + * start of the IP header, not the MAC header. Note that neither PTP V1 nor + * PTP V2 permit the use of IPV4 options. + */ +#define PTP_DPORT_OFFSET 22 + +#define PTP_V1_VERSION_LENGTH 2 +#define PTP_V1_VERSION_OFFSET 28 + +#define PTP_V1_UUID_LENGTH 6 +#define PTP_V1_UUID_OFFSET 50 + +#define PTP_V1_SEQUENCE_LENGTH 2 +#define PTP_V1_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V1 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V1_MIN_LENGTH 64 + +#define PTP_V2_VERSION_LENGTH 1 +#define PTP_V2_VERSION_OFFSET 29 + +/* Although PTP V2 UUIDs are comprised a ClockIdentity (8) and PortNumber (2), + * the MC only captures the last six bytes of the clock identity. These values + * reflect those, not the ones used in the standard. The standard permits + * mapping of V1 UUIDs to V2 UUIDs with these same values. + */ +#define PTP_V2_MC_UUID_LENGTH 6 +#define PTP_V2_MC_UUID_OFFSET 50 + +#define PTP_V2_SEQUENCE_LENGTH 2 +#define PTP_V2_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V2 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V2_MIN_LENGTH 63 + +#define PTP_MIN_LENGTH 63 + +#define PTP_ADDRESS 0xe0000181 /* 224.0.1.129 */ +#define PTP_EVENT_PORT 319 +#define PTP_GENERAL_PORT 320 + +/* Annoyingly the format of the version numbers are different between + * versions 1 and 2 so it isn't possible to simply look for 1 or 2. + */ +#define PTP_VERSION_V1 1 + +#define PTP_VERSION_V2 2 +#define PTP_VERSION_V2_MASK 0x0f + +enum ptp_packet_state { + PTP_PACKET_STATE_UNMATCHED = 0, + PTP_PACKET_STATE_MATCHED, + PTP_PACKET_STATE_TIMED_OUT, + PTP_PACKET_STATE_MATCH_UNWANTED +}; + +/* NIC synchronised with single word of time only comprising + * partial seconds and full nanoseconds: 10^9 ~ 2^30 so 2 bits for seconds. + */ +#define MC_NANOSECOND_BITS 30 +#define MC_NANOSECOND_MASK ((1 << MC_NANOSECOND_BITS) - 1) +#define MC_SECOND_MASK ((1 << (32 - MC_NANOSECOND_BITS)) - 1) + +/* Maximum parts-per-billion adjustment that is acceptable */ +#define MAX_PPB 1000000 + +/* Number of bits required to hold the above */ +#define MAX_PPB_BITS 20 + +/* Number of extra bits allowed when calculating fractional ns. + * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should + * be less than 63. + */ +#define PPB_EXTRA_BITS 2 + +/* Precalculate scale word to avoid long long division at runtime */ +#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\ + MAX_PPB_BITS)) / 1000000000LL) + +#define PTP_SYNC_ATTEMPTS 4 + +/** + * struct efx_ptp_match - Matching structure, stored in sk_buff's cb area. + * @words: UUID and (partial) sequence number + * @expiry: Time after which the packet should be delivered irrespective of + * event arrival. + * @state: The state of the packet - whether it is ready for processing or + * whether that is of no interest. + */ +struct efx_ptp_match { + u32 words[DIV_ROUND_UP(PTP_V1_UUID_LENGTH, 4)]; + unsigned long expiry; + enum ptp_packet_state state; +}; + +/** + * struct efx_ptp_event_rx - A PTP receive event (from MC) + * @seq0: First part of (PTP) UUID + * @seq1: Second part of (PTP) UUID and sequence number + * @hwtimestamp: Event timestamp + */ +struct efx_ptp_event_rx { + struct list_head link; + u32 seq0; + u32 seq1; + ktime_t hwtimestamp; + unsigned long expiry; +}; + +/** + * struct efx_ptp_timeset - Synchronisation between host and MC + * @host_start: Host time immediately before hardware timestamp taken + * @seconds: Hardware timestamp, seconds + * @nanoseconds: Hardware timestamp, nanoseconds + * @host_end: Host time immediately after hardware timestamp taken + * @waitns: Number of nanoseconds between hardware timestamp being read and + * host end time being seen + * @window: Difference of host_end and host_start + * @valid: Whether this timeset is valid + */ +struct efx_ptp_timeset { + u32 host_start; + u32 seconds; + u32 nanoseconds; + u32 host_end; + u32 waitns; + u32 window; /* Derived: end - start, allowing for wrap */ +}; + +/** + * struct efx_ptp_data - Precision Time Protocol (PTP) state + * @channel: The PTP channel + * @rxq: Receive queue (awaiting timestamps) + * @txq: Transmit queue + * @evt_list: List of MC receive events awaiting packets + * @evt_free_list: List of free events + * @evt_lock: Lock for manipulating evt_list and evt_free_list + * @rx_evts: Instantiated events (on evt_list and evt_free_list) + * @workwq: Work queue for processing pending PTP operations + * @work: Work task + * @reset_required: A serious error has occurred and the PTP task needs to be + * reset (disable, enable). + * @rxfilter_event: Receive filter when operating + * @rxfilter_general: Receive filter when operating + * @config: Current timestamp configuration + * @enabled: PTP operation enabled + * @mode: Mode in which PTP operating (PTP version) + * @evt_frags: Partly assembled PTP events + * @evt_frag_idx: Current fragment number + * @evt_code: Last event code + * @start: Address at which MC indicates ready for synchronisation + * @host_time_pps: Host time at last PPS + * @last_sync_ns: Last number of nanoseconds between readings when synchronising + * @base_sync_ns: Number of nanoseconds for last synchronisation. + * @base_sync_valid: Whether base_sync_time is valid. + * @current_adjfreq: Current ppb adjustment. + * @phc_clock: Pointer to registered phc device + * @phc_clock_info: Registration structure for phc device + * @pps_work: pps work task for handling pps events + * @pps_workwq: pps work queue + * @nic_ts_enabled: Flag indicating if NIC generated TS events are handled + * @txbuf: Buffer for use when transmitting (PTP) packets to MC (avoids + * allocations in main data path). + * @debug_ptp_dir: PTP debugfs directory + * @missed_rx_sync: Number of packets received without syncrhonisation. + * @good_syncs: Number of successful synchronisations. + * @no_time_syncs: Number of synchronisations with no good times. + * @bad_sync_durations: Number of synchronisations with bad durations. + * @bad_syncs: Number of failed synchronisations. + * @last_sync_time: Number of nanoseconds for last synchronisation. + * @sync_timeouts: Number of synchronisation timeouts + * @fast_syncs: Number of synchronisations requiring short delay + * @min_sync_delta: Minimum time between event and synchronisation + * @max_sync_delta: Maximum time between event and synchronisation + * @average_sync_delta: Average time between event and synchronisation. + * Modified moving average. + * @last_sync_delta: Last time between event and synchronisation + * @mc_stats: Context value for MC statistics + * @timeset: Last set of synchronisation statistics. + */ +struct efx_ptp_data { + struct efx_channel *channel; + struct sk_buff_head rxq; + struct sk_buff_head txq; + struct list_head evt_list; + struct list_head evt_free_list; + spinlock_t evt_lock; + struct efx_ptp_event_rx rx_evts[MAX_RECEIVE_EVENTS]; + struct workqueue_struct *workwq; + struct work_struct work; + bool reset_required; + u32 rxfilter_event; + u32 rxfilter_general; + bool rxfilter_installed; + struct hwtstamp_config config; + bool enabled; + unsigned int mode; + efx_qword_t evt_frags[MAX_EVENT_FRAGS]; + int evt_frag_idx; + int evt_code; + struct efx_buffer start; + struct pps_event_time host_time_pps; + unsigned last_sync_ns; + unsigned base_sync_ns; + bool base_sync_valid; + s64 current_adjfreq; + struct ptp_clock *phc_clock; + struct ptp_clock_info phc_clock_info; + struct work_struct pps_work; + struct workqueue_struct *pps_workwq; + bool nic_ts_enabled; + u8 txbuf[ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN( + MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM), 4)]; + struct efx_ptp_timeset + timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM]; +}; + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta); +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta); +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts); +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec *e_ts); +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on); + +/* Enable MCDI PTP support. */ +static int efx_ptp_enable(struct efx_nic *efx) +{ + u8 inbuf[MC_CMD_PTP_IN_ENABLE_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_QUEUE, + efx->ptp_data->channel->channel); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_MODE, efx->ptp_data->mode); + + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +/* Disable MCDI PTP support. + * + * Note that this function should never rely on the presence of ptp_data - + * may be called before that exists. + */ +static int efx_ptp_disable(struct efx_nic *efx) +{ + u8 inbuf[MC_CMD_PTP_IN_DISABLE_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE); + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static void efx_ptp_deliver_rx_queue(struct sk_buff_head *q) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(q))) { + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); + } +} + +static void efx_ptp_handle_no_channel(struct efx_nic *efx) +{ + netif_err(efx, drv, efx->net_dev, + "ERROR: PTP requires MSI-X and 1 additional interrupt" + "vector. PTP disabled\n"); +} + +/* Repeatedly send the host time to the MC which will capture the hardware + * time. + */ +static void efx_ptp_send_times(struct efx_nic *efx, + struct pps_event_time *last_time) +{ + struct pps_event_time now; + struct timespec limit; + struct efx_ptp_data *ptp = efx->ptp_data; + struct timespec start; + int *mc_running = ptp->start.addr; + + pps_get_ts(&now); + start = now.ts_real; + limit = now.ts_real; + timespec_add_ns(&limit, SYNCHRONISE_PERIOD_NS); + + /* Write host time for specified period or until MC is done */ + while ((timespec_compare(&now.ts_real, &limit) < 0) && + ACCESS_ONCE(*mc_running)) { + struct timespec update_time; + unsigned int host_time; + + /* Don't update continuously to avoid saturating the PCIe bus */ + update_time = now.ts_real; + timespec_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); + do { + pps_get_ts(&now); + } while ((timespec_compare(&now.ts_real, &update_time) < 0) && + ACCESS_ONCE(*mc_running)); + + /* Synchronise NIC with single word of time only */ + host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS | + now.ts_real.tv_nsec); + /* Update host time in NIC memory */ + _efx_writed(efx, cpu_to_le32(host_time), + FR_CZ_MC_TREG_SMEM + MC_SMEM_P0_PTP_TIME_OFST); + } + *last_time = now; +} + +/* Read a timeset from the MC's results and partial process. */ +static void efx_ptp_read_timeset(u8 *data, struct efx_ptp_timeset *timeset) +{ + unsigned start_ns, end_ns; + + timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART); + timeset->seconds = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_SECONDS); + timeset->nanoseconds = MCDI_DWORD(data, + PTP_OUT_SYNCHRONIZE_NANOSECONDS); + timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND), + timeset->waitns = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS); + + /* Ignore seconds */ + start_ns = timeset->host_start & MC_NANOSECOND_MASK; + end_ns = timeset->host_end & MC_NANOSECOND_MASK; + /* Allow for rollover */ + if (end_ns < start_ns) + end_ns += NSEC_PER_SEC; + /* Determine duration of operation */ + timeset->window = end_ns - start_ns; +} + +/* Process times received from MC. + * + * Extract times from returned results, and establish the minimum value + * seen. The minimum value represents the "best" possible time and events + * too much greater than this are rejected - the machine is, perhaps, too + * busy. A number of readings are taken so that, hopefully, at least one good + * synchronisation will be seen in the results. + */ +static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, + size_t response_length, + const struct pps_event_time *last_time) +{ + unsigned number_readings = (response_length / + MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN); + unsigned i; + unsigned min; + unsigned min_set = 0; + unsigned total; + unsigned ngood = 0; + unsigned last_good = 0; + struct efx_ptp_data *ptp = efx->ptp_data; + bool min_valid = false; + u32 last_sec; + u32 start_sec; + struct timespec delta; + + if (number_readings == 0) + return -EAGAIN; + + /* Find minimum value in this set of results, discarding clearly + * erroneous results. + */ + for (i = 0; i < number_readings; i++) { + efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]); + synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN; + if (ptp->timeset[i].window > SYNCHRONISATION_GRANULARITY_NS) { + if (min_valid) { + if (ptp->timeset[i].window < min_set) + min_set = ptp->timeset[i].window; + } else { + min_valid = true; + min_set = ptp->timeset[i].window; + } + } + } + + if (min_valid) { + if (ptp->base_sync_valid && (min_set > ptp->base_sync_ns)) + min = ptp->base_sync_ns; + else + min = min_set; + } else { + min = SYNCHRONISATION_GRANULARITY_NS; + } + + /* Discard excessively long synchronise durations. The MC times + * when it finishes reading the host time so the corrected window + * time should be fairly constant for a given platform. + */ + total = 0; + for (i = 0; i < number_readings; i++) + if (ptp->timeset[i].window > ptp->timeset[i].waitns) { + unsigned win; + + win = ptp->timeset[i].window - ptp->timeset[i].waitns; + if (win >= MIN_SYNCHRONISATION_NS && + win < MAX_SYNCHRONISATION_NS) { + total += ptp->timeset[i].window; + ngood++; + last_good = i; + } + } + + if (ngood == 0) { + netif_warn(efx, drv, efx->net_dev, + "PTP no suitable synchronisations %dns %dns\n", + ptp->base_sync_ns, min_set); + return -EAGAIN; + } + + /* Average minimum this synchronisation */ + ptp->last_sync_ns = DIV_ROUND_UP(total, ngood); + if (!ptp->base_sync_valid || (ptp->last_sync_ns < ptp->base_sync_ns)) { + ptp->base_sync_valid = true; + ptp->base_sync_ns = ptp->last_sync_ns; + } + + /* Calculate delay from actual PPS to last_time */ + delta.tv_nsec = + ptp->timeset[last_good].nanoseconds + + last_time->ts_real.tv_nsec - + (ptp->timeset[last_good].host_start & MC_NANOSECOND_MASK); + + /* It is possible that the seconds rolled over between taking + * the start reading and the last value written by the host. The + * timescales are such that a gap of more than one second is never + * expected. + */ + start_sec = ptp->timeset[last_good].host_start >> MC_NANOSECOND_BITS; + last_sec = last_time->ts_real.tv_sec & MC_SECOND_MASK; + if (start_sec != last_sec) { + if (((start_sec + 1) & MC_SECOND_MASK) != last_sec) { + netif_warn(efx, hw, efx->net_dev, + "PTP bad synchronisation seconds\n"); + return -EAGAIN; + } else { + delta.tv_sec = 1; + } + } else { + delta.tv_sec = 0; + } + + ptp->host_time_pps = *last_time; + pps_sub_ts(&ptp->host_time_pps, delta); + + return 0; +} + +/* Synchronize times between the host and the MC */ +static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + u8 synch_buf[MC_CMD_PTP_OUT_SYNCHRONIZE_LENMAX]; + size_t response_length; + int rc; + unsigned long timeout; + struct pps_event_time last_time = {}; + unsigned int loops = 0; + int *start = ptp->start.addr; + + MCDI_SET_DWORD(synch_buf, PTP_IN_OP, MC_CMD_PTP_OP_SYNCHRONIZE); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_NUMTIMESETS, + num_readings); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_LO, + (u32)ptp->start.dma_addr); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_HI, + (u32)((u64)ptp->start.dma_addr >> 32)); + + /* Clear flag that signals MC ready */ + ACCESS_ONCE(*start) = 0; + efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN); + + /* Wait for start from MCDI (or timeout) */ + timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS); + while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) { + udelay(20); /* Usually start MCDI execution quickly */ + loops++; + } + + if (ACCESS_ONCE(*start)) + efx_ptp_send_times(efx, &last_time); + + /* Collect results */ + rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN, + synch_buf, sizeof(synch_buf), + &response_length); + if (rc == 0) + rc = efx_ptp_process_times(efx, synch_buf, response_length, + &last_time); + + return rc; +} + +/* Transmit a PTP packet, via the MCDI interface, to the wire. */ +static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) +{ + u8 *txbuf = efx->ptp_data->txbuf; + struct skb_shared_hwtstamps timestamps; + int rc = -EIO; + /* MCDI driver requires word aligned lengths */ + size_t len = ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN(skb->len), 4); + u8 txtime[MC_CMD_PTP_OUT_TRANSMIT_LEN]; + + MCDI_SET_DWORD(txbuf, PTP_IN_OP, MC_CMD_PTP_OP_TRANSMIT); + MCDI_SET_DWORD(txbuf, PTP_IN_TRANSMIT_LENGTH, skb->len); + if (skb_shinfo(skb)->nr_frags != 0) { + rc = skb_linearize(skb); + if (rc != 0) + goto fail; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + rc = skb_checksum_help(skb); + if (rc != 0) + goto fail; + } + skb_copy_from_linear_data(skb, + &txbuf[MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST], + len); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, txbuf, len, txtime, + sizeof(txtime), &len); + if (rc != 0) + goto fail; + + memset(×tamps, 0, sizeof(timestamps)); + timestamps.hwtstamp = ktime_set( + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_SECONDS), + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_NANOSECONDS)); + + skb_tstamp_tx(skb, ×tamps); + + rc = 0; + +fail: + dev_kfree_skb(skb); + + return rc; +} + +static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct list_head *cursor; + struct list_head *next; + + /* Drop time-expired events */ + spin_lock_bh(&ptp->evt_lock); + if (!list_empty(&ptp->evt_list)) { + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, + link); + if (time_after(jiffies, evt->expiry)) { + list_move(&evt->link, &ptp->evt_free_list); + netif_warn(efx, hw, efx->net_dev, + "PTP rx event dropped\n"); + } + } + } + spin_unlock_bh(&ptp->evt_lock); +} + +static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx, + struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + bool evts_waiting; + struct list_head *cursor; + struct list_head *next; + struct efx_ptp_match *match; + enum ptp_packet_state rc = PTP_PACKET_STATE_UNMATCHED; + + spin_lock_bh(&ptp->evt_lock); + evts_waiting = !list_empty(&ptp->evt_list); + spin_unlock_bh(&ptp->evt_lock); + + if (!evts_waiting) + return PTP_PACKET_STATE_UNMATCHED; + + match = (struct efx_ptp_match *)skb->cb; + /* Look for a matching timestamp in the event queue */ + spin_lock_bh(&ptp->evt_lock); + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, link); + if ((evt->seq0 == match->words[0]) && + (evt->seq1 == match->words[1])) { + struct skb_shared_hwtstamps *timestamps; + + /* Match - add in hardware timestamp */ + timestamps = skb_hwtstamps(skb); + timestamps->hwtstamp = evt->hwtimestamp; + + match->state = PTP_PACKET_STATE_MATCHED; + rc = PTP_PACKET_STATE_MATCHED; + list_move(&evt->link, &ptp->evt_free_list); + break; + } + } + spin_unlock_bh(&ptp->evt_lock); + + return rc; +} + +/* Process any queued receive events and corresponding packets + * + * q is returned with all the packets that are ready for delivery. + * true is returned if at least one of those packets requires + * synchronisation. + */ +static bool efx_ptp_process_events(struct efx_nic *efx, struct sk_buff_head *q) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + bool rc = false; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ptp->rxq))) { + struct efx_ptp_match *match; + + match = (struct efx_ptp_match *)skb->cb; + if (match->state == PTP_PACKET_STATE_MATCH_UNWANTED) { + __skb_queue_tail(q, skb); + } else if (efx_ptp_match_rx(efx, skb) == + PTP_PACKET_STATE_MATCHED) { + rc = true; + __skb_queue_tail(q, skb); + } else if (time_after(jiffies, match->expiry)) { + match->state = PTP_PACKET_STATE_TIMED_OUT; + netif_warn(efx, rx_err, efx->net_dev, + "PTP packet - no timestamp seen\n"); + __skb_queue_tail(q, skb); + } else { + /* Replace unprocessed entry and stop */ + skb_queue_head(&ptp->rxq, skb); + break; + } + } + + return rc; +} + +/* Complete processing of a received packet */ +static inline void efx_ptp_process_rx(struct efx_nic *efx, struct sk_buff *skb) +{ + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); +} + +static int efx_ptp_start(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_filter_spec rxfilter; + int rc; + + ptp->reset_required = false; + + /* Must filter on both event and general ports to ensure + * that there is no packet re-ordering. + */ + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_EVENT_PORT)); + if (rc != 0) + return rc; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + return rc; + ptp->rxfilter_event = rc; + + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_GENERAL_PORT)); + if (rc != 0) + goto fail; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + goto fail; + ptp->rxfilter_general = rc; + + rc = efx_ptp_enable(efx); + if (rc != 0) + goto fail2; + + ptp->evt_frag_idx = 0; + ptp->current_adjfreq = 0; + ptp->rxfilter_installed = true; + + return 0; + +fail2: + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_general); +fail: + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + + return rc; +} + +static int efx_ptp_stop(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int rc = efx_ptp_disable(efx); + struct list_head *cursor; + struct list_head *next; + + if (ptp->rxfilter_installed) { + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_general); + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + ptp->rxfilter_installed = false; + } + + /* Make sure RX packets are really delivered */ + efx_ptp_deliver_rx_queue(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + /* Drop any pending receive events */ + spin_lock_bh(&efx->ptp_data->evt_lock); + list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) { + list_move(cursor, &efx->ptp_data->evt_free_list); + } + spin_unlock_bh(&efx->ptp_data->evt_lock); + + return rc; +} + +static void efx_ptp_pps_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp = + container_of(work, struct efx_ptp_data, pps_work); + struct efx_nic *efx = ptp->channel->efx; + struct ptp_clock_event ptp_evt; + + if (efx_ptp_synchronize(efx, PTP_SYNC_ATTEMPTS)) + return; + + ptp_evt.type = PTP_CLOCK_PPSUSR; + ptp_evt.pps_times = ptp->host_time_pps; + ptp_clock_event(ptp->phc_clock, &ptp_evt); +} + +/* Process any pending transmissions and timestamp any received packets. + */ +static void efx_ptp_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp_data = + container_of(work, struct efx_ptp_data, work); + struct efx_nic *efx = ptp_data->channel->efx; + struct sk_buff *skb; + struct sk_buff_head tempq; + + if (ptp_data->reset_required) { + efx_ptp_stop(efx); + efx_ptp_start(efx); + return; + } + + efx_ptp_drop_time_expired_events(efx); + + __skb_queue_head_init(&tempq); + if (efx_ptp_process_events(efx, &tempq) || + !skb_queue_empty(&ptp_data->txq)) { + + while ((skb = skb_dequeue(&ptp_data->txq))) + efx_ptp_xmit_skb(efx, skb); + } + + while ((skb = __skb_dequeue(&tempq))) + efx_ptp_process_rx(efx, skb); +} + +/* Initialise PTP channel and state. + * + * Setting core_index to zero causes the queue to be initialised and doesn't + * overlap with 'rxq0' because ptp.c doesn't use skb_record_rx_queue. + */ +static int efx_ptp_probe_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp; + int rc = 0; + unsigned int pos; + + channel->irq_moderation = 0; + channel->rx_queue.core_index = 0; + + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); + efx->ptp_data = ptp; + if (!efx->ptp_data) + return -ENOMEM; + + rc = efx_nic_alloc_buffer(efx, &ptp->start, sizeof(int)); + if (rc != 0) + goto fail1; + + ptp->channel = channel; + skb_queue_head_init(&ptp->rxq); + skb_queue_head_init(&ptp->txq); + ptp->workwq = create_singlethread_workqueue("sfc_ptp"); + if (!ptp->workwq) { + rc = -ENOMEM; + goto fail2; + } + + INIT_WORK(&ptp->work, efx_ptp_worker); + ptp->config.flags = 0; + ptp->config.tx_type = HWTSTAMP_TX_OFF; + ptp->config.rx_filter = HWTSTAMP_FILTER_NONE; + INIT_LIST_HEAD(&ptp->evt_list); + INIT_LIST_HEAD(&ptp->evt_free_list); + spin_lock_init(&ptp->evt_lock); + for (pos = 0; pos < MAX_RECEIVE_EVENTS; pos++) + list_add(&ptp->rx_evts[pos].link, &ptp->evt_free_list); + + ptp->phc_clock_info.owner = THIS_MODULE; + snprintf(ptp->phc_clock_info.name, + sizeof(ptp->phc_clock_info.name), + "%pm", efx->net_dev->perm_addr); + ptp->phc_clock_info.max_adj = MAX_PPB; + ptp->phc_clock_info.n_alarm = 0; + ptp->phc_clock_info.n_ext_ts = 0; + ptp->phc_clock_info.n_per_out = 0; + ptp->phc_clock_info.pps = 1; + ptp->phc_clock_info.adjfreq = efx_phc_adjfreq; + ptp->phc_clock_info.adjtime = efx_phc_adjtime; + ptp->phc_clock_info.gettime = efx_phc_gettime; + ptp->phc_clock_info.settime = efx_phc_settime; + ptp->phc_clock_info.enable = efx_phc_enable; + + ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info, + &efx->pci_dev->dev); + if (!ptp->phc_clock) + goto fail3; + + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } + ptp->nic_ts_enabled = false; + + return 0; +fail4: + ptp_clock_unregister(efx->ptp_data->phc_clock); + +fail3: + destroy_workqueue(efx->ptp_data->workwq); + +fail2: + efx_nic_free_buffer(efx, &ptp->start); + +fail1: + kfree(efx->ptp_data); + efx->ptp_data = NULL; + + return rc; +} + +static void efx_ptp_remove_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + if (!efx->ptp_data) + return; + + (void)efx_ptp_disable(channel->efx); + + cancel_work_sync(&efx->ptp_data->work); + cancel_work_sync(&efx->ptp_data->pps_work); + + skb_queue_purge(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + ptp_clock_unregister(efx->ptp_data->phc_clock); + + destroy_workqueue(efx->ptp_data->workwq); + destroy_workqueue(efx->ptp_data->pps_workwq); + + efx_nic_free_buffer(efx, &efx->ptp_data->start); + kfree(efx->ptp_data); +} + +static void efx_ptp_get_channel_name(struct efx_channel *channel, + char *buf, size_t len) +{ + snprintf(buf, len, "%s-ptp", channel->efx->name); +} + +/* Determine whether this packet should be processed by the PTP module + * or transmitted conventionally. + */ +bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return efx->ptp_data && + efx->ptp_data->enabled && + skb->len >= PTP_MIN_LENGTH && + skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM && + likely(skb->protocol == htons(ETH_P_IP)) && + ip_hdr(skb)->protocol == IPPROTO_UDP && + udp_hdr(skb)->dest == htons(PTP_EVENT_PORT); +} + +/* Receive a PTP packet. Packets are queued until the arrival of + * the receive timestamp from the MC - this will probably occur after the + * packet arrival because of the processing in the MC. + */ +static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; + u8 *data; + unsigned int version; + + match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + + /* Correct version? */ + if (ptp->mode == MC_CMD_PTP_MODE_V1) { + if (skb->len < PTP_V1_MIN_LENGTH) { + netif_receive_skb(skb); + return; + } + version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]); + if (version != PTP_VERSION_V1) { + netif_receive_skb(skb); + return; + } + } else { + if (skb->len < PTP_V2_MIN_LENGTH) { + netif_receive_skb(skb); + return; + } + version = skb->data[PTP_V2_VERSION_OFFSET]; + + BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2); + BUILD_BUG_ON(PTP_V1_UUID_OFFSET != PTP_V2_MC_UUID_OFFSET); + BUILD_BUG_ON(PTP_V1_UUID_LENGTH != PTP_V2_MC_UUID_LENGTH); + BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); + BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); + + if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { + netif_receive_skb(skb); + return; + } + } + + /* Does this packet require timestamping? */ + if (ntohs(*(__be16 *)&skb->data[PTP_DPORT_OFFSET]) == PTP_EVENT_PORT) { + struct skb_shared_hwtstamps *timestamps; + + match->state = PTP_PACKET_STATE_UNMATCHED; + + /* Clear all timestamps held: filled in later */ + timestamps = skb_hwtstamps(skb); + memset(timestamps, 0, sizeof(*timestamps)); + + /* Extract UUID/Sequence information */ + data = skb->data + PTP_V1_UUID_OFFSET; + match->words[0] = (data[0] | + (data[1] << 8) | + (data[2] << 16) | + (data[3] << 24)); + match->words[1] = (data[4] | + (data[5] << 8) | + (skb->data[PTP_V1_SEQUENCE_OFFSET + + PTP_V1_SEQUENCE_LENGTH - 1] << + 16)); + } else { + match->state = PTP_PACKET_STATE_MATCH_UNWANTED; + } + + skb_queue_tail(&ptp->rxq, skb); + queue_work(ptp->workwq, &ptp->work); +} + +/* Transmit a PTP packet. This has to be transmitted by the MC + * itself, through an MCDI call. MCDI calls aren't permitted + * in the transmit path so defer the actual transmission to a suitable worker. + */ +int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + skb_queue_tail(&ptp->txq, skb); + + if ((udp_hdr(skb)->dest == htons(PTP_EVENT_PORT)) && + (skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM)) + efx_xmit_hwtstamp_pending(skb); + queue_work(ptp->workwq, &ptp->work); + + return NETDEV_TX_OK; +} + +static int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, + unsigned int new_mode) +{ + if ((enable_wanted != efx->ptp_data->enabled) || + (enable_wanted && (efx->ptp_data->mode != new_mode))) { + int rc; + + if (enable_wanted) { + /* Change of mode requires disable */ + if (efx->ptp_data->enabled && + (efx->ptp_data->mode != new_mode)) { + efx->ptp_data->enabled = false; + rc = efx_ptp_stop(efx); + if (rc != 0) + return rc; + } + + /* Set new operating mode and establish + * baseline synchronisation, which must + * succeed. + */ + efx->ptp_data->mode = new_mode; + rc = efx_ptp_start(efx); + if (rc == 0) { + rc = efx_ptp_synchronize(efx, + PTP_SYNC_ATTEMPTS * 2); + if (rc != 0) + efx_ptp_stop(efx); + } + } else { + rc = efx_ptp_stop(efx); + } + + if (rc != 0) + return rc; + + efx->ptp_data->enabled = enable_wanted; + } + + return 0; +} + +static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) +{ + bool enable_wanted = false; + unsigned int new_mode; + int rc; + + if (init->flags) + return -EINVAL; + + if ((init->tx_type != HWTSTAMP_TX_OFF) && + (init->tx_type != HWTSTAMP_TX_ON)) + return -ERANGE; + + new_mode = efx->ptp_data->mode; + /* Determine whether any PTP HW operations are required */ + switch (init->rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + init->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + new_mode = MC_CMD_PTP_MODE_V1; + enable_wanted = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + /* Although these three are accepted only IPV4 packets will be + * timestamped + */ + init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + new_mode = MC_CMD_PTP_MODE_V2; + enable_wanted = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + /* Non-IP + IPv6 timestamping not supported */ + return -ERANGE; + break; + default: + return -ERANGE; + } + + if (init->tx_type != HWTSTAMP_TX_OFF) + enable_wanted = true; + + rc = efx_ptp_change_mode(efx, enable_wanted, new_mode); + if (rc != 0) + return rc; + + efx->ptp_data->config = *init; + + return 0; +} + +int +efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info) +{ + struct efx_nic *efx = netdev_priv(net_dev); + struct efx_ptp_data *ptp = efx->ptp_data; + + if (!ptp) + return -EOPNOTSUPP; + + ts_info->so_timestamping = (SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE); + ts_info->phc_index = ptp_clock_index(ptp->phc_clock); + ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON; + ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ); + return 0; +} + +int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd) +{ + struct hwtstamp_config config; + int rc; + + /* Not a PTP enabled port */ + if (!efx->ptp_data) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + rc = efx_ptp_ts_init(efx, &config); + if (rc != 0) + return rc; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) + ? -EFAULT : 0; +} + +static void ptp_event_failure(struct efx_nic *efx, int expected_frag_len) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + netif_err(efx, hw, efx->net_dev, + "PTP unexpected event length: got %d expected %d\n", + ptp->evt_frag_idx, expected_frag_len); + ptp->reset_required = true; + queue_work(ptp->workwq, &ptp->work); +} + +/* Process a completed receive event. Put it on the event queue and + * start worker thread. This is required because event and their + * correspoding packets may come in either order. + */ +static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + struct efx_ptp_event_rx *evt = NULL; + + if (ptp->evt_frag_idx != 3) { + ptp_event_failure(efx, 3); + return; + } + + spin_lock_bh(&ptp->evt_lock); + if (!list_empty(&ptp->evt_free_list)) { + evt = list_first_entry(&ptp->evt_free_list, + struct efx_ptp_event_rx, link); + list_del(&evt->link); + + evt->seq0 = EFX_QWORD_FIELD(ptp->evt_frags[2], MCDI_EVENT_DATA); + evt->seq1 = (EFX_QWORD_FIELD(ptp->evt_frags[2], + MCDI_EVENT_SRC) | + (EFX_QWORD_FIELD(ptp->evt_frags[1], + MCDI_EVENT_SRC) << 8) | + (EFX_QWORD_FIELD(ptp->evt_frags[0], + MCDI_EVENT_SRC) << 16)); + evt->hwtimestamp = ktime_set( + EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA), + EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA)); + evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + list_add_tail(&evt->link, &ptp->evt_list); + + queue_work(ptp->workwq, &ptp->work); + } else { + netif_err(efx, rx_err, efx->net_dev, "No free PTP event"); + } + spin_unlock_bh(&ptp->evt_lock); +} + +static void ptp_event_fault(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + int code = EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA); + if (ptp->evt_frag_idx != 1) { + ptp_event_failure(efx, 1); + return; + } + + netif_err(efx, hw, efx->net_dev, "PTP error %d\n", code); +} + +static void ptp_event_pps(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + if (ptp->nic_ts_enabled) + queue_work(ptp->pps_workwq, &ptp->pps_work); +} + +void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE); + + if (!ptp->enabled) + return; + + if (ptp->evt_frag_idx == 0) { + ptp->evt_code = code; + } else if (ptp->evt_code != code) { + netif_err(efx, hw, efx->net_dev, + "PTP out of sequence event %d\n", code); + ptp->evt_frag_idx = 0; + } + + ptp->evt_frags[ptp->evt_frag_idx++] = *ev; + if (!MCDI_EVENT_FIELD(*ev, CONT)) { + /* Process resulting event */ + switch (code) { + case MCDI_EVENT_CODE_PTP_RX: + ptp_event_rx(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_FAULT: + ptp_event_fault(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_PPS: + ptp_event_pps(efx, ptp); + break; + default: + netif_err(efx, hw, efx->net_dev, + "PTP unknown event %d\n", code); + break; + } + ptp->evt_frag_idx = 0; + } else if (MAX_EVENT_FRAGS == ptp->evt_frag_idx) { + netif_err(efx, hw, efx->net_dev, + "PTP too many event fragments\n"); + ptp->evt_frag_idx = 0; + } +} + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + u8 inadj[MC_CMD_PTP_IN_ADJUST_LEN]; + s64 adjustment_ns; + int rc; + + if (delta > MAX_PPB) + delta = MAX_PPB; + else if (delta < -MAX_PPB) + delta = -MAX_PPB; + + /* Convert ppb to fixed point ns. */ + adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >> + (PPB_EXTRA_BITS + MAX_PPB_BITS)); + + MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_LO, (u32)adjustment_ns); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_HI, + (u32)(adjustment_ns >> 32)); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_SECONDS, 0); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_NANOSECONDS, 0); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inadj, sizeof(inadj), + NULL, 0, NULL); + if (rc != 0) + return rc; + + ptp_data->current_adjfreq = delta; + return 0; +} + +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + struct timespec delta_ts = ns_to_timespec(delta); + u8 inbuf[MC_CMD_PTP_IN_ADJUST_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_LO, 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_HI, 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_SECONDS, (u32)delta_ts.tv_sec); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_NANOSECONDS, (u32)delta_ts.tv_nsec); + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + u8 inbuf[MC_CMD_PTP_IN_READ_NIC_TIME_LEN]; + u8 outbuf[MC_CMD_PTP_OUT_READ_NIC_TIME_LEN]; + int rc; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_READ_NIC_TIME); + + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc != 0) + return rc; + + ts->tv_sec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_SECONDS); + ts->tv_nsec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_NANOSECONDS); + return 0; +} + +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec *e_ts) +{ + /* Get the current NIC time, efx_phc_gettime. + * Subtract from the desired time to get the offset + * call efx_phc_adjtime with the offset + */ + int rc; + struct timespec time_now; + struct timespec delta; + + rc = efx_phc_gettime(ptp, &time_now); + if (rc != 0) + return rc; + + delta = timespec_sub(*e_ts, time_now); + + efx_phc_adjtime(ptp, timespec_to_ns(&delta)); + if (rc != 0) + return rc; + + return 0; +} + +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, + int enable) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + if (request->type != PTP_CLK_REQ_PPS) + return -EOPNOTSUPP; + + ptp_data->nic_ts_enabled = !!enable; + return 0; +} + +static const struct efx_channel_type efx_ptp_channel_type = { + .handle_no_channel = efx_ptp_handle_no_channel, + .pre_probe = efx_ptp_probe_channel, + .post_remove = efx_ptp_remove_channel, + .get_name = efx_ptp_get_channel_name, + /* no copy operation; there is no need to reallocate this channel */ + .receive_skb = efx_ptp_rx, + .keep_eventq = false, +}; + +void efx_ptp_probe(struct efx_nic *efx) +{ + /* Check whether PTP is implemented on this NIC. The DISABLE + * operation will succeed if and only if it is implemented. + */ + if (efx_ptp_disable(efx) == 0) + efx->extra_channel_type[EFX_EXTRA_CHANNEL_PTP] = + &efx_ptp_channel_type; +} diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 719319b89d7..9e0ad1b75c3 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -479,7 +479,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel, skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb_record_rx_queue(skb, channel->channel); + skb_record_rx_queue(skb, channel->rx_queue.core_index); gro_result = napi_gro_frags(napi); } else { @@ -571,8 +571,14 @@ static void efx_rx_deliver(struct efx_channel *channel, /* Set the SKB flags */ skb_checksum_none_assert(skb); + /* Record the rx_queue */ + skb_record_rx_queue(skb, channel->rx_queue.core_index); + /* Pass the packet up */ - netif_receive_skb(skb); + if (channel->type->receive_skb) + channel->type->receive_skb(channel, skb); + else + netif_receive_skb(skb); /* Update allocation strategy method */ channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; @@ -608,13 +614,14 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) * at the ethernet header */ skb->protocol = eth_type_trans(skb, efx->net_dev); - skb_record_rx_queue(skb, channel->channel); + skb_record_rx_queue(skb, channel->rx_queue.core_index); } if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; - if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED))) + if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) && + !channel->type->receive_skb) efx_rx_packet_gro(channel, rx_buf, eh); else efx_rx_deliver(channel, rx_buf); @@ -624,6 +631,11 @@ void efx_rx_strategy(struct efx_channel *channel) { enum efx_rx_alloc_method method = rx_alloc_method; + if (channel->type->receive_skb) { + channel->rx_alloc_push_pages = false; + return; + } + /* Only makes sense to use page based allocation if GRO is enabled */ if (!(channel->efx->net_dev->features & NETIF_F_GRO)) { method = RX_ALLOC_METHOD_SKB; diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 96068d15b60..ce72ae4f399 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -614,7 +614,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests, { enum efx_loopback_mode mode; struct efx_loopback_state *state; - struct efx_channel *channel = efx_get_channel(efx, 0); + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); struct efx_tx_queue *tx_queue; int rc = 0; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 6bafd216e55..84b41bf08a3 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -335,6 +335,7 @@ static int siena_probe_nic(struct efx_nic *efx) goto fail5; efx_sriov_probe(efx); + efx_ptp_probe(efx); return 0; diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 9cb3b84ecae..d49b53dc2a5 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -21,6 +21,9 @@ /* Number of longs required to track all the VIs in a VF */ #define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX) +/* Maximum number of RX queues supported */ +#define VF_MAX_RX_QUEUES 63 + /** * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour * @VF_TX_FILTER_OFF: Disabled @@ -578,6 +581,7 @@ static int efx_vfdi_init_rxq(struct efx_vf *vf) efx_oword_t reg; if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) || + vf_rxq >= VF_MAX_RX_QUEUES || bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { if (net_ratelimit()) netif_err(efx, hw, efx->net_dev, @@ -683,6 +687,9 @@ static int efx_vfdi_fini_all_queues(struct efx_vf *vf) __le32 *rxqs; int rc; + BUILD_BUG_ON(VF_MAX_RX_QUEUES > + MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM); + rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL); if (rxqs == NULL) return VFDI_RC_ENOMEM; @@ -1028,6 +1035,7 @@ efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len) static const struct efx_channel_type efx_sriov_channel_type = { .handle_no_channel = efx_sriov_handle_no_channel, .pre_probe = efx_sriov_probe_channel, + .post_remove = efx_channel_dummy_op_void, .get_name = efx_sriov_get_channel_name, /* no copy operation; channel must not be reallocated */ .keep_eventq = true, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 18713436b44..5e090e54298 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -22,14 +22,6 @@ #include "nic.h" #include "workarounds.h" -/* - * TX descriptor ring full threshold - * - * The tx_queue descriptor ring fill-level must fall below this value - * before we restart the netif queue - */ -#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u) - static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, @@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct device *dma_dev = &tx_queue->efx->pci_dev->dev; dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - buffer->unmap_len); - if (buffer->unmap_single) + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); else dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); buffer->unmap_len = 0; - buffer->unmap_single = false; } - if (buffer->skb) { + if (buffer->flags & EFX_TX_BUF_SKB) { (*pkts_compl)++; (*bytes_compl) += buffer->skb->len; dev_kfree_skb_any((struct sk_buff *) buffer->skb); - buffer->skb = NULL; netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_HEAP) { + kfree(buffer->heap_buf); } -} -/** - * struct efx_tso_header - a DMA mapped buffer for packet headers - * @next: Linked list of free ones. - * The list is protected by the TX queue lock. - * @dma_unmap_len: Length to unmap for an oversize buffer, or 0. - * @dma_addr: The DMA address of the header below. - * - * This controls the memory used for a TSO header. Use TSOH_DATA() - * to find the packet header data. Use TSOH_SIZE() to calculate the - * total size required for a given packet header length. TSO headers - * in the free list are exactly %TSOH_STD_SIZE bytes in size. - */ -struct efx_tso_header { - union { - struct efx_tso_header *next; - size_t unmap_len; - }; - dma_addr_t dma_addr; -}; + buffer->len = 0; + buffer->flags = 0; +} static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb); -static void efx_fini_tso(struct efx_tx_queue *tx_queue); -static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh); - -static void efx_tsoh_free(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer) -{ - if (buffer->tsoh) { - if (likely(!buffer->tsoh->unmap_len)) { - buffer->tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = buffer->tsoh; - } else { - efx_tsoh_heap_free(tx_queue, buffer->tsoh); - } - buffer->tsoh = NULL; - } -} - static inline unsigned efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) @@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) return max_descs; } +/* Get partner of a TX queue, seen as part of the same net core queue */ +static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + return tx_queue - EFX_TXQ_TYPE_OFFLOAD; + else + return tx_queue + EFX_TXQ_TYPE_OFFLOAD; +} + +static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) +{ + /* We need to consider both queues that the net core sees as one */ + struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); + struct efx_nic *efx = txq1->efx; + unsigned int fill_level; + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + if (likely(fill_level < efx->txq_stop_thresh)) + return; + + /* We used the stale old_read_count above, which gives us a + * pessimistic estimate of the fill level (which may even + * validly be >= efx->txq_entries). Now try again using + * read_count (more likely to be a cache miss). + * + * If we read read_count and then conditionally stop the + * queue, it is possible for the completion path to race with + * us and complete all outstanding descriptors in the middle, + * after which there will be no more completions to wake it. + * Therefore we stop the queue first, then read read_count + * (with a memory barrier to ensure the ordering), then + * restart the queue if the fill level turns out to be low + * enough. + */ + netif_tx_stop_queue(txq1->core_txq); + smp_mb(); + txq1->old_read_count = ACCESS_ONCE(txq1->read_count); + txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries); + if (likely(fill_level < efx->txq_stop_thresh)) { + smp_mb(); + if (likely(!efx->loopback_selftest)) + netif_tx_start_queue(txq1->core_txq); + } +} + /* * Add a socket buffer to a TX queue * @@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) * This function is split out from efx_hard_start_xmit to allow the * loopback test to direct packets via specific TX queues. * - * Returns NETDEV_TX_OK or NETDEV_TX_BUSY + * Returns NETDEV_TX_OK. * You must hold netif_tx_lock() to call this function. */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) @@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; - unsigned int len, unmap_len = 0, fill_level, insert_ptr; + unsigned int len, unmap_len = 0, insert_ptr; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; - bool unmap_single; - int q_space, i = 0; - netdev_tx_t rc = NETDEV_TX_OK; + unsigned short dma_flags; + int i = 0; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return NETDEV_TX_OK; } - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - q_space = efx->txq_entries - 1 - fill_level; - /* Map for DMA. Use dma_map_single rather than dma_map_page * since this is more efficient on machines with sparse * memory. */ - unmap_single = true; + dma_flags = EFX_TX_BUF_MAP_SINGLE; dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); /* Process all fragments */ @@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Add to TX queue, splitting across DMA boundaries */ do { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have - * happened since the xmit path last - * checked. Update the xmit path's - * copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the - * change of queue state from the access - * of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - rc = NETDEV_TX_BUSY; - goto unwind; - } - smp_mb(); - if (likely(!efx->loopback_selftest)) - netif_tx_start_queue( - tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->tsoh); - EFX_BUG_ON_PARANOID(buffer->skb); + EFX_BUG_ON_PARANOID(buffer->flags); EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(!buffer->continuation); EFX_BUG_ON_PARANOID(buffer->unmap_len); dma_len = efx_max_tx_len(efx, dma_addr); @@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Fill out per descriptor fields */ buffer->len = dma_len; buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; len -= dma_len; dma_addr += dma_len; ++tx_queue->insert_count; } while (len); /* Transfer ownership of the unmapping to the final buffer */ - buffer->unmap_single = unmap_single; + buffer->flags = EFX_TX_BUF_CONT | dma_flags; buffer->unmap_len = unmap_len; unmap_len = 0; @@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) len = skb_frag_size(fragment); i++; /* Map for DMA */ - unmap_single = false; + dma_flags = 0; dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, DMA_TO_DEVICE); } /* Transfer ownership of the skb to the final buffer */ buffer->skb = skb; - buffer->continuation = false; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; netdev_tx_sent_queue(tx_queue->core_txq, skb->len); /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + return NETDEV_TX_OK; dma_err: @@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Mark the packet as transmitted, and free the SKB ourselves */ dev_kfree_skb_any(skb); - unwind: /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { unsigned int pkts_compl = 0, bytes_compl = 0; @@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->len = 0; } /* Free the fragment we were mid-way through pushing */ if (unmap_len) { - if (unmap_single) + if (dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, unmap_len, DMA_TO_DEVICE); else @@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) DMA_TO_DEVICE); } - return rc; + return NETDEV_TX_OK; } /* Remove packets from the TX queue @@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, } efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; read_ptr = tx_queue->read_count & tx_queue->ptr_mask; @@ -366,6 +339,12 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, EFX_WARN_ON_PARANOID(!netif_device_present(net_dev)); + /* PTP "event" packet */ + if (unlikely(efx_xmit_with_hwtstamp(skb)) && + unlikely(efx_ptp_is_ptp_tx(efx, skb))) { + return efx_ptp_tx(efx, skb); + } + index = skb_get_queue_mapping(skb); type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; if (index >= efx->n_tx_channels) { @@ -450,6 +429,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned fill_level; struct efx_nic *efx = tx_queue->efx; + struct efx_tx_queue *txq2; unsigned int pkts_compl = 0, bytes_compl = 0; EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); @@ -457,15 +437,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); - /* See if we need to restart the netif queue. This barrier - * separates the update of read_count from the test of the - * queue state. */ + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ smp_mb(); if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && likely(efx->port_enabled) && likely(netif_device_present(efx->net_dev))) { - fill_level = tx_queue->insert_count - tx_queue->read_count; - if (fill_level < EFX_TXQ_THRESHOLD(efx)) + txq2 = efx_tx_queue_partner(tx_queue); + fill_level = max(tx_queue->insert_count - tx_queue->read_count, + txq2->insert_count - txq2->read_count); + if (fill_level <= efx->txq_wake_thresh) netif_tx_wake_queue(tx_queue->core_txq); } @@ -480,11 +463,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } } +/* Size of page-based TSO header buffers. Larger blocks must be + * allocated from the heap. + */ +#define TSOH_STD_SIZE 128 +#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE) + +/* At most half the descriptors in the queue at any time will refer to + * a TSO header buffer, since they must always be followed by a + * payload descriptor referring to an skb. + */ +static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE); +} + int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) { struct efx_nic *efx = tx_queue->efx; unsigned int entries; - int i, rc; + int rc; /* Create the smallest power-of-two aligned ring */ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); @@ -500,17 +498,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) GFP_KERNEL); if (!tx_queue->buffer) return -ENOMEM; - for (i = 0; i <= tx_queue->ptr_mask; ++i) - tx_queue->buffer[i].continuation = true; + + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) { + tx_queue->tsoh_page = + kcalloc(efx_tsoh_page_count(tx_queue), + sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL); + if (!tx_queue->tsoh_page) { + rc = -ENOMEM; + goto fail1; + } + } /* Allocate hardware ring */ rc = efx_nic_probe_tx(tx_queue); if (rc) - goto fail; + goto fail2; return 0; - fail: +fail2: + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; +fail1: kfree(tx_queue->buffer); tx_queue->buffer = NULL; return rc; @@ -546,8 +555,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue) unsigned int pkts_compl = 0, bytes_compl = 0; buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; } @@ -568,13 +575,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) efx_nic_fini_tx(tx_queue); efx_release_tx_buffers(tx_queue); - - /* Free up TSO header cache */ - efx_fini_tso(tx_queue); } void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) { + int i; + if (!tx_queue->buffer) return; @@ -582,6 +588,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) "destroying TX queue %d\n", tx_queue->queue); efx_nic_remove_tx(tx_queue); + if (tx_queue->tsoh_page) { + for (i = 0; i < efx_tsoh_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->tsoh_page[i]); + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; + } + kfree(tx_queue->buffer); tx_queue->buffer = NULL; } @@ -604,22 +618,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) #define TSOH_OFFSET NET_IP_ALIGN #endif -#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET) - -/* Total size of struct efx_tso_header, buffer and padding */ -#define TSOH_SIZE(hdr_len) \ - (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len) - -/* Size of blocks on free list. Larger blocks must be allocated from - * the heap. - */ -#define TSOH_STD_SIZE 128 - #define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) -#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data) -#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data) -#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data) -#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data) /** * struct tso_state - TSO state for an SKB @@ -631,10 +630,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) * @in_len: Remaining length in current SKB fragment * @unmap_len: Length of SKB fragment * @unmap_addr: DMA address of SKB fragment - * @unmap_single: DMA single vs page mapping flag + * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 * @protocol: Network protocol (after any VLAN header) + * @ip_off: Offset of IP header + * @tcp_off: Offset of TCP header * @header_len: Number of bytes of header - * @full_packet_size: Number of bytes to put in each outgoing segment + * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload * * The state used during segmentation. It is put into this data structure * just to make it easy to pass into inline functions. @@ -651,11 +652,13 @@ struct tso_state { unsigned in_len; unsigned unmap_len; dma_addr_t unmap_addr; - bool unmap_single; + unsigned short dma_flags; __be16 protocol; + unsigned int ip_off; + unsigned int tcp_off; unsigned header_len; - int full_packet_size; + unsigned int ip_base_len; }; @@ -687,91 +690,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb) return protocol; } - -/* - * Allocate a page worth of efx_tso_header structures, and string them - * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM. - */ -static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue) +static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, unsigned int len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - struct efx_tso_header *tsoh; - dma_addr_t dma_addr; - u8 *base_kva, *kva; + u8 *result; - base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC); - if (base_kva == NULL) { - netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev, - "Unable to allocate page for TSO headers\n"); - return -ENOMEM; - } - - /* dma_alloc_coherent() allocates pages. */ - EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u)); - - for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) { - tsoh = (struct efx_tso_header *)kva; - tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva); - tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh; - } - - return 0; -} - - -/* Free up a TSO header, and all others in the same page. */ -static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, - struct device *dma_dev) -{ - struct efx_tso_header **p; - unsigned long base_kva; - dma_addr_t base_dma; - - base_kva = (unsigned long)tsoh & PAGE_MASK; - base_dma = tsoh->dma_addr & PAGE_MASK; - - p = &tx_queue->tso_headers_free; - while (*p != NULL) { - if (((unsigned long)*p & PAGE_MASK) == base_kva) - *p = (*p)->next; - else - p = &(*p)->next; - } + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); - dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma); -} + if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) { + unsigned index = + (tx_queue->insert_count & tx_queue->ptr_mask) / 2; + struct efx_buffer *page_buf = + &tx_queue->tsoh_page[index / TSOH_PER_PAGE]; + unsigned offset = + TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET; + + if (unlikely(!page_buf->addr) && + efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE)) + return NULL; + + result = (u8 *)page_buf->addr + offset; + buffer->dma_addr = page_buf->dma_addr + offset; + buffer->flags = EFX_TX_BUF_CONT; + } else { + tx_queue->tso_long_headers++; -static struct efx_tso_header * -efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) -{ - struct efx_tso_header *tsoh; - - tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA); - if (unlikely(!tsoh)) - return NULL; - - tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, - TSOH_BUFFER(tsoh), header_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr))) { - kfree(tsoh); - return NULL; + buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC); + if (unlikely(!buffer->heap_buf)) + return NULL; + result = (u8 *)buffer->heap_buf + TSOH_OFFSET; + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; } - tsoh->unmap_len = header_len; - return tsoh; -} + buffer->len = len; -static void -efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) -{ - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr, tsoh->unmap_len, - DMA_TO_DEVICE); - kfree(tsoh); + return result; } /** @@ -781,47 +736,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) * @len: Length of fragment * @final_buffer: The final buffer inserted into the queue * - * Push descriptors onto the TX queue. Return 0 on success or 1 if - * @tx_queue full. + * Push descriptors onto the TX queue. */ -static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, unsigned len, - struct efx_tx_buffer **final_buffer) +static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned len, + struct efx_tx_buffer **final_buffer) { struct efx_tx_buffer *buffer; struct efx_nic *efx = tx_queue->efx; - unsigned dma_len, fill_level, insert_ptr; - int q_space; + unsigned dma_len, insert_ptr; EFX_BUG_ON_PARANOID(len <= 0); - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - /* -1 as there is no way to represent all descriptors used */ - q_space = efx->txq_entries - 1 - fill_level; - while (1) { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have happened - * since the xmit path last checked. Update - * the xmit path's copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the change of - * queue state from the access of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - *final_buffer = NULL; - return 1; - } - smp_mb(); - netif_tx_start_queue(tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; ++tx_queue->insert_count; @@ -830,12 +757,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, tx_queue->read_count >= efx->txq_entries); - efx_tsoh_free(tx_queue, buffer); EFX_BUG_ON_PARANOID(buffer->len); EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); + EFX_BUG_ON_PARANOID(buffer->flags); buffer->dma_addr = dma_addr; @@ -845,7 +769,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, if (dma_len >= len) break; - buffer->len = dma_len; /* Don't set the other members */ + buffer->len = dma_len; + buffer->flags = EFX_TX_BUF_CONT; dma_addr += dma_len; len -= dma_len; } @@ -853,7 +778,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, EFX_BUG_ON_PARANOID(!len); buffer->len = len; *final_buffer = buffer; - return 0; } @@ -864,54 +788,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, * a single fragment, and we know it doesn't cross a page boundary. It * also allows us to not worry about end-of-packet etc. */ -static void efx_tso_put_header(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, unsigned len) +static int efx_tso_put_header(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, u8 *header) { - struct efx_tx_buffer *buffer; - - buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); - buffer->len = len; - buffer->dma_addr = tsoh->dma_addr; - buffer->tsoh = tsoh; + if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { + buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, + header, buffer->len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, + buffer->dma_addr))) { + kfree(buffer->heap_buf); + buffer->len = 0; + buffer->flags = 0; + return -ENOMEM; + } + buffer->unmap_len = buffer->len; + buffer->flags |= EFX_TX_BUF_MAP_SINGLE; + } ++tx_queue->insert_count; + return 0; } -/* Remove descriptors put into a tx_queue. */ +/* Remove buffers put into a tx_queue. None of the buffers must have + * an skb attached. + */ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) { struct efx_tx_buffer *buffer; - dma_addr_t unmap_addr; /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->skb); - if (buffer->unmap_len) { - unmap_addr = (buffer->dma_addr + buffer->len - - buffer->unmap_len); - if (buffer->unmap_single) - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - buffer->len = 0; - buffer->continuation = true; + efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); } } @@ -919,17 +831,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) /* Parse the SKB header and initialise state. */ static void tso_start(struct tso_state *st, const struct sk_buff *skb) { - /* All ethernet/IP/TCP headers combined size is TCP header size - * plus offset of TCP header relative to start of packet. - */ - st->header_len = ((tcp_hdr(skb)->doff << 2u) - + PTR_DIFF(tcp_hdr(skb), skb->data)); - st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size; - - if (st->protocol == htons(ETH_P_IP)) + st->ip_off = skb_network_header(skb) - skb->data; + st->tcp_off = skb_transport_header(skb) - skb->data; + st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + if (st->protocol == htons(ETH_P_IP)) { + st->ip_base_len = st->header_len - st->ip_off; st->ipv4_id = ntohs(ip_hdr(skb)->id); - else + } else { + st->ip_base_len = st->header_len - st->tcp_off; st->ipv4_id = 0; + } st->seqnum = ntohl(tcp_hdr(skb)->seq); EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); @@ -938,7 +849,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb) st->out_len = skb->len - st->header_len; st->unmap_len = 0; - st->unmap_single = false; + st->dma_flags = 0; } static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, @@ -947,7 +858,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = false; + st->dma_flags = 0; st->unmap_len = skb_frag_size(frag); st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; @@ -965,7 +876,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl, len, DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = true; + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; st->unmap_len = len; st->in_len = len; st->dma_addr = st->unmap_addr; @@ -982,20 +893,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, * @st: TSO state * * Form descriptors for the current fragment, until we reach the end - * of fragment or end-of-packet. Return 0 on success, 1 if not enough - * space in @tx_queue. + * of fragment or end-of-packet. */ -static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, - const struct sk_buff *skb, - struct tso_state *st) +static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + struct tso_state *st) { struct efx_tx_buffer *buffer; - int n, end_of_packet, rc; + int n; if (st->in_len == 0) - return 0; + return; if (st->packet_space == 0) - return 0; + return; EFX_BUG_ON_PARANOID(st->in_len <= 0); EFX_BUG_ON_PARANOID(st->packet_space <= 0); @@ -1006,25 +916,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, st->out_len -= n; st->in_len -= n; - rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - if (likely(rc == 0)) { - if (st->out_len == 0) - /* Transfer ownership of the skb */ - buffer->skb = skb; + efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - end_of_packet = st->out_len == 0 || st->packet_space == 0; - buffer->continuation = !end_of_packet; + if (st->out_len == 0) { + /* Transfer ownership of the skb */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + } else if (st->packet_space != 0) { + buffer->flags = EFX_TX_BUF_CONT; + } - if (st->in_len == 0) { - /* Transfer ownership of the DMA mapping */ - buffer->unmap_len = st->unmap_len; - buffer->unmap_single = st->unmap_single; - st->unmap_len = 0; - } + if (st->in_len == 0) { + /* Transfer ownership of the DMA mapping */ + buffer->unmap_len = st->unmap_len; + buffer->flags |= st->dma_flags; + st->unmap_len = 0; } st->dma_addr += n; - return rc; } @@ -1035,36 +944,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, * @st: TSO state * * Generate a new header and prepare for the new packet. Return 0 on - * success, or -1 if failed to alloc header. + * success, or -%ENOMEM if failed to alloc header. */ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, struct tso_state *st) { - struct efx_tso_header *tsoh; + struct efx_tx_buffer *buffer = + &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; struct tcphdr *tsoh_th; unsigned ip_length; u8 *header; + int rc; - /* Allocate a DMA-mapped header buffer. */ - if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) { - if (tx_queue->tso_headers_free == NULL) { - if (efx_tsoh_block_alloc(tx_queue)) - return -1; - } - EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free); - tsoh = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh->next; - tsoh->unmap_len = 0; - } else { - tx_queue->tso_long_headers++; - tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len); - if (unlikely(!tsoh)) - return -1; - } + /* Allocate and insert a DMA-mapped header buffer. */ + header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); + if (!header) + return -ENOMEM; - header = TSOH_BUFFER(tsoh); - tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb)); + tsoh_th = (struct tcphdr *)(header + st->tcp_off); /* Copy and update the headers. */ memcpy(header, skb->data, st->header_len); @@ -1073,19 +971,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->seqnum += skb_shinfo(skb)->gso_size; if (st->out_len > skb_shinfo(skb)->gso_size) { /* This packet will not finish the TSO burst. */ - ip_length = st->full_packet_size - ETH_HDR_LEN(skb); + st->packet_space = skb_shinfo(skb)->gso_size; tsoh_th->fin = 0; tsoh_th->psh = 0; } else { /* This packet will be the last in the TSO burst. */ - ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len; + st->packet_space = st->out_len; tsoh_th->fin = tcp_hdr(skb)->fin; tsoh_th->psh = tcp_hdr(skb)->psh; } + ip_length = st->ip_base_len + st->packet_space; if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = - (struct iphdr *)(header + SKB_IPV4_OFF(skb)); + struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off); tsoh_iph->tot_len = htons(ip_length); @@ -1094,16 +992,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->ipv4_id++; } else { struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + SKB_IPV6_OFF(skb)); + (struct ipv6hdr *)(header + st->ip_off); - tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph)); + tsoh_iph->payload_len = htons(ip_length); } - st->packet_space = skb_shinfo(skb)->gso_size; - ++tx_queue->tso_packets; + rc = efx_tso_put_header(tx_queue, buffer, header); + if (unlikely(rc)) + return rc; - /* Form a descriptor for this header. */ - efx_tso_put_header(tx_queue, tsoh, st->header_len); + ++tx_queue->tso_packets; return 0; } @@ -1118,13 +1016,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, * * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if * @skb was not enqueued. In all cases @skb is consumed. Return - * %NETDEV_TX_OK or %NETDEV_TX_BUSY. + * %NETDEV_TX_OK. */ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; - int frag_i, rc, rc2 = NETDEV_TX_OK; + int frag_i, rc; struct tso_state state; /* Find the packet protocol and sanity-check it */ @@ -1156,11 +1054,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, goto mem_err; while (1) { - rc = tso_fill_packet_with_fragment(tx_queue, skb, &state); - if (unlikely(rc)) { - rc2 = NETDEV_TX_BUSY; - goto unwind; - } + tso_fill_packet_with_fragment(tx_queue, skb, &state); /* Move onto the next fragment? */ if (state.in_len == 0) { @@ -1184,6 +1078,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + tx_queue->tso_bursts++; return NETDEV_TX_OK; @@ -1192,10 +1088,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, "Out of memory for TSO headers, or DMA mapping error\n"); dev_kfree_skb_any(skb); - unwind: /* Free the DMA mapping we were in the process of writing out */ if (state.unmap_len) { - if (state.unmap_single) + if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, state.unmap_len, DMA_TO_DEVICE); else @@ -1204,25 +1099,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, } efx_enqueue_unwind(tx_queue); - return rc2; -} - - -/* - * Free up all TSO datastructures associated with tx_queue. This - * routine should be called only once the tx_queue is both empty and - * will no longer be used. - */ -static void efx_fini_tso(struct efx_tx_queue *tx_queue) -{ - unsigned i; - - if (tx_queue->buffer) { - for (i = 0; i <= tx_queue->ptr_mask; ++i) - efx_tsoh_free(tx_queue, &tx_queue->buffer[i]); - } - - while (tx_queue->tso_headers_free != NULL) - efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free, - &tx_queue->efx->pci_dev->dev); + return NETDEV_TX_OK; } |