diff options
Diffstat (limited to 'drivers/net/ethernet')
33 files changed, 1691 insertions, 1096 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index d266c86a53f..5b622993ff1 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -110,6 +110,7 @@ static inline char *nic_name(struct pci_dev *pdev) #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 struct be_dma_mem { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 8c63d06ab12..701b3e9a715 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -120,7 +120,7 @@ static int be_mcc_compl_process(struct be_adapter *adapter, if (compl_status == MCC_STATUS_UNAUTHORIZED_REQUEST) { dev_warn(&adapter->pdev->dev, - "opcode %d-%d is not permitted\n", + "VF is not privileged to issue opcode %d-%d\n", opcode, subsystem); } else { extd_status = (compl->status >> CQE_STATUS_EXTD_SHIFT) & diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 78b8aa8069f..111dc8813f6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2176,8 +2176,7 @@ static uint be_num_rss_want(struct be_adapter *adapter) { u32 num = 0; if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && - !sriov_want(adapter) && be_physfn(adapter) && - !be_is_mc(adapter)) { + !sriov_want(adapter) && be_physfn(adapter)) { num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; num = min_t(u32, num, (u32)netif_get_num_default_rss_queues()); } @@ -2646,8 +2645,8 @@ static int be_vf_setup(struct be_adapter *adapter) } for_all_vfs(adapter, vf_cfg, vf) { - status = be_cmd_link_status_query(adapter, NULL, &lnk_speed, - NULL, vf + 1); + lnk_speed = 1000; + status = be_cmd_set_qos(adapter, lnk_speed, vf + 1); if (status) goto err; vf_cfg->tx_rate = lnk_speed * 10; @@ -2724,6 +2723,8 @@ static int be_get_config(struct be_adapter *adapter) if (pos) { pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF, &dev_num_vfs); + if (!lancer_chip(adapter)) + dev_num_vfs = min_t(u16, dev_num_vfs, MAX_VFS); adapter->dev_num_vfs = dev_num_vfs; } return 0; @@ -3437,6 +3438,7 @@ static void be_ctrl_cleanup(struct be_adapter *adapter) if (mem->va) dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, mem->dma); + kfree(adapter->pmac_id); } static int be_ctrl_init(struct be_adapter *adapter) @@ -3473,6 +3475,12 @@ static int be_ctrl_init(struct be_adapter *adapter) } memset(rx_filter->va, 0, rx_filter->size); + /* primary mac needs 1 pmac entry */ + adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, + sizeof(*adapter->pmac_id), GFP_KERNEL); + if (!adapter->pmac_id) + return -ENOMEM; + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); @@ -3609,12 +3617,6 @@ static int be_get_initial_config(struct be_adapter *adapter) else adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT; - /* primary mac needs 1 pmac entry */ - adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, - sizeof(u32), GFP_KERNEL); - if (!adapter->pmac_id) - return -ENOMEM; - status = be_cmd_get_cntl_attributes(adapter); if (status) return status; diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 3574e1499df..feff51664dc 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -62,6 +62,13 @@ config FSL_PQ_MDIO ---help--- This driver supports the MDIO bus used by the gianfar and UCC drivers. +config FSL_XGMAC_MDIO + tristate "Freescale XGMAC MDIO" + depends on FSL_SOC + select PHYLIB + ---help--- + This driver supports the MDIO bus on the Fman 10G Ethernet MACs. + config UCC_GETH tristate "Freescale QE Gigabit Ethernet" depends on QUICC_ENGINE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 1752488c9ee..3d1839afff6 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -9,6 +9,7 @@ ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y) endif obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_FSL_PQ_MDIO) += fsl_pq_mdio.o +obj-$(CONFIG_FSL_XGMAC_MDIO) += xgmac_mdio.o obj-$(CONFIG_GIANFAR) += gianfar_driver.o obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o gianfar_driver-objs := gianfar.o \ diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 9527b28d70d..c93a05654b4 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -19,54 +19,90 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/mm.h> #include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/crc32.h> #include <linux/mii.h> -#include <linux/phy.h> -#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_mdio.h> -#include <linux/of_platform.h> +#include <linux/of_device.h> #include <asm/io.h> -#include <asm/irq.h> -#include <asm/uaccess.h> -#include <asm/ucc.h> +#include <asm/ucc.h> /* for ucc_set_qe_mux_mii_mng() */ #include "gianfar.h" -#include "fsl_pq_mdio.h" + +#define MIIMIND_BUSY 0x00000001 +#define MIIMIND_NOTVALID 0x00000004 +#define MIIMCFG_INIT_VALUE 0x00000007 +#define MIIMCFG_RESET 0x80000000 + +#define MII_READ_COMMAND 0x00000001 + +struct fsl_pq_mii { + u32 miimcfg; /* MII management configuration reg */ + u32 miimcom; /* MII management command reg */ + u32 miimadd; /* MII management address reg */ + u32 miimcon; /* MII management control reg */ + u32 miimstat; /* MII management status reg */ + u32 miimind; /* MII management indication reg */ +}; + +struct fsl_pq_mdio { + u8 res1[16]; + u32 ieventm; /* MDIO Interrupt event register (for etsec2)*/ + u32 imaskm; /* MDIO Interrupt mask register (for etsec2)*/ + u8 res2[4]; + u32 emapm; /* MDIO Event mapping register (for etsec2)*/ + u8 res3[1280]; + struct fsl_pq_mii mii; + u8 res4[28]; + u32 utbipar; /* TBI phy address reg (only on UCC) */ + u8 res5[2728]; +} __packed; /* Number of microseconds to wait for an MII register to respond */ #define MII_TIMEOUT 1000 struct fsl_pq_mdio_priv { void __iomem *map; - struct fsl_pq_mdio __iomem *regs; + struct fsl_pq_mii __iomem *regs; + int irqs[PHY_MAX_ADDR]; +}; + +/* + * Per-device-type data. Each type of device tree node that we support gets + * one of these. + * + * @mii_offset: the offset of the MII registers within the memory map of the + * node. Some nodes define only the MII registers, and some define the whole + * MAC (which includes the MII registers). + * + * @get_tbipa: determines the address of the TBIPA register + * + * @ucc_configure: a special function for extra QE configuration + */ +struct fsl_pq_mdio_data { + unsigned int mii_offset; /* offset of the MII registers */ + uint32_t __iomem * (*get_tbipa)(void __iomem *p); + void (*ucc_configure)(phys_addr_t start, phys_addr_t end); }; /* - * Write value to the PHY at mii_id at register regnum, - * on the bus attached to the local interface, which may be different from the - * generic mdio bus (tied to a single interface), waiting until the write is - * done before returning. This is helpful in programming interfaces like - * the TBI which control interfaces like onchip SERDES and are always tied to - * the local mdio pins, which may not be the same as system mdio bus, used for + * Write value to the PHY at mii_id at register regnum, on the bus attached + * to the local interface, which may be different from the generic mdio bus + * (tied to a single interface), waiting until the write is done before + * returning. This is helpful in programming interfaces like the TBI which + * control interfaces like onchip SERDES and are always tied to the local + * mdio pins, which may not be the same as system mdio bus, used for * controlling the external PHYs, for example. */ -int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, - int regnum, u16 value) +static int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) { + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; /* Set the PHY address and the register address we want to write */ @@ -83,20 +119,21 @@ int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, } /* - * Read the bus for PHY at addr mii_id, register regnum, and - * return the value. Clears miimcom first. All PHY operation - * done on the bus attached to the local interface, - * which may be different from the generic mdio bus - * This is helpful in programming interfaces like - * the TBI which, in turn, control interfaces like onchip SERDES - * and are always tied to the local mdio pins, which may not be the + * Read the bus for PHY at addr mii_id, register regnum, and return the value. + * Clears miimcom first. + * + * All PHY operation done on the bus attached to the local interface, which + * may be different from the generic mdio bus. This is helpful in programming + * interfaces like the TBI which, in turn, control interfaces like on-chip + * SERDES and are always tied to the local mdio pins, which may not be the * same as system mdio bus, used for controlling the external PHYs, for eg. */ -int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, - int mii_id, int regnum) +static int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { - u16 value; + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; + u16 value; /* Set the PHY address and the register address we want to read */ out_be32(®s->miimadd, (mii_id << 8) | regnum); @@ -115,44 +152,15 @@ int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, /* Grab the value of the register from miimstat */ value = in_be32(®s->miimstat); + dev_dbg(&bus->dev, "read %04x from address %x/%x\n", value, mii_id, regnum); return value; } -static struct fsl_pq_mdio __iomem *fsl_pq_mdio_get_regs(struct mii_bus *bus) -{ - struct fsl_pq_mdio_priv *priv = bus->priv; - - return priv->regs; -} - -/* - * Write value to the PHY at mii_id at register regnum, - * on the bus, waiting until the write is done before returning. - */ -int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) -{ - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); - - /* Write to the local MII regs */ - return fsl_pq_local_mdio_write(regs, mii_id, regnum, value); -} - -/* - * Read the bus for PHY at addr mii_id, register regnum, and - * return the value. Clears miimcom first. - */ -int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum) -{ - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); - - /* Read the local MII regs */ - return fsl_pq_local_mdio_read(regs, mii_id, regnum); -} - /* Reset the MIIM registers, and wait for the bus to free */ static int fsl_pq_mdio_reset(struct mii_bus *bus) { - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; mutex_lock(&bus->mdio_lock); @@ -170,234 +178,291 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) mutex_unlock(&bus->mdio_lock); if (!status) { - printk(KERN_ERR "%s: The MII Bus is stuck!\n", - bus->name); + dev_err(&bus->dev, "timeout waiting for MII bus\n"); return -EBUSY; } return 0; } -void fsl_pq_mdio_bus_name(char *name, struct device_node *np) +#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +/* + * This is mildly evil, but so is our hardware for doing this. + * Also, we have to cast back to struct gfar because of + * definition weirdness done in gianfar.h. + */ +static uint32_t __iomem *get_gfar_tbipa(void __iomem *p) { - const u32 *addr; - u64 taddr = OF_BAD_ADDR; - - addr = of_get_address(np, 0, NULL, NULL); - if (addr) - taddr = of_translate_address(np, addr); + struct gfar __iomem *enet_regs = p; - snprintf(name, MII_BUS_ID_SIZE, "%s@%llx", np->name, - (unsigned long long)taddr); + return &enet_regs->tbipa; } -EXPORT_SYMBOL_GPL(fsl_pq_mdio_bus_name); +/* + * Return the TBIPAR address for an eTSEC2 node + */ +static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) +{ + return p; +} +#endif -static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np) +#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +/* + * Return the TBIPAR address for a QE MDIO node + */ +static uint32_t __iomem *get_ucc_tbipa(void __iomem *p) { -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) - struct gfar __iomem *enet_regs; + struct fsl_pq_mdio __iomem *mdio = p; - /* - * This is mildly evil, but so is our hardware for doing this. - * Also, we have to cast back to struct gfar because of - * definition weirdness done in gianfar.h. - */ - if(of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "gianfar")) { - enet_regs = (struct gfar __iomem *)regs; - return &enet_regs->tbipa; - } else if (of_device_is_compatible(np, "fsl,etsec2-mdio") || - of_device_is_compatible(np, "fsl,etsec2-tbi")) { - return of_iomap(np, 1); - } -#endif - return NULL; + return &mdio->utbipar; } - -static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id) +/* + * Find the UCC node that controls the given MDIO node + * + * For some reason, the QE MDIO nodes are not children of the UCC devices + * that control them. Therefore, we need to scan all UCC nodes looking for + * the one that encompases the given MDIO node. We do this by comparing + * physical addresses. The 'start' and 'end' addresses of the MDIO node are + * passed, and the correct UCC node will cover the entire address range. + * + * This assumes that there is only one QE MDIO node in the entire device tree. + */ +static void ucc_configure(phys_addr_t start, phys_addr_t end) { -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) + static bool found_mii_master; struct device_node *np = NULL; - int err = 0; - for_each_compatible_node(np, NULL, "ucc_geth") { - struct resource tempres; + if (found_mii_master) + return; - err = of_address_to_resource(np, 0, &tempres); - if (err) + for_each_compatible_node(np, NULL, "ucc_geth") { + struct resource res; + const uint32_t *iprop; + uint32_t id; + int ret; + + ret = of_address_to_resource(np, 0, &res); + if (ret < 0) { + pr_debug("fsl-pq-mdio: no address range in node %s\n", + np->full_name); continue; + } /* if our mdio regs fall within this UCC regs range */ - if ((start >= tempres.start) && (end <= tempres.end)) { - /* Find the id of the UCC */ - const u32 *id; - - id = of_get_property(np, "cell-index", NULL); - if (!id) { - id = of_get_property(np, "device-id", NULL); - if (!id) - continue; + if ((start < res.start) || (end > res.end)) + continue; + + iprop = of_get_property(np, "cell-index", NULL); + if (!iprop) { + iprop = of_get_property(np, "device-id", NULL); + if (!iprop) { + pr_debug("fsl-pq-mdio: no UCC ID in node %s\n", + np->full_name); + continue; } + } - *ucc_id = *id; + id = be32_to_cpup(iprop); - return 0; + /* + * cell-index and device-id for QE nodes are + * numbered from 1, not 0. + */ + if (ucc_set_qe_mux_mii_mng(id - 1) < 0) { + pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n", + np->full_name); + continue; } + + pr_debug("fsl-pq-mdio: setting node UCC%u to MII master\n", id); + found_mii_master = true; } +} - if (err) - return err; - else - return -EINVAL; -#else - return -ENODEV; #endif -} -static int fsl_pq_mdio_probe(struct platform_device *ofdev) +static struct of_device_id fsl_pq_mdio_match[] = { +#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) + { + .compatible = "fsl,gianfar-tbi", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .compatible = "fsl,gianfar-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .type = "mdio", + .compatible = "gianfar", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .compatible = "fsl,etsec2-tbi", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_etsec_tbipa, + }, + }, + { + .compatible = "fsl,etsec2-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_etsec_tbipa, + }, + }, +#endif +#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) + { + .compatible = "fsl,ucc-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_ucc_tbipa, + .ucc_configure = ucc_configure, + }, + }, + { + /* Legacy UCC MDIO node */ + .type = "mdio", + .compatible = "ucc_geth_phy", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_ucc_tbipa, + .ucc_configure = ucc_configure, + }, + }, +#endif + /* No Kconfig option for Fman support yet */ + { + .compatible = "fsl,fman-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + /* Fman TBI operations are handled elsewhere */ + }, + }, + + {}, +}; +MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match); + +static int fsl_pq_mdio_probe(struct platform_device *pdev) { - struct device_node *np = ofdev->dev.of_node; + const struct of_device_id *id = + of_match_device(fsl_pq_mdio_match, &pdev->dev); + const struct fsl_pq_mdio_data *data = id->data; + struct device_node *np = pdev->dev.of_node; + struct resource res; struct device_node *tbi; struct fsl_pq_mdio_priv *priv; - struct fsl_pq_mdio __iomem *regs = NULL; - void __iomem *map; - u32 __iomem *tbipa; struct mii_bus *new_bus; - int tbiaddr = -1; - const u32 *addrp; - u64 addr = 0, size = 0; int err; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible); - new_bus = mdiobus_alloc(); - if (!new_bus) { - err = -ENOMEM; - goto err_free_priv; - } + new_bus = mdiobus_alloc_size(sizeof(*priv)); + if (!new_bus) + return -ENOMEM; + priv = new_bus->priv; new_bus->name = "Freescale PowerQUICC MII Bus", - new_bus->read = &fsl_pq_mdio_read, - new_bus->write = &fsl_pq_mdio_write, - new_bus->reset = &fsl_pq_mdio_reset, - new_bus->priv = priv; - fsl_pq_mdio_bus_name(new_bus->id, np); - - addrp = of_get_address(np, 0, &size, NULL); - if (!addrp) { - err = -EINVAL; - goto err_free_bus; + new_bus->read = &fsl_pq_mdio_read; + new_bus->write = &fsl_pq_mdio_write; + new_bus->reset = &fsl_pq_mdio_reset; + new_bus->irq = priv->irqs; + + err = of_address_to_resource(np, 0, &res); + if (err < 0) { + dev_err(&pdev->dev, "could not obtain address information\n"); + goto error; } - /* Set the PHY base address */ - addr = of_translate_address(np, addrp); - if (addr == OF_BAD_ADDR) { - err = -EINVAL; - goto err_free_bus; - } + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s@%llx", np->name, + (unsigned long long)res.start); - map = ioremap(addr, size); - if (!map) { + priv->map = of_iomap(np, 0); + if (!priv->map) { err = -ENOMEM; - goto err_free_bus; + goto error; } - priv->map = map; - - if (of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "fsl,ucc-mdio") || - of_device_is_compatible(np, "ucc_geth_phy")) - map -= offsetof(struct fsl_pq_mdio, miimcfg); - regs = map; - priv->regs = regs; - - new_bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (NULL == new_bus->irq) { - err = -ENOMEM; - goto err_unmap_regs; + /* + * Some device tree nodes represent only the MII registers, and + * others represent the MAC and MII registers. The 'mii_offset' field + * contains the offset of the MII registers inside the mapped register + * space. + */ + if (data->mii_offset > resource_size(&res)) { + dev_err(&pdev->dev, "invalid register map\n"); + err = -EINVAL; + goto error; } + priv->regs = priv->map + data->mii_offset; - new_bus->parent = &ofdev->dev; - dev_set_drvdata(&ofdev->dev, new_bus); - - if (of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "fsl,etsec2-mdio") || - of_device_is_compatible(np, "fsl,etsec2-tbi") || - of_device_is_compatible(np, "gianfar")) { - tbipa = get_gfar_tbipa(regs, np); - if (!tbipa) { - err = -EINVAL; - goto err_free_irqs; - } - } else if (of_device_is_compatible(np, "fsl,ucc-mdio") || - of_device_is_compatible(np, "ucc_geth_phy")) { - u32 id; - static u32 mii_mng_master; - - tbipa = ®s->utbipar; - - if ((err = get_ucc_id_for_range(addr, addr + size, &id))) - goto err_free_irqs; + new_bus->parent = &pdev->dev; + dev_set_drvdata(&pdev->dev, new_bus); - if (!mii_mng_master) { - mii_mng_master = id; - ucc_set_qe_mux_mii_mng(id - 1); + if (data->get_tbipa) { + for_each_child_of_node(np, tbi) { + if (strcmp(tbi->type, "tbi-phy") == 0) { + dev_dbg(&pdev->dev, "found TBI PHY node %s\n", + strrchr(tbi->full_name, '/') + 1); + break; + } } - } else { - err = -ENODEV; - goto err_free_irqs; - } - for_each_child_of_node(np, tbi) { - if (!strncmp(tbi->type, "tbi-phy", 8)) - break; - } + if (tbi) { + const u32 *prop = of_get_property(tbi, "reg", NULL); + uint32_t __iomem *tbipa; - if (tbi) { - const u32 *prop = of_get_property(tbi, "reg", NULL); + if (!prop) { + dev_err(&pdev->dev, + "missing 'reg' property in node %s\n", + tbi->full_name); + err = -EBUSY; + goto error; + } - if (prop) - tbiaddr = *prop; + tbipa = data->get_tbipa(priv->map); - if (tbiaddr == -1) { - err = -EBUSY; - goto err_free_irqs; - } else { - out_be32(tbipa, tbiaddr); + out_be32(tbipa, be32_to_cpup(prop)); } } + if (data->ucc_configure) + data->ucc_configure(res.start, res.end); + err = of_mdiobus_register(new_bus, np); if (err) { - printk (KERN_ERR "%s: Cannot register as MDIO bus\n", - new_bus->name); - goto err_free_irqs; + dev_err(&pdev->dev, "cannot register %s as MDIO bus\n", + new_bus->name); + goto error; } return 0; -err_free_irqs: - kfree(new_bus->irq); -err_unmap_regs: - iounmap(priv->map); -err_free_bus: +error: + if (priv->map) + iounmap(priv->map); + kfree(new_bus); -err_free_priv: - kfree(priv); + return err; } -static int fsl_pq_mdio_remove(struct platform_device *ofdev) +static int fsl_pq_mdio_remove(struct platform_device *pdev) { - struct device *device = &ofdev->dev; + struct device *device = &pdev->dev; struct mii_bus *bus = dev_get_drvdata(device); struct fsl_pq_mdio_priv *priv = bus->priv; @@ -406,41 +471,11 @@ static int fsl_pq_mdio_remove(struct platform_device *ofdev) dev_set_drvdata(device, NULL); iounmap(priv->map); - bus->priv = NULL; mdiobus_free(bus); - kfree(priv); return 0; } -static struct of_device_id fsl_pq_mdio_match[] = { - { - .type = "mdio", - .compatible = "ucc_geth_phy", - }, - { - .type = "mdio", - .compatible = "gianfar", - }, - { - .compatible = "fsl,ucc-mdio", - }, - { - .compatible = "fsl,gianfar-tbi", - }, - { - .compatible = "fsl,gianfar-mdio", - }, - { - .compatible = "fsl,etsec2-tbi", - }, - { - .compatible = "fsl,etsec2-mdio", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match); - static struct platform_driver fsl_pq_mdio_driver = { .driver = { .name = "fsl-pq_mdio", diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.h b/drivers/net/ethernet/freescale/fsl_pq_mdio.h deleted file mode 100644 index bd17a2a0139..00000000000 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Freescale PowerQUICC MDIO Driver -- MII Management Bus Implementation - * Driver for the MDIO bus controller on Freescale PowerQUICC processors - * - * Author: Andy Fleming - * Modifier: Sandeep Gopalpet - * - * Copyright 2002-2004, 2008-2009 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#ifndef __FSL_PQ_MDIO_H -#define __FSL_PQ_MDIO_H - -#define MIIMIND_BUSY 0x00000001 -#define MIIMIND_NOTVALID 0x00000004 -#define MIIMCFG_INIT_VALUE 0x00000007 -#define MIIMCFG_RESET 0x80000000 - -#define MII_READ_COMMAND 0x00000001 - -struct fsl_pq_mdio { - u8 res1[16]; - u32 ieventm; /* MDIO Interrupt event register (for etsec2)*/ - u32 imaskm; /* MDIO Interrupt mask register (for etsec2)*/ - u8 res2[4]; - u32 emapm; /* MDIO Event mapping register (for etsec2)*/ - u8 res3[1280]; - u32 miimcfg; /* MII management configuration reg */ - u32 miimcom; /* MII management command reg */ - u32 miimadd; /* MII management address reg */ - u32 miimcon; /* MII management control reg */ - u32 miimstat; /* MII management status reg */ - u32 miimind; /* MII management indication reg */ - u8 reserved[28]; /* Space holder */ - u32 utbipar; /* TBI phy address reg (only on UCC) */ - u8 res4[2728]; -} __packed; - -int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum); -int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); -int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, - int regnum, u16 value); -int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, int mii_id, int regnum); -int __init fsl_pq_mdio_init(void); -void fsl_pq_mdio_exit(void); -void fsl_pq_mdio_bus_name(char *name, struct device_node *np); -#endif /* FSL_PQ_MDIO_H */ diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d3233f59a82..4d5b58ce129 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -100,7 +100,6 @@ #include <linux/of_net.h> #include "gianfar.h" -#include "fsl_pq_mdio.h" #define TX_TIMEOUT (1*HZ) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 21c6574c5f1..16428843922 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -42,7 +42,6 @@ #include <asm/machdep.h> #include "ucc_geth.h" -#include "fsl_pq_mdio.h" #undef DEBUG diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c new file mode 100644 index 00000000000..1afb5ea2a98 --- /dev/null +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -0,0 +1,274 @@ +/* + * QorIQ 10G MDIO Controller + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * Authors: Andy Fleming <afleming@freescale.com> + * Timur Tabi <timur@freescale.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/phy.h> +#include <linux/mdio.h> +#include <linux/of_platform.h> +#include <linux/of_mdio.h> + +/* Number of microseconds to wait for a register to respond */ +#define TIMEOUT 1000 + +struct tgec_mdio_controller { + __be32 reserved[12]; + __be32 mdio_stat; /* MDIO configuration and status */ + __be32 mdio_ctl; /* MDIO control */ + __be32 mdio_data; /* MDIO data */ + __be32 mdio_addr; /* MDIO address */ +} __packed; + +#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8) +#define MDIO_STAT_BSY (1 << 0) +#define MDIO_STAT_RD_ER (1 << 1) +#define MDIO_CTL_DEV_ADDR(x) (x & 0x1f) +#define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5) +#define MDIO_CTL_PRE_DIS (1 << 10) +#define MDIO_CTL_SCAN_EN (1 << 11) +#define MDIO_CTL_POST_INC (1 << 14) +#define MDIO_CTL_READ (1 << 15) + +#define MDIO_DATA(x) (x & 0xffff) +#define MDIO_DATA_BSY (1 << 31) + +/* + * Wait untill the MDIO bus is free + */ +static int xgmac_wait_until_free(struct device *dev, + struct tgec_mdio_controller __iomem *regs) +{ + uint32_t status; + + /* Wait till the bus is free */ + status = spin_event_timeout( + !((in_be32(®s->mdio_stat)) & MDIO_STAT_BSY), TIMEOUT, 0); + if (!status) { + dev_err(dev, "timeout waiting for bus to be free\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * Wait till the MDIO read or write operation is complete + */ +static int xgmac_wait_until_done(struct device *dev, + struct tgec_mdio_controller __iomem *regs) +{ + uint32_t status; + + /* Wait till the MDIO write is complete */ + status = spin_event_timeout( + !((in_be32(®s->mdio_data)) & MDIO_DATA_BSY), TIMEOUT, 0); + if (!status) { + dev_err(dev, "timeout waiting for operation to complete\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * Write value to the PHY for this device to the register at regnum,waiting + * until the write is done before it returns. All PHY configuration has to be + * done through the TSEC1 MIIM regs. + */ +static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + uint16_t dev_addr = regnum >> 16; + int ret; + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Set the port and dev addr */ + out_be32(®s->mdio_ctl, + MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr)); + + /* Set the register address */ + out_be32(®s->mdio_addr, regnum & 0xffff); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Write the value to the register */ + out_be32(®s->mdio_data, MDIO_DATA(value)); + + ret = xgmac_wait_until_done(&bus->dev, regs); + if (ret) + return ret; + + return 0; +} + +/* + * Reads from register regnum in the PHY for device dev, returning the value. + * Clears miimcom first. All PHY configuration has to be done through the + * TSEC1 MIIM regs. + */ +static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + uint16_t dev_addr = regnum >> 16; + uint32_t mdio_ctl; + uint16_t value; + int ret; + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Set the Port and Device Addrs */ + mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); + out_be32(®s->mdio_ctl, mdio_ctl); + + /* Set the register address */ + out_be32(®s->mdio_addr, regnum & 0xffff); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Initiate the read */ + out_be32(®s->mdio_ctl, mdio_ctl | MDIO_CTL_READ); + + ret = xgmac_wait_until_done(&bus->dev, regs); + if (ret) + return ret; + + /* Return all Fs if nothing was there */ + if (in_be32(®s->mdio_stat) & MDIO_STAT_RD_ER) { + dev_err(&bus->dev, "MDIO read error\n"); + return 0xffff; + } + + value = in_be32(®s->mdio_data) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", value); + + return value; +} + +/* Reset the MIIM registers, and wait for the bus to free */ +static int xgmac_mdio_reset(struct mii_bus *bus) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + int ret; + + mutex_lock(&bus->mdio_lock); + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static int __devinit xgmac_mdio_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct mii_bus *bus; + struct resource res; + int ret; + + ret = of_address_to_resource(np, 0, &res); + if (ret) { + dev_err(&pdev->dev, "could not obtain address\n"); + return ret; + } + + bus = mdiobus_alloc_size(PHY_MAX_ADDR * sizeof(int)); + if (!bus) + return -ENOMEM; + + bus->name = "Freescale XGMAC MDIO Bus"; + bus->read = xgmac_mdio_read; + bus->write = xgmac_mdio_write; + bus->reset = xgmac_mdio_reset; + bus->irq = bus->priv; + bus->parent = &pdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start); + + /* Set the PHY base address */ + bus->priv = of_iomap(np, 0); + if (!bus->priv) { + ret = -ENOMEM; + goto err_ioremap; + } + + ret = of_mdiobus_register(bus, np); + if (ret) { + dev_err(&pdev->dev, "cannot register MDIO bus\n"); + goto err_registration; + } + + dev_set_drvdata(&pdev->dev, bus); + + return 0; + +err_registration: + iounmap(bus->priv); + +err_ioremap: + mdiobus_free(bus); + + return ret; +} + +static int __devexit xgmac_mdio_remove(struct platform_device *pdev) +{ + struct mii_bus *bus = dev_get_drvdata(&pdev->dev); + + mdiobus_unregister(bus); + iounmap(bus->priv); + mdiobus_free(bus); + + return 0; +} + +static struct of_device_id xgmac_mdio_match[] = { + { + .compatible = "fsl,fman-xmdio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgmac_mdio_match); + +static struct platform_driver xgmac_mdio_driver = { + .driver = { + .name = "fsl-fman_xmdio", + .of_match_table = xgmac_mdio_match, + }, + .probe = xgmac_mdio_probe, + .remove = xgmac_mdio_remove, +}; + +module_platform_driver(xgmac_mdio_driver); + +MODULE_DESCRIPTION("Freescale QorIQ 10G MDIO Controller"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 736a7d987db..9089d00f142 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -174,6 +174,20 @@ static int e1000_get_settings(struct net_device *netdev, ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) || hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 1; MDI => 0 */ + if ((hw->media_type == e1000_media_type_copper) && + netif_carrier_ok(netdev)) + ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ? + ETH_TP_MDI_X : + ETH_TP_MDI); + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->mdix; return 0; } @@ -183,6 +197,22 @@ static int e1000_set_settings(struct net_device *netdev, struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -199,12 +229,21 @@ static int e1000_set_settings(struct net_device *netdev, ecmd->advertising = hw->autoneg_advertised; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->flags); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->mdix = AUTO_ALL_MODES; + else + hw->mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3bfbb8df898..0ae2fcfa512 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -4939,6 +4939,10 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + hw->mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 0349e2478df..2e76f06720f 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -199,6 +199,11 @@ static int e1000_get_settings(struct net_device *netdev, else ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -241,6 +246,10 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: @@ -264,6 +273,22 @@ static int e1000_set_settings(struct net_device *netdev, return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) usleep_range(1000, 2000); @@ -282,20 +307,32 @@ static int e1000_set_settings(struct net_device *netdev, hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { e1000e_down(adapter); e1000e_up(adapter); - } else { + } else e1000e_reset(adapter); - } clear_bit(__E1000_RESETTING, &adapter->state); return 0; diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index b860d4f7ea2..fc62a3f3a5b 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -84,8 +84,9 @@ static const u16 e1000_igp_2_cable_length_table[] = { #define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 /* I82577 PHY Control 2 */ -#define I82577_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82577_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82577 PHY Diagnostics Status */ #define I82577_DSTATUS_CABLE_LENGTH 0x03FC @@ -702,6 +703,32 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) if (ret_val) return ret_val; + /* Set MDI/MDIX mode */ + ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data); + if (ret_val) + return ret_val; + return e1000_set_master_slave_mode(hw); } diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 7be98b6f105..3404bc79f4c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -464,6 +464,32 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw) phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); + if (ret_val) + goto out; + + /* Set MDI/MDIX mode */ + ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); + if (ret_val) + goto out; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); out: return ret_val; @@ -2246,8 +2272,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) if (ret_val) goto out; - phy_data &= ~I82580_PHY_CTRL2_AUTO_MDIX; - phy_data &= ~I82580_PHY_CTRL2_FORCE_MDI_MDIX; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); if (ret_val) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 34e40619f16..6ac3299bfcb 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -111,8 +111,9 @@ s32 igb_check_polarity_m88(struct e1000_hw *hw); #define I82580_PHY_STATUS2_SPEED_100MBPS 0x0100 /* I82580 PHY Control 2 */ -#define I82580_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82580_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82580_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82580_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82580_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82580 PHY Diagnostics Status */ #define I82580_DSTATUS_CABLE_LENGTH 0x03FC diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 70591117051..be02168f130 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -198,6 +198,19 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) } ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == e1000_media_type_copper) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -214,6 +227,22 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) msleep(1); @@ -227,12 +256,25 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__IGB_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { igb_down(adapter); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 48cc4fb1a30..73cc273ef98 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6675,6 +6675,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b9623e9ea89..bffcf1f2357 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -78,6 +78,9 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -104,6 +107,7 @@ #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) #define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_NO_IFCS (u32)(1 << 9) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -293,16 +297,25 @@ struct ixgbe_ring_feature { * this is twice the size of a half page we need to double the page order * for FCoE enabled Rx queues. */ -#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { - return test_bit(__IXGBE_RX_FCOE, &ring->state) ? 1 : 0; +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : + IXGBE_RXBUFFER_3K; +#endif + return IXGBE_RXBUFFER_2K; } -#else -#define ixgbe_rx_pg_order(_ring) 0 + +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? 1 : 0; #endif + return 0; +} #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) -#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4326f74f713..fa0d6e1561c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1167,7 +1167,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } bi->dma = dma; - bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + bi->page_offset = 0; return true; } @@ -1320,29 +1320,6 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return max_len; } -static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - __le32 rsc_enabled; - u32 rsc_cnt; - - if (!ring_is_rsc_enabled(rx_ring)) - return; - - rsc_enabled = rx_desc->wb.lower.lo_dword.data & - cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); - - /* If this is an RSC frame rsc_cnt should be non-zero */ - if (!rsc_enabled) - return; - - rsc_cnt = le32_to_cpu(rsc_enabled); - rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; - - IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; -} - static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { @@ -1440,16 +1417,28 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, prefetch(IXGBE_RX_DESC(rx_ring, ntc)); - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; + /* update RSC append count if present */ + if (ring_is_rsc_enabled(rx_ring)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); - /* append_cnt indicates packet is RSC, if so fetch nextp */ - if (IXGBE_CB(skb)->append_cnt) { - ntc = le32_to_cpu(rx_desc->wb.upper.status_error); - ntc &= IXGBE_RXDADV_NEXTP_MASK; - ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; + IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; + + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } } + /* if we are the last buffer then there is nothing else to do */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + /* place skb in next buffer to be received */ rx_ring->rx_buffer_info[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; @@ -1458,6 +1447,78 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, } /** + * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being adjusted + * + * This function is an ixgbe specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being updated + * + * This function provides a basic DMA sync up for the first fragment of an + * skb. The reason for doing this is that the first fragment cannot be + * unmapped until we have reached the end of packet descriptor for a buffer + * chain. + */ +static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; +} + +/** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor @@ -1479,24 +1540,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; struct net_device *netdev = rx_ring->netdev; - unsigned char *va; - unsigned int pull_len; - - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; - } else { - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - frag->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - IXGBE_CB(skb)->dma = 0; /* verify that the packet does not have any known errors */ if (unlikely(ixgbe_test_staterr(rx_desc, @@ -1506,40 +1550,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return true; } - /* - * it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* - * we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = skb_frag_size(frag); - if (pull_len > IXGBE_RX_HDR_SIZE) - pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; - - /* - * if we sucked the frag empty then we should free it, - * if there are other frags here something is screwed up in hardware - */ - if (skb_frag_size(frag) == 0) { - BUG_ON(skb_shinfo(skb)->nr_frags != 1); - skb_shinfo(skb)->nr_frags = 0; - __skb_frag_unref(frag); - skb->truesize -= ixgbe_rx_bufsz(rx_ring); - } + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE /* do not attempt to pad FCoE Frames as this will disrupt DDP */ @@ -1560,33 +1573,17 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /** - * ixgbe_can_reuse_page - determine if we can reuse a page - * @rx_buffer: pointer to rx_buffer containing the page we want to reuse - * - * Returns true if page can be reused in another Rx buffer - **/ -static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) -{ - struct page *page = rx_buffer->page; - - /* if we are only owner of page and it is local we can reuse it */ - return likely(page_count(page) == 1) && - likely(page_to_nid(page) == numa_node_id()); -} - -/** * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: rx descriptor ring to store buffers on * @old_buff: donor buffer to have page reused * - * Syncronizes page for reuse by the adapter + * Synchronizes page for reuse by the adapter **/ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *old_buff) { struct ixgbe_rx_buffer *new_buff; u16 nta = rx_ring->next_to_alloc; - u16 bufsz = ixgbe_rx_bufsz(rx_ring); new_buff = &rx_ring->rx_buffer_info[nta]; @@ -1597,17 +1594,13 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, /* transfer page from old buffer to new buffer */ new_buff->page = old_buff->page; new_buff->dma = old_buff->dma; - - /* flip page offset to other buffer and store to new_buff */ - new_buff->page_offset = old_buff->page_offset ^ bufsz; + new_buff->page_offset = old_buff->page_offset; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, bufsz, + new_buff->page_offset, + ixgbe_rx_bufsz(rx_ring), DMA_FROM_DEVICE); - - /* bump ref count on page before it is given to the stack */ - get_page(new_buff->page); } /** @@ -1617,20 +1610,159 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into * - * This function is based on skb_add_rx_frag. I would have used that - * function however it doesn't handle the truesize case correctly since we - * are allocating more memory than might be used for a single receive. + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. **/ -static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb, int size) + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->page, rx_buffer->page_offset, - size); - skb->len += size; - skb->data_len += size; - skb->truesize += ixgbe_rx_bufsz(rx_ring); + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_bufsz(rx_ring); +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - + ixgbe_rx_bufsz(rx_ring); +#endif + + if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + struct ixgbe_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + /* + * Delay unmapping of the first packet. It carries the + * header information, HW may still access the header + * after the writeback. Only unmap it when EOP is + * reached + */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + goto dma_sync; + + IXGBE_CB(skb)->dma = rx_buffer->dma; + } else { + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + ixgbe_dma_sync_frag(rx_ring, skb); + +dma_sync: + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + + /* pull page into skb */ + if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; } /** @@ -1658,11 +1790,8 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, u16 cleaned_count = ixgbe_desc_unused(rx_ring); do { - struct ixgbe_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; struct sk_buff *skb; - struct page *page; - u16 ntc; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -1670,9 +1799,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, cleaned_count = 0; } - ntc = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, ntc); - rx_buffer = &rx_ring->rx_buffer_info[ntc]; + rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) break; @@ -1684,75 +1811,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ rmb(); - page = rx_buffer->page; - prefetchw(page); - - skb = rx_buffer->skb; + /* retrieve a buffer from the ring */ + skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif - - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - break; - } - - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. Only unmap it when EOP is - * reached - */ - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - - /* pull page into skb */ - ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, - le16_to_cpu(rx_desc->wb.upper.length)); - - if (ixgbe_can_reuse_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->skb = NULL; - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; cleaned_count++; @@ -2868,11 +2932,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ -#if PAGE_SIZE > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#else srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#endif /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -2980,13 +3040,7 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ -#if (PAGE_SIZE <= 8192) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE <= 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#endif IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -4130,27 +4184,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } /** - * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers - * @rx_ring: ring to setup - * - * On many IA platforms the L1 cache has a critical stride of 4K, this - * results in each receive buffer starting in the same cache set. To help - * reduce the pressure on this cache set we can interleave the offsets so - * that only every other buffer will be in the same cache set. - **/ -static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) -{ - struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; - u16 i; - - for (i = 0; i < rx_ring->count; i += 2) { - rx_buffer[0].page_offset = 0; - rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); - rx_buffer = &rx_buffer[2]; - } -} - -/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ @@ -4195,8 +4228,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); - ixgbe_init_rx_page_offset(rx_ring); - /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); @@ -4646,8 +4677,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - ixgbe_init_rx_page_offset(rx_ring); - return 0; err: vfree(rx_ring->rx_buffer_info); @@ -5874,9 +5903,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && - !(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) - return; + if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN)) { + if (unlikely(skb->no_fcs)) + first->tx_flags |= IXGBE_TX_FLAGS_NO_IFCS; + if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) + return; + } } else { u8 l4_hdr = 0; switch (first->protocol) { @@ -5938,7 +5970,6 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ __le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); /* set HW vlan bit if vlan is present */ @@ -5958,6 +5989,10 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) #endif cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE); + /* insert frame checksum */ + if (!(tx_flags & IXGBE_TX_FLAGS_NO_IFCS)) + cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS); + return cmd_type; } @@ -6063,8 +6098,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, if (likely(!data_len)) break; - if (unlikely(skb->no_fcs)) - cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); i++; diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index f45def01a98..876beceaf2d 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3409,7 +3409,7 @@ set_speed: pause_flags = 0; /* setup pause frame */ - if (np->duplex != 0) { + if (netif_running(dev) && (np->duplex != 0)) { if (np->autoneg && np->pause_flags & NV_PAUSEFRAME_AUTONEG) { adv_pause = adv & (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); lpa_pause = lpa & (LPA_PAUSE_CAP | LPA_PAUSE_ASYM); @@ -4435,7 +4435,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void regs->version = FORCEDETH_REGS_VER; spin_lock_irq(&np->lock); - for (i = 0; i <= np->register_size/sizeof(u32); i++) + for (i = 0; i < np->register_size/sizeof(u32); i++) rbuf[i] = readl(base + i*sizeof(u32)); spin_unlock_irq(&np->lock); } @@ -5455,6 +5455,7 @@ static int nv_close(struct net_device *dev) netif_stop_queue(dev); spin_lock_irq(&np->lock); + nv_update_pause(dev, 0); /* otherwise stop_tx bricks NIC */ nv_stop_rxtx(dev); nv_txrx_reset(dev); @@ -5904,11 +5905,19 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + netif_carrier_off(dev); + + /* Some NICs freeze when TX pause is enabled while NIC is + * down, and this stays across warm reboots. The sequence + * below should be enough to recover from that state. + */ + nv_update_pause(dev, 0); + nv_start_tx(dev); + nv_stop_tx(dev); + if (id->driver_data & DEV_HAS_VLAN) nv_vlan_mode(dev, dev->features); - netif_carrier_off(dev); - dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", dev->name, np->phy_oui, np->phyaddr, dev->dev_addr); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 65a8d49106a..a606db43c5b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ - if ((efx->state == STATE_RUNNING) || \ + if ((efx->state == STATE_READY) || \ (efx->state == STATE_DISABLED)) \ ASSERT_RTNL(); \ } while (0) +static int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + /************************************************************************** * * Event queue processing @@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_buffer_order = get_order(efx->rx_buffer_len + sizeof(struct efx_rx_page_state)); + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + /* Initialise the channels */ efx_for_each_channel(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) @@ -730,7 +750,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; - int rc = 0; + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; /* Not all channels should be reallocated. We must avoid * reallocating their buffer table entries. @@ -1365,6 +1389,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + BUG_ON(efx->state == STATE_DISABLED); + if (efx->legacy_irq) efx->legacy_irq_enabled = true; efx_nic_enable_interrupts(efx); @@ -1382,6 +1408,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + if (efx->state == STATE_DISABLED) + return; + efx_mcdi_mode_poll(efx); efx_nic_disable_interrupts(efx); @@ -1533,22 +1562,21 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* Called after previous invocation(s) of efx_stop_all, restarts the port, - * kernel transmit queues and NAPI processing, and ensures that the port is - * scheduled to be reconfigured. This function is safe to call multiple - * times when the NIC is in any state. +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. */ static void efx_start_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); /* Check that it is appropriate to restart the interface. All * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled) - return; - if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) - return; - if (!netif_running(efx->net_dev)) + if (efx->port_enabled || !netif_running(efx->net_dev)) return; efx_start_port(efx); @@ -1582,11 +1610,11 @@ static void efx_flush_all(struct efx_nic *efx) cancel_work_sync(&efx->mac_work); } -/* Quiesce hardware and software without bringing the link down. - * Safe to call multiple times, when the nic and interface is in any - * state. The caller is guaranteed to subsequently be in a position - * to modify any hardware and software state they see fit without - * taking locks. */ +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ static void efx_stop_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); @@ -1739,8 +1767,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) struct efx_nic *efx = netdev_priv(net_dev); struct mii_ioctl_data *data = if_mii(ifr); - EFX_ASSERT_RESET_SERIALISED(efx); - /* Convert phy_id from older PRTAD/DEVAD format */ if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && (data->phy_id & 0xfc00) == 0x0400) @@ -1820,13 +1846,14 @@ static void efx_netpoll(struct net_device *net_dev) static int efx_net_open(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); - EFX_ASSERT_RESET_SERIALISED(efx); + int rc; netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", raw_smp_processor_id()); - if (efx->state == STATE_DISABLED) - return -EIO; + rc = efx_check_disabled(efx); + if (rc) + return rc; if (efx->phy_mode & PHY_MODE_SPECIAL) return -EBUSY; if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) @@ -1852,10 +1879,8 @@ static int efx_net_stop(struct net_device *net_dev) netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", raw_smp_processor_id()); - if (efx->state != STATE_DISABLED) { - /* Stop the device and flush all the channels */ - efx_stop_all(efx); - } + /* Stop the device and flush all the channels */ + efx_stop_all(efx); return 0; } @@ -1915,9 +1940,11 @@ static void efx_watchdog(struct net_device *net_dev) static int efx_change_mtu(struct net_device *net_dev, int new_mtu) { struct efx_nic *efx = netdev_priv(net_dev); + int rc; - EFX_ASSERT_RESET_SERIALISED(efx); - + rc = efx_check_disabled(efx); + if (rc) + return rc; if (new_mtu > EFX_MAX_MTU) return -EINVAL; @@ -1926,8 +1953,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); mutex_lock(&efx->mac_lock); - /* Reconfigure the MAC before enabling the dma queues so that - * the RX buffers don't overflow */ net_dev->mtu = new_mtu; efx->type->reconfigure_mac(efx); mutex_unlock(&efx->mac_lock); @@ -1942,8 +1967,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) struct sockaddr *addr = data; char *new_addr = addr->sa_data; - EFX_ASSERT_RESET_SERIALISED(efx); - if (!is_valid_ether_addr(new_addr)) { netif_err(efx, drv, efx->net_dev, "invalid ethernet MAC address requested: %pM\n", @@ -2079,11 +2102,27 @@ static int efx_register_netdev(struct efx_nic *efx) rtnl_lock(); + /* Enable resets to be scheduled and check whether any were + * already requested. If so, the NIC is probably hosed so we + * abort. + */ + efx->state = STATE_READY; + smp_mb(); /* ensure we change state before checking reset_pending */ + if (efx->reset_pending) { + netif_err(efx, probe, efx->net_dev, + "aborting probe due to scheduled reset\n"); + rc = -EIO; + goto fail_locked; + } + rc = dev_alloc_name(net_dev, net_dev->name); if (rc < 0) goto fail_locked; efx_update_name(efx); + /* Always start with carrier off; PHY events will detect the link */ + netif_carrier_off(net_dev); + rc = register_netdevice(net_dev); if (rc) goto fail_locked; @@ -2094,9 +2133,6 @@ static int efx_register_netdev(struct efx_nic *efx) efx_init_tx_queue_core_txq(tx_queue); } - /* Always start with carrier off; PHY events will detect the link */ - netif_carrier_off(net_dev); - rtnl_unlock(); rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); @@ -2108,14 +2144,14 @@ static int efx_register_netdev(struct efx_nic *efx) return 0; +fail_registered: + rtnl_lock(); + unregister_netdevice(net_dev); fail_locked: + efx->state = STATE_UNINIT; rtnl_unlock(); netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); return rc; - -fail_registered: - unregister_netdev(net_dev); - return rc; } static void efx_unregister_netdev(struct efx_nic *efx) @@ -2138,7 +2174,11 @@ static void efx_unregister_netdev(struct efx_nic *efx) strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); - unregister_netdev(efx->net_dev); + + rtnl_lock(); + unregister_netdevice(efx->net_dev); + efx->state = STATE_UNINIT; + rtnl_unlock(); } /************************************************************************** @@ -2154,9 +2194,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) EFX_ASSERT_RESET_SERIALISED(efx); efx_stop_all(efx); - mutex_lock(&efx->mac_lock); - efx_stop_interrupts(efx, false); + + mutex_lock(&efx->mac_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) efx->phy_op->fini(efx); efx->type->fini(efx); @@ -2276,16 +2316,15 @@ static void efx_reset_work(struct work_struct *data) if (!pending) return; - /* If we're not RUNNING then don't reset. Leave the reset_pending - * flags set so that efx_pci_probe_main will be retried */ - if (efx->state != STATE_RUNNING) { - netif_info(efx, drv, efx->net_dev, - "scheduled reset quenched. NIC not RUNNING\n"); - return; - } - rtnl_lock(); - (void)efx_reset(efx, fls(pending) - 1); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, fls(pending) - 1); + rtnl_unlock(); } @@ -2311,6 +2350,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) } set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (ACCESS_ONCE(efx->state) != STATE_READY) + return; /* efx_process_channel() will no longer read events once a * reset is scheduled. So switch back to poll'd MCDI completions. */ @@ -2376,13 +2422,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = { /* This zeroes out and then fills in the invariants in a struct * efx_nic (including all sub-structures). */ -static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, +static int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, struct net_device *net_dev) { int i; /* Initialise common structures */ - memset(efx, 0, sizeof(*efx)); spin_lock_init(&efx->biu_lock); #ifdef CONFIG_SFC_MTD INIT_LIST_HEAD(&efx->mtd_list); @@ -2392,7 +2437,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); efx->pci_dev = pci_dev; efx->msg_enable = debug; - efx->state = STATE_INIT; + efx->state = STATE_UNINIT; strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); efx->net_dev = net_dev; @@ -2409,8 +2454,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, goto fail; } - efx->type = type; - EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); /* Higher numbered interrupt modes are less capable! */ @@ -2455,6 +2498,12 @@ static void efx_fini_struct(struct efx_nic *efx) */ static void efx_pci_remove_main(struct efx_nic *efx) { + /* Flush reset_work. It can no longer be scheduled since we + * are not READY. + */ + BUG_ON(efx->state == STATE_READY); + cancel_work_sync(&efx->reset_work); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); efx->net_dev->rx_cpu_rmap = NULL; @@ -2480,24 +2529,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) /* Mark the NIC as fini, then stop the interface */ rtnl_lock(); - efx->state = STATE_FINI; dev_close(efx->net_dev); - - /* Allow any queued efx_resets() to complete */ + efx_stop_interrupts(efx, false); rtnl_unlock(); - efx_stop_interrupts(efx, false); efx_sriov_fini(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); - /* Wait for any scheduled resets to complete. No more will be - * scheduled from this point because efx_stop_all() has been - * called, we are no longer registered with driverlink, and - * the net_device's have been removed. */ - cancel_work_sync(&efx->reset_work); - efx_pci_remove_main(efx); efx_fini_io(efx); @@ -2617,7 +2657,6 @@ static int efx_pci_probe_main(struct efx_nic *efx) static int __devinit efx_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *entry) { - const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data; struct net_device *net_dev; struct efx_nic *efx; int rc; @@ -2627,10 +2666,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, EFX_MAX_RX_QUEUES); if (!net_dev) return -ENOMEM; - net_dev->features |= (type->offload_features | NETIF_F_SG | + efx = netdev_priv(net_dev); + efx->type = (const struct efx_nic_type *) entry->driver_data; + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | @@ -2638,10 +2679,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, NETIF_F_RXCSUM); /* All offloads can be toggled */ net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA; - efx = netdev_priv(net_dev); pci_set_drvdata(pci_dev, efx); SET_NETDEV_DEV(net_dev, &pci_dev->dev); - rc = efx_init_struct(efx, type, pci_dev, net_dev); + rc = efx_init_struct(efx, pci_dev, net_dev); if (rc) goto fail1; @@ -2656,28 +2696,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, goto fail2; rc = efx_pci_probe_main(efx); - - /* Serialise against efx_reset(). No more resets will be - * scheduled since efx_stop_all() has been called, and we have - * not and never have been registered. - */ - cancel_work_sync(&efx->reset_work); - if (rc) goto fail3; - /* If there was a scheduled reset during probe, the NIC is - * probably hosed anyway. - */ - if (efx->reset_pending) { - rc = -EIO; - goto fail4; - } - - /* Switch to the running state before we expose the device to the OS, - * so that dev_open()|efx_start_all() will actually start the device */ - efx->state = STATE_RUNNING; - rc = efx_register_netdev(efx); if (rc) goto fail4; @@ -2717,12 +2738,18 @@ static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_FINI; + rtnl_lock(); - netif_device_detach(efx->net_dev); + if (efx->state != STATE_DISABLED) { + efx->state = STATE_UNINIT; - efx_stop_all(efx); - efx_stop_interrupts(efx, false); + netif_device_detach(efx->net_dev); + + efx_stop_all(efx); + efx_stop_interrupts(efx, false); + } + + rtnl_unlock(); return 0; } @@ -2731,21 +2758,25 @@ static int efx_pm_thaw(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_INIT; + rtnl_lock(); - efx_start_interrupts(efx, false); + if (efx->state != STATE_DISABLED) { + efx_start_interrupts(efx, false); - mutex_lock(&efx->mac_lock); - efx->phy_op->reconfigure(efx); - mutex_unlock(&efx->mac_lock); + mutex_lock(&efx->mac_lock); + efx->phy_op->reconfigure(efx); + mutex_unlock(&efx->mac_lock); - efx_start_all(efx); + efx_start_all(efx); - netif_device_attach(efx->net_dev); + netif_device_attach(efx->net_dev); - efx->state = STATE_RUNNING; + efx->state = STATE_READY; - efx->type->resume_wol(efx); + efx->type->resume_wol(efx); + } + + rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 5faedd855b7..f8e7e204981 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -529,9 +529,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev, if (!efx_tests) goto fail; - - ASSERT_RTNL(); - if (efx->state != STATE_RUNNING) { + if (efx->state != STATE_READY) { rc = -EIO; goto fail1; } diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 8687a6c3db0..ec1e99d0dca 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev, new_mode = PHY_MODE_SPECIAL; if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) { err = 0; - } else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) { + } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) { err = -EBUSY; } else { /* Reset the PHY, reconfigure the MAC and enable/disable diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cd9c0a98969..7ab1232494e 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -91,29 +91,31 @@ struct efx_special_buffer { }; /** - * struct efx_tx_buffer - An Efx TX buffer - * @skb: The associated socket buffer. - * Set only on the final fragment of a packet; %NULL for all other - * fragments. When this fragment completes, then we can free this - * skb. - * @tsoh: The associated TSO header structure, or %NULL if this - * buffer is not a TSO header. + * struct efx_tx_buffer - buffer state for a TX descriptor + * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be + * freed when descriptor completes + * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be + * freed when descriptor completes. * @dma_addr: DMA address of the fragment. + * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. * This field is zero when the queue slot is empty. - * @continuation: True if this fragment is not the end of a packet. - * @unmap_single: True if dma_unmap_single should be used. * @unmap_len: Length of this fragment to unmap */ struct efx_tx_buffer { - const struct sk_buff *skb; - struct efx_tso_header *tsoh; + union { + const struct sk_buff *skb; + void *heap_buf; + }; dma_addr_t dma_addr; + unsigned short flags; unsigned short len; - bool continuation; - bool unmap_single; unsigned short unmap_len; }; +#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ +#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ +#define EFX_TX_BUF_HEAP 4 /* buffer was allocated with kmalloc() */ +#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ /** * struct efx_tx_queue - An Efx TX queue @@ -133,6 +135,7 @@ struct efx_tx_buffer { * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring + * @tsoh_page: Array of pages of TSO header buffers * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. * @initialised: Has hardware queue been initialised? @@ -156,9 +159,6 @@ struct efx_tx_buffer { * variable indicates that the queue is full. This is to * avoid cache-line ping-pong between the xmit path and the * completion path. - * @tso_headers_free: A list of TSO headers allocated for this TX queue - * that are not in use, and so available for new TSO sends. The list - * is protected by the TX queue lock. * @tso_bursts: Number of times TSO xmit invoked by kernel * @tso_long_headers: Number of packets with headers too long for standard * blocks @@ -175,6 +175,7 @@ struct efx_tx_queue { struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; + struct efx_buffer *tsoh_page; struct efx_special_buffer txd; unsigned int ptr_mask; bool initialised; @@ -187,7 +188,6 @@ struct efx_tx_queue { unsigned int insert_count ____cacheline_aligned_in_smp; unsigned int write_count; unsigned int old_read_count; - struct efx_tso_header *tso_headers_free; unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; @@ -430,11 +430,9 @@ enum efx_int_mode { #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) enum nic_state { - STATE_INIT = 0, - STATE_RUNNING = 1, - STATE_FINI = 2, - STATE_DISABLED = 3, - STATE_MAX, + STATE_UNINIT = 0, /* device being probed/removed or is frozen */ + STATE_READY = 1, /* hardware ready and netdev registered */ + STATE_DISABLED = 2, /* device disabled due to hardware errors */ }; /* @@ -654,7 +652,7 @@ struct vfdi_status; * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags - * @state: Device state flag. Serialised by the rtnl_lock. + * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets * @tx_queue: TX DMA queues * @rx_queue: RX DMA queues @@ -664,6 +662,8 @@ struct vfdi_status; * should be allocated for this NIC * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. + * @txq_stop_thresh: TX queue fill level at or above which we stop it. + * @txq_wake_thresh: TX queue fill level at or below which we wake it. * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches * @sram_lim_qw: Qword address limit of SRAM @@ -774,6 +774,9 @@ struct efx_nic { unsigned rxq_entries; unsigned txq_entries; + unsigned int txq_stop_thresh; + unsigned int txq_wake_thresh; + unsigned tx_dc_base; unsigned rx_dc_base; unsigned sram_lim_qw; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 326d799762d..cdff40b6572 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) /************************************************************************** * * Generic buffer handling - * These buffers are used for interrupt status and MAC stats + * These buffers are used for interrupt status, MAC stats, etc. * **************************************************************************/ @@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) ++tx_queue->write_count; /* Create TX descriptor ring entry */ + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); EFX_POPULATE_QWORD_4(*txd, - FSF_AZ_TX_KER_CONT, buffer->continuation, + FSF_AZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, FSF_AZ_TX_KER_BYTE_COUNT, buffer->len, FSF_AZ_TX_KER_BUF_REGION, 0, FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 18713436b44..ebca75ed78d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -22,14 +22,6 @@ #include "nic.h" #include "workarounds.h" -/* - * TX descriptor ring full threshold - * - * The tx_queue descriptor ring fill-level must fall below this value - * before we restart the netif queue - */ -#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u) - static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, @@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct device *dma_dev = &tx_queue->efx->pci_dev->dev; dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - buffer->unmap_len); - if (buffer->unmap_single) + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); else dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); buffer->unmap_len = 0; - buffer->unmap_single = false; } - if (buffer->skb) { + if (buffer->flags & EFX_TX_BUF_SKB) { (*pkts_compl)++; (*bytes_compl) += buffer->skb->len; dev_kfree_skb_any((struct sk_buff *) buffer->skb); - buffer->skb = NULL; netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_HEAP) { + kfree(buffer->heap_buf); } -} -/** - * struct efx_tso_header - a DMA mapped buffer for packet headers - * @next: Linked list of free ones. - * The list is protected by the TX queue lock. - * @dma_unmap_len: Length to unmap for an oversize buffer, or 0. - * @dma_addr: The DMA address of the header below. - * - * This controls the memory used for a TSO header. Use TSOH_DATA() - * to find the packet header data. Use TSOH_SIZE() to calculate the - * total size required for a given packet header length. TSO headers - * in the free list are exactly %TSOH_STD_SIZE bytes in size. - */ -struct efx_tso_header { - union { - struct efx_tso_header *next; - size_t unmap_len; - }; - dma_addr_t dma_addr; -}; + buffer->len = 0; + buffer->flags = 0; +} static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb); -static void efx_fini_tso(struct efx_tx_queue *tx_queue); -static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh); - -static void efx_tsoh_free(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer) -{ - if (buffer->tsoh) { - if (likely(!buffer->tsoh->unmap_len)) { - buffer->tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = buffer->tsoh; - } else { - efx_tsoh_heap_free(tx_queue, buffer->tsoh); - } - buffer->tsoh = NULL; - } -} - static inline unsigned efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) @@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) return max_descs; } +/* Get partner of a TX queue, seen as part of the same net core queue */ +static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + return tx_queue - EFX_TXQ_TYPE_OFFLOAD; + else + return tx_queue + EFX_TXQ_TYPE_OFFLOAD; +} + +static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) +{ + /* We need to consider both queues that the net core sees as one */ + struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); + struct efx_nic *efx = txq1->efx; + unsigned int fill_level; + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + if (likely(fill_level < efx->txq_stop_thresh)) + return; + + /* We used the stale old_read_count above, which gives us a + * pessimistic estimate of the fill level (which may even + * validly be >= efx->txq_entries). Now try again using + * read_count (more likely to be a cache miss). + * + * If we read read_count and then conditionally stop the + * queue, it is possible for the completion path to race with + * us and complete all outstanding descriptors in the middle, + * after which there will be no more completions to wake it. + * Therefore we stop the queue first, then read read_count + * (with a memory barrier to ensure the ordering), then + * restart the queue if the fill level turns out to be low + * enough. + */ + netif_tx_stop_queue(txq1->core_txq); + smp_mb(); + txq1->old_read_count = ACCESS_ONCE(txq1->read_count); + txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries); + if (likely(fill_level < efx->txq_stop_thresh)) { + smp_mb(); + if (likely(!efx->loopback_selftest)) + netif_tx_start_queue(txq1->core_txq); + } +} + /* * Add a socket buffer to a TX queue * @@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) * This function is split out from efx_hard_start_xmit to allow the * loopback test to direct packets via specific TX queues. * - * Returns NETDEV_TX_OK or NETDEV_TX_BUSY + * Returns NETDEV_TX_OK. * You must hold netif_tx_lock() to call this function. */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) @@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; - unsigned int len, unmap_len = 0, fill_level, insert_ptr; + unsigned int len, unmap_len = 0, insert_ptr; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; - bool unmap_single; - int q_space, i = 0; - netdev_tx_t rc = NETDEV_TX_OK; + unsigned short dma_flags; + int i = 0; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return NETDEV_TX_OK; } - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - q_space = efx->txq_entries - 1 - fill_level; - /* Map for DMA. Use dma_map_single rather than dma_map_page * since this is more efficient on machines with sparse * memory. */ - unmap_single = true; + dma_flags = EFX_TX_BUF_MAP_SINGLE; dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); /* Process all fragments */ @@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Add to TX queue, splitting across DMA boundaries */ do { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have - * happened since the xmit path last - * checked. Update the xmit path's - * copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the - * change of queue state from the access - * of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - rc = NETDEV_TX_BUSY; - goto unwind; - } - smp_mb(); - if (likely(!efx->loopback_selftest)) - netif_tx_start_queue( - tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->tsoh); - EFX_BUG_ON_PARANOID(buffer->skb); + EFX_BUG_ON_PARANOID(buffer->flags); EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(!buffer->continuation); EFX_BUG_ON_PARANOID(buffer->unmap_len); dma_len = efx_max_tx_len(efx, dma_addr); @@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Fill out per descriptor fields */ buffer->len = dma_len; buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; len -= dma_len; dma_addr += dma_len; ++tx_queue->insert_count; } while (len); /* Transfer ownership of the unmapping to the final buffer */ - buffer->unmap_single = unmap_single; + buffer->flags = EFX_TX_BUF_CONT | dma_flags; buffer->unmap_len = unmap_len; unmap_len = 0; @@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) len = skb_frag_size(fragment); i++; /* Map for DMA */ - unmap_single = false; + dma_flags = 0; dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, DMA_TO_DEVICE); } /* Transfer ownership of the skb to the final buffer */ buffer->skb = skb; - buffer->continuation = false; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; netdev_tx_sent_queue(tx_queue->core_txq, skb->len); /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + return NETDEV_TX_OK; dma_err: @@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Mark the packet as transmitted, and free the SKB ourselves */ dev_kfree_skb_any(skb); - unwind: /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { unsigned int pkts_compl = 0, bytes_compl = 0; @@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->len = 0; } /* Free the fragment we were mid-way through pushing */ if (unmap_len) { - if (unmap_single) + if (dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, unmap_len, DMA_TO_DEVICE); else @@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) DMA_TO_DEVICE); } - return rc; + return NETDEV_TX_OK; } /* Remove packets from the TX queue @@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, } efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; read_ptr = tx_queue->read_count & tx_queue->ptr_mask; @@ -450,6 +423,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned fill_level; struct efx_nic *efx = tx_queue->efx; + struct efx_tx_queue *txq2; unsigned int pkts_compl = 0, bytes_compl = 0; EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); @@ -457,15 +431,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); - /* See if we need to restart the netif queue. This barrier - * separates the update of read_count from the test of the - * queue state. */ + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ smp_mb(); if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && likely(efx->port_enabled) && likely(netif_device_present(efx->net_dev))) { - fill_level = tx_queue->insert_count - tx_queue->read_count; - if (fill_level < EFX_TXQ_THRESHOLD(efx)) + txq2 = efx_tx_queue_partner(tx_queue); + fill_level = max(tx_queue->insert_count - tx_queue->read_count, + txq2->insert_count - txq2->read_count); + if (fill_level <= efx->txq_wake_thresh) netif_tx_wake_queue(tx_queue->core_txq); } @@ -480,11 +457,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } } +/* Size of page-based TSO header buffers. Larger blocks must be + * allocated from the heap. + */ +#define TSOH_STD_SIZE 128 +#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE) + +/* At most half the descriptors in the queue at any time will refer to + * a TSO header buffer, since they must always be followed by a + * payload descriptor referring to an skb. + */ +static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE); +} + int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) { struct efx_nic *efx = tx_queue->efx; unsigned int entries; - int i, rc; + int rc; /* Create the smallest power-of-two aligned ring */ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); @@ -500,17 +492,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) GFP_KERNEL); if (!tx_queue->buffer) return -ENOMEM; - for (i = 0; i <= tx_queue->ptr_mask; ++i) - tx_queue->buffer[i].continuation = true; + + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) { + tx_queue->tsoh_page = + kcalloc(efx_tsoh_page_count(tx_queue), + sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL); + if (!tx_queue->tsoh_page) { + rc = -ENOMEM; + goto fail1; + } + } /* Allocate hardware ring */ rc = efx_nic_probe_tx(tx_queue); if (rc) - goto fail; + goto fail2; return 0; - fail: +fail2: + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; +fail1: kfree(tx_queue->buffer); tx_queue->buffer = NULL; return rc; @@ -546,8 +549,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue) unsigned int pkts_compl = 0, bytes_compl = 0; buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; } @@ -568,13 +569,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) efx_nic_fini_tx(tx_queue); efx_release_tx_buffers(tx_queue); - - /* Free up TSO header cache */ - efx_fini_tso(tx_queue); } void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) { + int i; + if (!tx_queue->buffer) return; @@ -582,6 +582,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) "destroying TX queue %d\n", tx_queue->queue); efx_nic_remove_tx(tx_queue); + if (tx_queue->tsoh_page) { + for (i = 0; i < efx_tsoh_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->tsoh_page[i]); + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; + } + kfree(tx_queue->buffer); tx_queue->buffer = NULL; } @@ -604,22 +612,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) #define TSOH_OFFSET NET_IP_ALIGN #endif -#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET) - -/* Total size of struct efx_tso_header, buffer and padding */ -#define TSOH_SIZE(hdr_len) \ - (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len) - -/* Size of blocks on free list. Larger blocks must be allocated from - * the heap. - */ -#define TSOH_STD_SIZE 128 - #define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) -#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data) -#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data) -#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data) -#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data) /** * struct tso_state - TSO state for an SKB @@ -631,10 +624,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) * @in_len: Remaining length in current SKB fragment * @unmap_len: Length of SKB fragment * @unmap_addr: DMA address of SKB fragment - * @unmap_single: DMA single vs page mapping flag + * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 * @protocol: Network protocol (after any VLAN header) + * @ip_off: Offset of IP header + * @tcp_off: Offset of TCP header * @header_len: Number of bytes of header - * @full_packet_size: Number of bytes to put in each outgoing segment + * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload * * The state used during segmentation. It is put into this data structure * just to make it easy to pass into inline functions. @@ -651,11 +646,13 @@ struct tso_state { unsigned in_len; unsigned unmap_len; dma_addr_t unmap_addr; - bool unmap_single; + unsigned short dma_flags; __be16 protocol; + unsigned int ip_off; + unsigned int tcp_off; unsigned header_len; - int full_packet_size; + unsigned int ip_base_len; }; @@ -687,91 +684,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb) return protocol; } - -/* - * Allocate a page worth of efx_tso_header structures, and string them - * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM. - */ -static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue) +static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, unsigned int len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - struct efx_tso_header *tsoh; - dma_addr_t dma_addr; - u8 *base_kva, *kva; - - base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC); - if (base_kva == NULL) { - netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev, - "Unable to allocate page for TSO headers\n"); - return -ENOMEM; - } - - /* dma_alloc_coherent() allocates pages. */ - EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u)); - - for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) { - tsoh = (struct efx_tso_header *)kva; - tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva); - tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh; - } - - return 0; -} + u8 *result; + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); -/* Free up a TSO header, and all others in the same page. */ -static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, - struct device *dma_dev) -{ - struct efx_tso_header **p; - unsigned long base_kva; - dma_addr_t base_dma; - - base_kva = (unsigned long)tsoh & PAGE_MASK; - base_dma = tsoh->dma_addr & PAGE_MASK; - - p = &tx_queue->tso_headers_free; - while (*p != NULL) { - if (((unsigned long)*p & PAGE_MASK) == base_kva) - *p = (*p)->next; - else - p = &(*p)->next; - } - - dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma); -} + if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) { + unsigned index = + (tx_queue->insert_count & tx_queue->ptr_mask) / 2; + struct efx_buffer *page_buf = + &tx_queue->tsoh_page[index / TSOH_PER_PAGE]; + unsigned offset = + TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET; + + if (unlikely(!page_buf->addr) && + efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE)) + return NULL; + + result = (u8 *)page_buf->addr + offset; + buffer->dma_addr = page_buf->dma_addr + offset; + buffer->flags = EFX_TX_BUF_CONT; + } else { + tx_queue->tso_long_headers++; -static struct efx_tso_header * -efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) -{ - struct efx_tso_header *tsoh; - - tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA); - if (unlikely(!tsoh)) - return NULL; - - tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, - TSOH_BUFFER(tsoh), header_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr))) { - kfree(tsoh); - return NULL; + buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC); + if (unlikely(!buffer->heap_buf)) + return NULL; + result = (u8 *)buffer->heap_buf + TSOH_OFFSET; + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; } - tsoh->unmap_len = header_len; - return tsoh; -} + buffer->len = len; -static void -efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) -{ - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr, tsoh->unmap_len, - DMA_TO_DEVICE); - kfree(tsoh); + return result; } /** @@ -781,47 +730,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) * @len: Length of fragment * @final_buffer: The final buffer inserted into the queue * - * Push descriptors onto the TX queue. Return 0 on success or 1 if - * @tx_queue full. + * Push descriptors onto the TX queue. */ -static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, unsigned len, - struct efx_tx_buffer **final_buffer) +static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned len, + struct efx_tx_buffer **final_buffer) { struct efx_tx_buffer *buffer; struct efx_nic *efx = tx_queue->efx; - unsigned dma_len, fill_level, insert_ptr; - int q_space; + unsigned dma_len, insert_ptr; EFX_BUG_ON_PARANOID(len <= 0); - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - /* -1 as there is no way to represent all descriptors used */ - q_space = efx->txq_entries - 1 - fill_level; - while (1) { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have happened - * since the xmit path last checked. Update - * the xmit path's copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the change of - * queue state from the access of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - *final_buffer = NULL; - return 1; - } - smp_mb(); - netif_tx_start_queue(tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; ++tx_queue->insert_count; @@ -830,12 +751,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, tx_queue->read_count >= efx->txq_entries); - efx_tsoh_free(tx_queue, buffer); EFX_BUG_ON_PARANOID(buffer->len); EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); + EFX_BUG_ON_PARANOID(buffer->flags); buffer->dma_addr = dma_addr; @@ -845,7 +763,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, if (dma_len >= len) break; - buffer->len = dma_len; /* Don't set the other members */ + buffer->len = dma_len; + buffer->flags = EFX_TX_BUF_CONT; dma_addr += dma_len; len -= dma_len; } @@ -853,7 +772,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, EFX_BUG_ON_PARANOID(!len); buffer->len = len; *final_buffer = buffer; - return 0; } @@ -864,54 +782,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, * a single fragment, and we know it doesn't cross a page boundary. It * also allows us to not worry about end-of-packet etc. */ -static void efx_tso_put_header(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, unsigned len) +static int efx_tso_put_header(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, u8 *header) { - struct efx_tx_buffer *buffer; - - buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); - buffer->len = len; - buffer->dma_addr = tsoh->dma_addr; - buffer->tsoh = tsoh; + if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { + buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, + header, buffer->len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, + buffer->dma_addr))) { + kfree(buffer->heap_buf); + buffer->len = 0; + buffer->flags = 0; + return -ENOMEM; + } + buffer->unmap_len = buffer->len; + buffer->flags |= EFX_TX_BUF_MAP_SINGLE; + } ++tx_queue->insert_count; + return 0; } -/* Remove descriptors put into a tx_queue. */ +/* Remove buffers put into a tx_queue. None of the buffers must have + * an skb attached. + */ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) { struct efx_tx_buffer *buffer; - dma_addr_t unmap_addr; /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->skb); - if (buffer->unmap_len) { - unmap_addr = (buffer->dma_addr + buffer->len - - buffer->unmap_len); - if (buffer->unmap_single) - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - buffer->len = 0; - buffer->continuation = true; + efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); } } @@ -919,17 +825,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) /* Parse the SKB header and initialise state. */ static void tso_start(struct tso_state *st, const struct sk_buff *skb) { - /* All ethernet/IP/TCP headers combined size is TCP header size - * plus offset of TCP header relative to start of packet. - */ - st->header_len = ((tcp_hdr(skb)->doff << 2u) - + PTR_DIFF(tcp_hdr(skb), skb->data)); - st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size; - - if (st->protocol == htons(ETH_P_IP)) + st->ip_off = skb_network_header(skb) - skb->data; + st->tcp_off = skb_transport_header(skb) - skb->data; + st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + if (st->protocol == htons(ETH_P_IP)) { + st->ip_base_len = st->header_len - st->ip_off; st->ipv4_id = ntohs(ip_hdr(skb)->id); - else + } else { + st->ip_base_len = st->header_len - st->tcp_off; st->ipv4_id = 0; + } st->seqnum = ntohl(tcp_hdr(skb)->seq); EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); @@ -938,7 +843,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb) st->out_len = skb->len - st->header_len; st->unmap_len = 0; - st->unmap_single = false; + st->dma_flags = 0; } static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, @@ -947,7 +852,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = false; + st->dma_flags = 0; st->unmap_len = skb_frag_size(frag); st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; @@ -965,7 +870,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl, len, DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = true; + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; st->unmap_len = len; st->in_len = len; st->dma_addr = st->unmap_addr; @@ -982,20 +887,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, * @st: TSO state * * Form descriptors for the current fragment, until we reach the end - * of fragment or end-of-packet. Return 0 on success, 1 if not enough - * space in @tx_queue. + * of fragment or end-of-packet. */ -static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, - const struct sk_buff *skb, - struct tso_state *st) +static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + struct tso_state *st) { struct efx_tx_buffer *buffer; - int n, end_of_packet, rc; + int n; if (st->in_len == 0) - return 0; + return; if (st->packet_space == 0) - return 0; + return; EFX_BUG_ON_PARANOID(st->in_len <= 0); EFX_BUG_ON_PARANOID(st->packet_space <= 0); @@ -1006,25 +910,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, st->out_len -= n; st->in_len -= n; - rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - if (likely(rc == 0)) { - if (st->out_len == 0) - /* Transfer ownership of the skb */ - buffer->skb = skb; + efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - end_of_packet = st->out_len == 0 || st->packet_space == 0; - buffer->continuation = !end_of_packet; + if (st->out_len == 0) { + /* Transfer ownership of the skb */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + } else if (st->packet_space != 0) { + buffer->flags = EFX_TX_BUF_CONT; + } - if (st->in_len == 0) { - /* Transfer ownership of the DMA mapping */ - buffer->unmap_len = st->unmap_len; - buffer->unmap_single = st->unmap_single; - st->unmap_len = 0; - } + if (st->in_len == 0) { + /* Transfer ownership of the DMA mapping */ + buffer->unmap_len = st->unmap_len; + buffer->flags |= st->dma_flags; + st->unmap_len = 0; } st->dma_addr += n; - return rc; } @@ -1035,36 +938,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, * @st: TSO state * * Generate a new header and prepare for the new packet. Return 0 on - * success, or -1 if failed to alloc header. + * success, or -%ENOMEM if failed to alloc header. */ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, struct tso_state *st) { - struct efx_tso_header *tsoh; + struct efx_tx_buffer *buffer = + &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; struct tcphdr *tsoh_th; unsigned ip_length; u8 *header; + int rc; - /* Allocate a DMA-mapped header buffer. */ - if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) { - if (tx_queue->tso_headers_free == NULL) { - if (efx_tsoh_block_alloc(tx_queue)) - return -1; - } - EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free); - tsoh = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh->next; - tsoh->unmap_len = 0; - } else { - tx_queue->tso_long_headers++; - tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len); - if (unlikely(!tsoh)) - return -1; - } + /* Allocate and insert a DMA-mapped header buffer. */ + header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); + if (!header) + return -ENOMEM; - header = TSOH_BUFFER(tsoh); - tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb)); + tsoh_th = (struct tcphdr *)(header + st->tcp_off); /* Copy and update the headers. */ memcpy(header, skb->data, st->header_len); @@ -1073,19 +965,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->seqnum += skb_shinfo(skb)->gso_size; if (st->out_len > skb_shinfo(skb)->gso_size) { /* This packet will not finish the TSO burst. */ - ip_length = st->full_packet_size - ETH_HDR_LEN(skb); + st->packet_space = skb_shinfo(skb)->gso_size; tsoh_th->fin = 0; tsoh_th->psh = 0; } else { /* This packet will be the last in the TSO burst. */ - ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len; + st->packet_space = st->out_len; tsoh_th->fin = tcp_hdr(skb)->fin; tsoh_th->psh = tcp_hdr(skb)->psh; } + ip_length = st->ip_base_len + st->packet_space; if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = - (struct iphdr *)(header + SKB_IPV4_OFF(skb)); + struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off); tsoh_iph->tot_len = htons(ip_length); @@ -1094,16 +986,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->ipv4_id++; } else { struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + SKB_IPV6_OFF(skb)); + (struct ipv6hdr *)(header + st->ip_off); - tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph)); + tsoh_iph->payload_len = htons(ip_length); } - st->packet_space = skb_shinfo(skb)->gso_size; - ++tx_queue->tso_packets; + rc = efx_tso_put_header(tx_queue, buffer, header); + if (unlikely(rc)) + return rc; - /* Form a descriptor for this header. */ - efx_tso_put_header(tx_queue, tsoh, st->header_len); + ++tx_queue->tso_packets; return 0; } @@ -1118,13 +1010,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, * * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if * @skb was not enqueued. In all cases @skb is consumed. Return - * %NETDEV_TX_OK or %NETDEV_TX_BUSY. + * %NETDEV_TX_OK. */ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; - int frag_i, rc, rc2 = NETDEV_TX_OK; + int frag_i, rc; struct tso_state state; /* Find the packet protocol and sanity-check it */ @@ -1156,11 +1048,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, goto mem_err; while (1) { - rc = tso_fill_packet_with_fragment(tx_queue, skb, &state); - if (unlikely(rc)) { - rc2 = NETDEV_TX_BUSY; - goto unwind; - } + tso_fill_packet_with_fragment(tx_queue, skb, &state); /* Move onto the next fragment? */ if (state.in_len == 0) { @@ -1184,6 +1072,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + tx_queue->tso_bursts++; return NETDEV_TX_OK; @@ -1192,10 +1082,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, "Out of memory for TSO headers, or DMA mapping error\n"); dev_kfree_skb_any(skb); - unwind: /* Free the DMA mapping we were in the process of writing out */ if (state.unmap_len) { - if (state.unmap_single) + if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, state.unmap_len, DMA_TO_DEVICE); else @@ -1204,25 +1093,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, } efx_enqueue_unwind(tx_queue); - return rc2; -} - - -/* - * Free up all TSO datastructures associated with tx_queue. This - * routine should be called only once the tx_queue is both empty and - * will no longer be used. - */ -static void efx_fini_tso(struct efx_tx_queue *tx_queue) -{ - unsigned i; - - if (tx_queue->buffer) { - for (i = 0; i <= tx_queue->ptr_mask; ++i) - efx_tsoh_free(tx_queue, &tx_queue->buffer[i]); - } - - while (tx_queue->tso_headers_free != NULL) - efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free, - &tx_queue->efx->pci_dev->dev); + return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 1b173a6145d..b26cbda5efa 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -32,7 +32,7 @@ config TI_DAVINCI_EMAC config TI_DAVINCI_MDIO tristate "TI DaVinci MDIO Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) + depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX ) select PHYLIB ---help--- This driver supports TI's DaVinci MDIO module. @@ -42,7 +42,7 @@ config TI_DAVINCI_MDIO config TI_DAVINCI_CPDMA tristate "TI DaVinci CPDMA Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) + depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX ) ---help--- This driver supports TI's DaVinci CPDMA dma engine. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1e5d85b06e7..0cbc0e59252 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -28,6 +28,9 @@ #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/of_device.h> #include <linux/platform_data/cpsw.h> @@ -709,6 +712,158 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->sliver = regs + data->sliver_reg_ofs; } +static int cpsw_probe_dt(struct cpsw_platform_data *data, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device_node *slave_node; + int i = 0, ret; + u32 prop; + + if (!node) + return -EINVAL; + + if (of_property_read_u32(node, "slaves", &prop)) { + pr_err("Missing slaves property in the DT.\n"); + return -EINVAL; + } + data->slaves = prop; + + data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) * + data->slaves, GFP_KERNEL); + if (!data->slave_data) { + pr_err("Could not allocate slave memory.\n"); + return -EINVAL; + } + + data->no_bd_ram = of_property_read_bool(node, "no_bd_ram"); + + if (of_property_read_u32(node, "cpdma_channels", &prop)) { + pr_err("Missing cpdma_channels property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->channels = prop; + + if (of_property_read_u32(node, "host_port_no", &prop)) { + pr_err("Missing host_port_no property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->host_port_num = prop; + + if (of_property_read_u32(node, "cpdma_reg_ofs", &prop)) { + pr_err("Missing cpdma_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpdma_reg_ofs = prop; + + if (of_property_read_u32(node, "cpdma_sram_ofs", &prop)) { + pr_err("Missing cpdma_sram_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpdma_sram_ofs = prop; + + if (of_property_read_u32(node, "ale_reg_ofs", &prop)) { + pr_err("Missing ale_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->ale_reg_ofs = prop; + + if (of_property_read_u32(node, "ale_entries", &prop)) { + pr_err("Missing ale_entries property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->ale_entries = prop; + + if (of_property_read_u32(node, "host_port_reg_ofs", &prop)) { + pr_err("Missing host_port_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->host_port_reg_ofs = prop; + + if (of_property_read_u32(node, "hw_stats_reg_ofs", &prop)) { + pr_err("Missing hw_stats_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->hw_stats_reg_ofs = prop; + + if (of_property_read_u32(node, "bd_ram_ofs", &prop)) { + pr_err("Missing bd_ram_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->bd_ram_ofs = prop; + + if (of_property_read_u32(node, "bd_ram_size", &prop)) { + pr_err("Missing bd_ram_size property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->bd_ram_size = prop; + + if (of_property_read_u32(node, "rx_descs", &prop)) { + pr_err("Missing rx_descs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->rx_descs = prop; + + if (of_property_read_u32(node, "mac_control", &prop)) { + pr_err("Missing mac_control property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->mac_control = prop; + + for_each_child_of_node(node, slave_node) { + struct cpsw_slave_data *slave_data = data->slave_data + i; + const char *phy_id = NULL; + const void *mac_addr = NULL; + + if (of_property_read_string(slave_node, "phy_id", &phy_id)) { + pr_err("Missing slave[%d] phy_id property\n", i); + ret = -EINVAL; + goto error_ret; + } + slave_data->phy_id = phy_id; + + if (of_property_read_u32(slave_node, "slave_reg_ofs", &prop)) { + pr_err("Missing slave[%d] slave_reg_ofs property\n", i); + ret = -EINVAL; + goto error_ret; + } + slave_data->slave_reg_ofs = prop; + + if (of_property_read_u32(slave_node, "sliver_reg_ofs", + &prop)) { + pr_err("Missing slave[%d] sliver_reg_ofs property\n", + i); + ret = -EINVAL; + goto error_ret; + } + slave_data->sliver_reg_ofs = prop; + + mac_addr = of_get_mac_address(slave_node); + if (mac_addr) + memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); + + i++; + } + + return 0; + +error_ret: + kfree(data->slave_data); + return ret; +} + static int __devinit cpsw_probe(struct platform_device *pdev) { struct cpsw_platform_data *data = pdev->dev.platform_data; @@ -720,11 +875,6 @@ static int __devinit cpsw_probe(struct platform_device *pdev) struct resource *res; int ret = 0, i, k = 0; - if (!data) { - pr_err("platform data missing\n"); - return -ENODEV; - } - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { pr_err("error allocating net_device\n"); @@ -734,13 +884,19 @@ static int __devinit cpsw_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); spin_lock_init(&priv->lock); - priv->data = *data; priv->pdev = pdev; priv->ndev = ndev; priv->dev = &ndev->dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->rx_packet_max = max(rx_packet_max, 128); + if (cpsw_probe_dt(&priv->data, pdev)) { + pr_err("cpsw: platform data missing\n"); + ret = -ENODEV; + goto clean_ndev_ret; + } + data = &priv->data; + if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); pr_info("Detected MACID = %pM", priv->mac_addr); @@ -996,11 +1152,17 @@ static const struct dev_pm_ops cpsw_pm_ops = { .resume = cpsw_resume, }; +static const struct of_device_id cpsw_of_mtable[] = { + { .compatible = "ti,cpsw", }, + { /* sentinel */ }, +}; + static struct platform_driver cpsw_driver = { .driver = { .name = "cpsw", .owner = THIS_MODULE, .pm = &cpsw_pm_ops, + .of_match_table = of_match_ptr(cpsw_of_mtable), }, .probe = cpsw_probe, .remove = __devexit_p(cpsw_remove), diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index cd7ee204e94..573f3be5f42 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -36,6 +36,8 @@ #include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/davinci_emac.h> +#include <linux/of.h> +#include <linux/of_device.h> /* * This timeout definition is a worst-case ultra defensive measure against @@ -289,6 +291,25 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id, return 0; } +static int davinci_mdio_probe_dt(struct mdio_platform_data *data, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + u32 prop; + + if (!node) + return -EINVAL; + + if (of_property_read_u32(node, "bus_freq", &prop)) { + pr_err("Missing bus_freq property in the DT.\n"); + return -EINVAL; + } + data->bus_freq = prop; + + return 0; +} + + static int __devinit davinci_mdio_probe(struct platform_device *pdev) { struct mdio_platform_data *pdata = pdev->dev.platform_data; @@ -304,8 +325,6 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) return -ENOMEM; } - data->pdata = pdata ? (*pdata) : default_pdata; - data->bus = mdiobus_alloc(); if (!data->bus) { dev_err(dev, "failed to alloc mii bus\n"); @@ -313,14 +332,22 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) goto bail_out; } + if (dev->of_node) { + if (davinci_mdio_probe_dt(&data->pdata, pdev)) + data->pdata = default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + } else { + data->pdata = pdata ? (*pdata) : default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + } + data->bus->name = dev_name(dev); data->bus->read = davinci_mdio_read, data->bus->write = davinci_mdio_write, data->bus->reset = davinci_mdio_reset, data->bus->parent = dev; data->bus->priv = data; - snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -454,11 +481,17 @@ static const struct dev_pm_ops davinci_mdio_pm_ops = { .resume = davinci_mdio_resume, }; +static const struct of_device_id davinci_mdio_of_mtable[] = { + { .compatible = "ti,davinci_mdio", }, + { /* sentinel */ }, +}; + static struct platform_driver davinci_mdio_driver = { .driver = { .name = "davinci_mdio", .owner = THIS_MODULE, .pm = &davinci_mdio_pm_ops, + .of_match_table = of_match_ptr(davinci_mdio_of_mtable), }, .probe = davinci_mdio_probe, .remove = __devexit_p(davinci_mdio_remove), diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 277c93e9ff4..8fa947a2d92 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1359,7 +1359,6 @@ static int tsi108_open(struct net_device *dev) } data->rxskbs[i] = skb; - data->rxskbs[i] = skb; data->rxring[i].buf0 = virt_to_phys(data->rxskbs[i]->data); data->rxring[i].misc = TSI108_RX_OWN | TSI108_RX_INT; } diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index a5826a3111a..2c08bf6e7bf 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -637,8 +637,7 @@ static int __devinit w5100_hw_probe(struct platform_device *pdev) if (data && is_valid_ether_addr(data->mac_addr)) { memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN); } else { - eth_random_addr(ndev->dev_addr); - ndev->addr_assign_type |= NET_ADDR_RANDOM; + eth_hw_addr_random(ndev); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index bdd8891c215..88943d90c76 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -557,8 +557,7 @@ static int __devinit w5300_hw_probe(struct platform_device *pdev) if (data && is_valid_ether_addr(data->mac_addr)) { memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN); } else { - eth_random_addr(ndev->dev_addr); - ndev->addr_assign_type |= NET_ADDR_RANDOM; + eth_hw_addr_random(ndev); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); |