From cee60c377de6d9d10f0a2876794149bd79a15020 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Thu, 17 Apr 2008 22:35:54 +0200 Subject: r8169: fix past rtl_chip_info array size for unknown chipsets 'i' is unsigned. Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Acked-by: Francois Romieu --- drivers/net/r8169.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 3acfeeabdee..5e8ad634490 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1705,18 +1705,18 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl8169_print_mac_version(tp); - for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) { + for (i = 0; i < ARRAY_SIZE(rtl_chip_info); i++) { if (tp->mac_version == rtl_chip_info[i].mac_version) break; } - if (i < 0) { + if (i == ARRAY_SIZE(rtl_chip_info)) { /* Unknown chip: assume array element #0, original RTL-8169 */ if (netif_msg_probe(tp)) { dev_printk(KERN_DEBUG, &pdev->dev, "unknown chip version, assuming %s\n", rtl_chip_info[0].name); } - i++; + i = 0; } tp->chipset = i; -- cgit v1.2.3-70-g09d2 From 21e197f231343201368338603cb0909a13961bac Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 17 Apr 2008 22:48:41 +0200 Subject: r8169: fix oops in r8169_get_mac_version r8169_get_mac_version crashes when it meets an unknown MAC due to tp->pci_dev not being set. Initialize it early. Signed-off-by: Ivan Vecera Acked-by: Francois Romieu --- drivers/net/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 5e8ad634490..65724250462 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1617,6 +1617,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(dev, &pdev->dev); tp = netdev_priv(dev); tp->dev = dev; + tp->pci_dev = pdev; tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT); /* enable device (incl. PCI PM wakeup and hotplug setup) */ @@ -1777,7 +1778,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #endif tp->intr_mask = 0xffff; - tp->pci_dev = pdev; tp->mmio_addr = ioaddr; tp->align = cfg->align; tp->hw_start = cfg->hw_start; -- cgit v1.2.3-70-g09d2 From 4709aa59ede5ff9902d60088d93d1c0e2e9d2247 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 27 Apr 2008 14:36:59 +0200 Subject: sis190: use the allocated buffer as a status code in sis190_alloc_rx_skb The local status code does not carry mory information. Signed-off-by: Stephen Hemminger Acked-by: Francois Romieu --- drivers/net/sis190.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 20745fd4e97..0d6aa1fc9c6 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -480,30 +480,22 @@ static inline void sis190_make_unusable_by_asic(struct RxDesc *desc) desc->status = 0x0; } -static int sis190_alloc_rx_skb(struct pci_dev *pdev, struct sk_buff **sk_buff, - struct RxDesc *desc, u32 rx_buf_sz) +static struct sk_buff *sis190_alloc_rx_skb(struct pci_dev *pdev, + struct RxDesc *desc, u32 rx_buf_sz) { struct sk_buff *skb; - dma_addr_t mapping; - int ret = 0; skb = dev_alloc_skb(rx_buf_sz); - if (!skb) - goto err_out; - - *sk_buff = skb; - - mapping = pci_map_single(pdev, skb->data, rx_buf_sz, - PCI_DMA_FROMDEVICE); + if (likely(skb)) { + dma_addr_t mapping; - sis190_map_to_asic(desc, mapping, rx_buf_sz); -out: - return ret; + mapping = pci_map_single(pdev, skb->data, rx_buf_sz, + PCI_DMA_FROMDEVICE); + sis190_map_to_asic(desc, mapping, rx_buf_sz); + } else + sis190_make_unusable_by_asic(desc); -err_out: - ret = -ENOMEM; - sis190_make_unusable_by_asic(desc); - goto out; + return skb; } static u32 sis190_rx_fill(struct sis190_private *tp, struct net_device *dev, @@ -512,14 +504,15 @@ static u32 sis190_rx_fill(struct sis190_private *tp, struct net_device *dev, u32 cur; for (cur = start; cur < end; cur++) { - int ret, i = cur % NUM_RX_DESC; + unsigned int i = cur % NUM_RX_DESC; if (tp->Rx_skbuff[i]) continue; - ret = sis190_alloc_rx_skb(tp->pci_dev, tp->Rx_skbuff + i, - tp->RxDescRing + i, tp->rx_buf_sz); - if (ret < 0) + tp->Rx_skbuff[i] = sis190_alloc_rx_skb(tp->pci_dev, + tp->RxDescRing + i, + tp->rx_buf_sz); + if (!tp->Rx_skbuff[i]) break; } return cur - start; -- cgit v1.2.3-70-g09d2 From e3eccad9f6e84656b45bfa07738934145b09e11e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 27 Apr 2008 14:42:27 +0200 Subject: sis190: hard-code the alignment of tiny packets There is no DMA involved here. Align the IP header without condition. Signed-off-by: Stephen Hemminger Acked-by: Francois Romieu --- drivers/net/sis190.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 0d6aa1fc9c6..248c3855f4d 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -526,9 +526,9 @@ static inline int sis190_try_rx_copy(struct sk_buff **sk_buff, int pkt_size, if (pkt_size < rx_copybreak) { struct sk_buff *skb; - skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN); + skb = dev_alloc_skb(pkt_size + 2); if (skb) { - skb_reserve(skb, NET_IP_ALIGN); + skb_reserve(skb, 2); skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); *sk_buff = skb; sis190_give_to_asic(desc, rx_buf_sz); -- cgit v1.2.3-70-g09d2 From 35aeb7809345e0362772a75368a3e62ecd931481 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 27 Apr 2008 14:54:32 +0200 Subject: sis190: use netdev_alloc_skb This sets skb->dev and allows arch specific allocation. Signed-off-by: Stephen Hemminger Acked-by: Francois Romieu --- drivers/net/sis190.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 248c3855f4d..97aa18dd9a4 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -480,16 +480,17 @@ static inline void sis190_make_unusable_by_asic(struct RxDesc *desc) desc->status = 0x0; } -static struct sk_buff *sis190_alloc_rx_skb(struct pci_dev *pdev, - struct RxDesc *desc, u32 rx_buf_sz) +static struct sk_buff *sis190_alloc_rx_skb(struct sis190_private *tp, + struct RxDesc *desc) { + u32 rx_buf_sz = tp->rx_buf_sz; struct sk_buff *skb; - skb = dev_alloc_skb(rx_buf_sz); + skb = netdev_alloc_skb(tp->dev, rx_buf_sz); if (likely(skb)) { dma_addr_t mapping; - mapping = pci_map_single(pdev, skb->data, rx_buf_sz, + mapping = pci_map_single(tp->pci_dev, skb->data, tp->rx_buf_sz, PCI_DMA_FROMDEVICE); sis190_map_to_asic(desc, mapping, rx_buf_sz); } else @@ -509,29 +510,29 @@ static u32 sis190_rx_fill(struct sis190_private *tp, struct net_device *dev, if (tp->Rx_skbuff[i]) continue; - tp->Rx_skbuff[i] = sis190_alloc_rx_skb(tp->pci_dev, - tp->RxDescRing + i, - tp->rx_buf_sz); + tp->Rx_skbuff[i] = sis190_alloc_rx_skb(tp, tp->RxDescRing + i); + if (!tp->Rx_skbuff[i]) break; } return cur - start; } -static inline int sis190_try_rx_copy(struct sk_buff **sk_buff, int pkt_size, - struct RxDesc *desc, int rx_buf_sz) +static int sis190_try_rx_copy(struct sis190_private *tp, + struct sk_buff **sk_buff, int pkt_size, + struct RxDesc *desc) { int ret = -1; if (pkt_size < rx_copybreak) { struct sk_buff *skb; - skb = dev_alloc_skb(pkt_size + 2); + skb = netdev_alloc_skb(tp->dev, pkt_size + 2); if (skb) { skb_reserve(skb, 2); skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); *sk_buff = skb; - sis190_give_to_asic(desc, rx_buf_sz); + sis190_give_to_asic(desc, tp->rx_buf_sz); ret = 0; } } @@ -603,8 +604,7 @@ static int sis190_rx_interrupt(struct net_device *dev, le32_to_cpu(desc->addr), tp->rx_buf_sz, PCI_DMA_FROMDEVICE); - if (sis190_try_rx_copy(&skb, pkt_size, desc, - tp->rx_buf_sz)) { + if (sis190_try_rx_copy(tp, &skb, pkt_size, desc)) { pci_action = pci_unmap_single; tp->Rx_skbuff[entry] = NULL; sis190_make_unusable_by_asic(desc); -- cgit v1.2.3-70-g09d2 From 47e4781544aaf2916170ef5516786fbb19447600 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sun, 27 Apr 2008 17:59:52 +0200 Subject: sis190: Rx path update - remove the function pointer to help gcc optimizing the inline pci_dma functions - pci_dma_sync_single_for_cpu is not needed for a single large packet - convert rtl8169_try_rx_copy to bool b449655ff52ff8a29c66c5fc3fc03617e61182ee did the same for the r8169 driver. Signed-off-by: Francois Romieu --- drivers/net/sis190.c | 54 +++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 97aa18dd9a4..0b22e75633a 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -518,25 +518,28 @@ static u32 sis190_rx_fill(struct sis190_private *tp, struct net_device *dev, return cur - start; } -static int sis190_try_rx_copy(struct sis190_private *tp, - struct sk_buff **sk_buff, int pkt_size, - struct RxDesc *desc) +static bool sis190_try_rx_copy(struct sis190_private *tp, + struct sk_buff **sk_buff, int pkt_size, + dma_addr_t addr) { - int ret = -1; + struct sk_buff *skb; + bool done = false; - if (pkt_size < rx_copybreak) { - struct sk_buff *skb; + if (pkt_size >= rx_copybreak) + goto out; - skb = netdev_alloc_skb(tp->dev, pkt_size + 2); - if (skb) { - skb_reserve(skb, 2); - skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); - *sk_buff = skb; - sis190_give_to_asic(desc, tp->rx_buf_sz); - ret = 0; - } - } - return ret; + skb = netdev_alloc_skb(tp->dev, pkt_size + 2); + if (!skb) + goto out; + + pci_dma_sync_single_for_device(tp->pci_dev, addr, pkt_size, + PCI_DMA_FROMDEVICE); + skb_reserve(skb, 2); + skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); + *sk_buff = skb; + done = true; +out: + return done; } static inline int sis190_rx_pkt_err(u32 status, struct net_device_stats *stats) @@ -586,9 +589,9 @@ static int sis190_rx_interrupt(struct net_device *dev, sis190_give_to_asic(desc, tp->rx_buf_sz); else { struct sk_buff *skb = tp->Rx_skbuff[entry]; + dma_addr_t addr = le32_to_cpu(desc->addr); int pkt_size = (status & RxSizeMask) - 4; - void (*pci_action)(struct pci_dev *, dma_addr_t, - size_t, int) = pci_dma_sync_single_for_device; + struct pci_dev *pdev = tp->pci_dev; if (unlikely(pkt_size > tp->rx_buf_sz)) { net_intr(tp, KERN_INFO @@ -600,19 +603,18 @@ static int sis190_rx_interrupt(struct net_device *dev, continue; } - pci_dma_sync_single_for_cpu(tp->pci_dev, - le32_to_cpu(desc->addr), tp->rx_buf_sz, - PCI_DMA_FROMDEVICE); - if (sis190_try_rx_copy(tp, &skb, pkt_size, desc)) { - pci_action = pci_unmap_single; + if (sis190_try_rx_copy(tp, &skb, pkt_size, addr)) { + pci_dma_sync_single_for_device(pdev, addr, + tp->rx_buf_sz, PCI_DMA_FROMDEVICE); + sis190_give_to_asic(desc, tp->rx_buf_sz); + } else { + pci_unmap_single(pdev, addr, tp->rx_buf_sz, + PCI_DMA_FROMDEVICE); tp->Rx_skbuff[entry] = NULL; sis190_make_unusable_by_asic(desc); } - pci_action(tp->pci_dev, le32_to_cpu(desc->addr), - tp->rx_buf_sz, PCI_DMA_FROMDEVICE); - skb_put(skb, pkt_size); skb->protocol = eth_type_trans(skb, dev); -- cgit v1.2.3-70-g09d2 From c34ebbae01e3d1f6a5cced6a40dc0ed792590d22 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sun, 18 Nov 2007 22:04:05 +0100 Subject: sis190: remove needless MII reset It does not help the auto-negotiation process to settle. Added a debug message to give some hindsight when things do not work as expected. Signed-off-by: Francois Romieu --- drivers/net/sis190.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 0b22e75633a..20f48296efc 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -899,10 +899,9 @@ static void sis190_phy_task(struct work_struct *work) mod_timer(&tp->timer, jiffies + HZ/10); } else if (!(mdio_read_latched(ioaddr, phy_id, MII_BMSR) & BMSR_ANEGCOMPLETE)) { - net_link(tp, KERN_WARNING "%s: PHY reset until link up.\n", - dev->name); netif_carrier_off(dev); - mdio_write(ioaddr, phy_id, MII_BMCR, val | BMCR_RESET); + net_link(tp, KERN_WARNING "%s: auto-negotiating...\n", + dev->name); mod_timer(&tp->timer, jiffies + SIS190_PHY_TIMEOUT); } else { /* Rejoice ! */ -- cgit v1.2.3-70-g09d2 From 697c269610179051cf19e45566fee3dcebbb1e93 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Wed, 21 Nov 2007 22:30:37 +0100 Subject: sis190: account for Tx errors Update the collision counter as well. Signed-off-by: Francois Romieu --- drivers/net/sis190.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 20f48296efc..abc63b0663b 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -212,6 +212,12 @@ enum _DescStatusBit { THOL2 = 0x20000000, THOL1 = 0x10000000, THOL0 = 0x00000000, + + WND = 0x00080000, + TABRT = 0x00040000, + FIFO = 0x00020000, + LINK = 0x00010000, + ColCountMask = 0x0000ffff, /* RxDesc.status */ IPON = 0x20000000, TCPON = 0x10000000, @@ -653,9 +659,31 @@ static void sis190_unmap_tx_skb(struct pci_dev *pdev, struct sk_buff *skb, memset(desc, 0x00, sizeof(*desc)); } +static inline int sis190_tx_pkt_err(u32 status, struct net_device_stats *stats) +{ +#define TxErrMask (WND | TABRT | FIFO | LINK) + + if (!unlikely(status & TxErrMask)) + return 0; + + if (status & WND) + stats->tx_window_errors++; + if (status & TABRT) + stats->tx_aborted_errors++; + if (status & FIFO) + stats->tx_fifo_errors++; + if (status & LINK) + stats->tx_carrier_errors++; + + stats->tx_errors++; + + return -1; +} + static void sis190_tx_interrupt(struct net_device *dev, struct sis190_private *tp, void __iomem *ioaddr) { + struct net_device_stats *stats = &dev->stats; u32 pending, dirty_tx = tp->dirty_tx; /* * It would not be needed if queueing was allowed to be enabled @@ -670,15 +698,19 @@ static void sis190_tx_interrupt(struct net_device *dev, for (; pending; pending--, dirty_tx++) { unsigned int entry = dirty_tx % NUM_TX_DESC; struct TxDesc *txd = tp->TxDescRing + entry; + u32 status = le32_to_cpu(txd->status); struct sk_buff *skb; - if (le32_to_cpu(txd->status) & OWNbit) + if (status & OWNbit) break; skb = tp->Tx_skbuff[entry]; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + if (likely(sis190_tx_pkt_err(status, stats) == 0)) { + stats->tx_packets++; + stats->tx_bytes += skb->len; + stats->collisions += ((status & ColCountMask) - 1); + } sis190_unmap_tx_skb(tp->pci_dev, skb, txd); tp->Tx_skbuff[entry] = NULL; -- cgit v1.2.3-70-g09d2 From 8ceee660aacb29721e26f08e336c58dc4847d1bd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 27 Apr 2008 12:55:59 +0100 Subject: New driver "sfc" for Solarstorm SFC4000 controller. The driver supports the 10Xpress PHY and XFP modules on our reference designs SFE4001 and SFE4002 and the SMC models SMC10GPCIe-XFP and SMC10GPCIe-10BT. Signed-off-by: Ben Hutchings Signed-off-by: Jeff Garzik --- MAINTAINERS | 7 + drivers/net/Kconfig | 1 + drivers/net/Makefile | 2 + drivers/net/sfc/Kconfig | 12 + drivers/net/sfc/Makefile | 5 + drivers/net/sfc/bitfield.h | 508 ++++++++ drivers/net/sfc/boards.c | 167 +++ drivers/net/sfc/boards.h | 26 + drivers/net/sfc/efx.c | 2208 +++++++++++++++++++++++++++++++ drivers/net/sfc/efx.h | 67 + drivers/net/sfc/enum.h | 50 + drivers/net/sfc/ethtool.c | 460 +++++++ drivers/net/sfc/ethtool.h | 27 + drivers/net/sfc/falcon.c | 2722 +++++++++++++++++++++++++++++++++++++++ drivers/net/sfc/falcon.h | 130 ++ drivers/net/sfc/falcon_hwdefs.h | 1135 ++++++++++++++++ drivers/net/sfc/falcon_io.h | 243 ++++ drivers/net/sfc/falcon_xmac.c | 585 +++++++++ drivers/net/sfc/gmii.h | 195 +++ drivers/net/sfc/i2c-direct.c | 381 ++++++ drivers/net/sfc/i2c-direct.h | 91 ++ drivers/net/sfc/mac.h | 33 + drivers/net/sfc/mdio_10g.c | 282 ++++ drivers/net/sfc/mdio_10g.h | 232 ++++ drivers/net/sfc/net_driver.h | 883 +++++++++++++ drivers/net/sfc/phy.h | 48 + drivers/net/sfc/rx.c | 875 +++++++++++++ drivers/net/sfc/rx.h | 29 + drivers/net/sfc/sfe4001.c | 252 ++++ drivers/net/sfc/spi.h | 71 + drivers/net/sfc/tenxpress.c | 434 +++++++ drivers/net/sfc/tx.c | 452 +++++++ drivers/net/sfc/tx.h | 24 + drivers/net/sfc/workarounds.h | 56 + drivers/net/sfc/xenpack.h | 62 + drivers/net/sfc/xfp_phy.c | 132 ++ 36 files changed, 12887 insertions(+) create mode 100644 drivers/net/sfc/Kconfig create mode 100644 drivers/net/sfc/Makefile create mode 100644 drivers/net/sfc/bitfield.h create mode 100644 drivers/net/sfc/boards.c create mode 100644 drivers/net/sfc/boards.h create mode 100644 drivers/net/sfc/efx.c create mode 100644 drivers/net/sfc/efx.h create mode 100644 drivers/net/sfc/enum.h create mode 100644 drivers/net/sfc/ethtool.c create mode 100644 drivers/net/sfc/ethtool.h create mode 100644 drivers/net/sfc/falcon.c create mode 100644 drivers/net/sfc/falcon.h create mode 100644 drivers/net/sfc/falcon_hwdefs.h create mode 100644 drivers/net/sfc/falcon_io.h create mode 100644 drivers/net/sfc/falcon_xmac.c create mode 100644 drivers/net/sfc/gmii.h create mode 100644 drivers/net/sfc/i2c-direct.c create mode 100644 drivers/net/sfc/i2c-direct.h create mode 100644 drivers/net/sfc/mac.h create mode 100644 drivers/net/sfc/mdio_10g.c create mode 100644 drivers/net/sfc/mdio_10g.h create mode 100644 drivers/net/sfc/net_driver.h create mode 100644 drivers/net/sfc/phy.h create mode 100644 drivers/net/sfc/rx.c create mode 100644 drivers/net/sfc/rx.h create mode 100644 drivers/net/sfc/sfe4001.c create mode 100644 drivers/net/sfc/spi.h create mode 100644 drivers/net/sfc/tenxpress.c create mode 100644 drivers/net/sfc/tx.c create mode 100644 drivers/net/sfc/tx.h create mode 100644 drivers/net/sfc/workarounds.h create mode 100644 drivers/net/sfc/xenpack.h create mode 100644 drivers/net/sfc/xfp_phy.c diff --git a/MAINTAINERS b/MAINTAINERS index 36aadf6003b..2112034e164 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3522,6 +3522,13 @@ M: pfg@sgi.com L: linux-ia64@vger.kernel.org S: Supported +SFC NETWORK DRIVER +P: Steve Hodgson +P: Ben Hutchings +P: Robert Stonehouse +M: linux-net-drivers@solarflare.com +S: Supported + SGI VISUAL WORKSTATION 320 AND 540 P: Andrey Panin M: pazke@donpac.ru diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 45c3a208d93..50b36b408ca 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2592,6 +2592,7 @@ config BNX2X To compile this driver as a module, choose M here: the module will be called bnx2x. This is recommended. +source "drivers/net/sfc/Kconfig" endif # NETDEV_10000 diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 4d71729e85e..371cb0785b2 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -252,3 +252,5 @@ obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_NETXEN_NIC) += netxen/ obj-$(CONFIG_NIU) += niu.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o +obj-$(CONFIG_SFC) += sfc/ + diff --git a/drivers/net/sfc/Kconfig b/drivers/net/sfc/Kconfig new file mode 100644 index 00000000000..dbad95c295b --- /dev/null +++ b/drivers/net/sfc/Kconfig @@ -0,0 +1,12 @@ +config SFC + tristate "Solarflare Solarstorm SFC4000 support" + depends on PCI && INET + select MII + select INET_LRO + select CRC32 + help + This driver supports 10-gigabit Ethernet cards based on + the Solarflare Communications Solarstorm SFC4000 controller. + + To compile this driver as a module, choose M here. The module + will be called sfc. diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile new file mode 100644 index 00000000000..0f023447eaf --- /dev/null +++ b/drivers/net/sfc/Makefile @@ -0,0 +1,5 @@ +sfc-y += efx.o falcon.o tx.o rx.o falcon_xmac.o \ + i2c-direct.o ethtool.o xfp_phy.o mdio_10g.o \ + tenxpress.o boards.o sfe4001.o + +obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/sfc/bitfield.h b/drivers/net/sfc/bitfield.h new file mode 100644 index 00000000000..2806201644c --- /dev/null +++ b/drivers/net/sfc/bitfield.h @@ -0,0 +1,508 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_BITFIELD_H +#define EFX_BITFIELD_H + +/* + * Efx bitfield access + * + * Efx NICs make extensive use of bitfields up to 128 bits + * wide. Since there is no native 128-bit datatype on most systems, + * and since 64-bit datatypes are inefficient on 32-bit systems and + * vice versa, we wrap accesses in a way that uses the most efficient + * datatype. + * + * The NICs are PCI devices and therefore little-endian. Since most + * of the quantities that we deal with are DMAed to/from host memory, + * we define our datatypes (efx_oword_t, efx_qword_t and + * efx_dword_t) to be little-endian. + */ + +/* Lowest bit numbers and widths */ +#define EFX_DUMMY_FIELD_LBN 0 +#define EFX_DUMMY_FIELD_WIDTH 0 +#define EFX_DWORD_0_LBN 0 +#define EFX_DWORD_0_WIDTH 32 +#define EFX_DWORD_1_LBN 32 +#define EFX_DWORD_1_WIDTH 32 +#define EFX_DWORD_2_LBN 64 +#define EFX_DWORD_2_WIDTH 32 +#define EFX_DWORD_3_LBN 96 +#define EFX_DWORD_3_WIDTH 32 + +/* Specified attribute (e.g. LBN) of the specified field */ +#define EFX_VAL(field, attribute) field ## _ ## attribute +/* Low bit number of the specified field */ +#define EFX_LOW_BIT(field) EFX_VAL(field, LBN) +/* Bit width of the specified field */ +#define EFX_WIDTH(field) EFX_VAL(field, WIDTH) +/* High bit number of the specified field */ +#define EFX_HIGH_BIT(field) (EFX_LOW_BIT(field) + EFX_WIDTH(field) - 1) +/* Mask equal in width to the specified field. + * + * For example, a field with width 5 would have a mask of 0x1f. + * + * The maximum width mask that can be generated is 64 bits. + */ +#define EFX_MASK64(field) \ + (EFX_WIDTH(field) == 64 ? ~((u64) 0) : \ + (((((u64) 1) << EFX_WIDTH(field))) - 1)) + +/* Mask equal in width to the specified field. + * + * For example, a field with width 5 would have a mask of 0x1f. + * + * The maximum width mask that can be generated is 32 bits. Use + * EFX_MASK64 for higher width fields. + */ +#define EFX_MASK32(field) \ + (EFX_WIDTH(field) == 32 ? ~((u32) 0) : \ + (((((u32) 1) << EFX_WIDTH(field))) - 1)) + +/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */ +typedef union efx_dword { + __le32 u32[1]; +} efx_dword_t; + +/* A quadword (i.e. 8 byte) datatype - little-endian in HW */ +typedef union efx_qword { + __le64 u64[1]; + __le32 u32[2]; + efx_dword_t dword[2]; +} efx_qword_t; + +/* An octword (eight-word, i.e. 16 byte) datatype - little-endian in HW */ +typedef union efx_oword { + __le64 u64[2]; + efx_qword_t qword[2]; + __le32 u32[4]; + efx_dword_t dword[4]; +} efx_oword_t; + +/* Format string and value expanders for printk */ +#define EFX_DWORD_FMT "%08x" +#define EFX_QWORD_FMT "%08x:%08x" +#define EFX_OWORD_FMT "%08x:%08x:%08x:%08x" +#define EFX_DWORD_VAL(dword) \ + ((unsigned int) le32_to_cpu((dword).u32[0])) +#define EFX_QWORD_VAL(qword) \ + ((unsigned int) le32_to_cpu((qword).u32[1])), \ + ((unsigned int) le32_to_cpu((qword).u32[0])) +#define EFX_OWORD_VAL(oword) \ + ((unsigned int) le32_to_cpu((oword).u32[3])), \ + ((unsigned int) le32_to_cpu((oword).u32[2])), \ + ((unsigned int) le32_to_cpu((oword).u32[1])), \ + ((unsigned int) le32_to_cpu((oword).u32[0])) + +/* + * Extract bit field portion [low,high) from the native-endian element + * which contains bits [min,max). + * + * For example, suppose "element" represents the high 32 bits of a + * 64-bit value, and we wish to extract the bits belonging to the bit + * field occupying bits 28-45 of this 64-bit value. + * + * Then EFX_EXTRACT ( element, 32, 63, 28, 45 ) would give + * + * ( element ) << 4 + * + * The result will contain the relevant bits filled in in the range + * [0,high-low), with garbage in bits [high-low+1,...). + */ +#define EFX_EXTRACT_NATIVE(native_element, min, max, low, high) \ + (((low > max) || (high < min)) ? 0 : \ + ((low > min) ? \ + ((native_element) >> (low - min)) : \ + ((native_element) << (min - low)))) + +/* + * Extract bit field portion [low,high) from the 64-bit little-endian + * element which contains bits [min,max) + */ +#define EFX_EXTRACT64(element, min, max, low, high) \ + EFX_EXTRACT_NATIVE(le64_to_cpu(element), min, max, low, high) + +/* + * Extract bit field portion [low,high) from the 32-bit little-endian + * element which contains bits [min,max) + */ +#define EFX_EXTRACT32(element, min, max, low, high) \ + EFX_EXTRACT_NATIVE(le32_to_cpu(element), min, max, low, high) + +#define EFX_EXTRACT_OWORD64(oword, low, high) \ + (EFX_EXTRACT64((oword).u64[0], 0, 63, low, high) | \ + EFX_EXTRACT64((oword).u64[1], 64, 127, low, high)) + +#define EFX_EXTRACT_QWORD64(qword, low, high) \ + EFX_EXTRACT64((qword).u64[0], 0, 63, low, high) + +#define EFX_EXTRACT_OWORD32(oword, low, high) \ + (EFX_EXTRACT32((oword).u32[0], 0, 31, low, high) | \ + EFX_EXTRACT32((oword).u32[1], 32, 63, low, high) | \ + EFX_EXTRACT32((oword).u32[2], 64, 95, low, high) | \ + EFX_EXTRACT32((oword).u32[3], 96, 127, low, high)) + +#define EFX_EXTRACT_QWORD32(qword, low, high) \ + (EFX_EXTRACT32((qword).u32[0], 0, 31, low, high) | \ + EFX_EXTRACT32((qword).u32[1], 32, 63, low, high)) + +#define EFX_EXTRACT_DWORD(dword, low, high) \ + EFX_EXTRACT32((dword).u32[0], 0, 31, low, high) + +#define EFX_OWORD_FIELD64(oword, field) \ + (EFX_EXTRACT_OWORD64(oword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \ + & EFX_MASK64(field)) + +#define EFX_QWORD_FIELD64(qword, field) \ + (EFX_EXTRACT_QWORD64(qword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \ + & EFX_MASK64(field)) + +#define EFX_OWORD_FIELD32(oword, field) \ + (EFX_EXTRACT_OWORD32(oword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \ + & EFX_MASK32(field)) + +#define EFX_QWORD_FIELD32(qword, field) \ + (EFX_EXTRACT_QWORD32(qword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \ + & EFX_MASK32(field)) + +#define EFX_DWORD_FIELD(dword, field) \ + (EFX_EXTRACT_DWORD(dword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \ + & EFX_MASK32(field)) + +#define EFX_OWORD_IS_ZERO64(oword) \ + (((oword).u64[0] | (oword).u64[1]) == (__force __le64) 0) + +#define EFX_QWORD_IS_ZERO64(qword) \ + (((qword).u64[0]) == (__force __le64) 0) + +#define EFX_OWORD_IS_ZERO32(oword) \ + (((oword).u32[0] | (oword).u32[1] | (oword).u32[2] | (oword).u32[3]) \ + == (__force __le32) 0) + +#define EFX_QWORD_IS_ZERO32(qword) \ + (((qword).u32[0] | (qword).u32[1]) == (__force __le32) 0) + +#define EFX_DWORD_IS_ZERO(dword) \ + (((dword).u32[0]) == (__force __le32) 0) + +#define EFX_OWORD_IS_ALL_ONES64(oword) \ + (((oword).u64[0] & (oword).u64[1]) == ~((__force __le64) 0)) + +#define EFX_QWORD_IS_ALL_ONES64(qword) \ + ((qword).u64[0] == ~((__force __le64) 0)) + +#define EFX_OWORD_IS_ALL_ONES32(oword) \ + (((oword).u32[0] & (oword).u32[1] & (oword).u32[2] & (oword).u32[3]) \ + == ~((__force __le32) 0)) + +#define EFX_QWORD_IS_ALL_ONES32(qword) \ + (((qword).u32[0] & (qword).u32[1]) == ~((__force __le32) 0)) + +#define EFX_DWORD_IS_ALL_ONES(dword) \ + ((dword).u32[0] == ~((__force __le32) 0)) + +#if BITS_PER_LONG == 64 +#define EFX_OWORD_FIELD EFX_OWORD_FIELD64 +#define EFX_QWORD_FIELD EFX_QWORD_FIELD64 +#define EFX_OWORD_IS_ZERO EFX_OWORD_IS_ZERO64 +#define EFX_QWORD_IS_ZERO EFX_QWORD_IS_ZERO64 +#define EFX_OWORD_IS_ALL_ONES EFX_OWORD_IS_ALL_ONES64 +#define EFX_QWORD_IS_ALL_ONES EFX_QWORD_IS_ALL_ONES64 +#else +#define EFX_OWORD_FIELD EFX_OWORD_FIELD32 +#define EFX_QWORD_FIELD EFX_QWORD_FIELD32 +#define EFX_OWORD_IS_ZERO EFX_OWORD_IS_ZERO32 +#define EFX_QWORD_IS_ZERO EFX_QWORD_IS_ZERO32 +#define EFX_OWORD_IS_ALL_ONES EFX_OWORD_IS_ALL_ONES32 +#define EFX_QWORD_IS_ALL_ONES EFX_QWORD_IS_ALL_ONES32 +#endif + +/* + * Construct bit field portion + * + * Creates the portion of the bit field [low,high) that lies within + * the range [min,max). + */ +#define EFX_INSERT_NATIVE64(min, max, low, high, value) \ + (((low > max) || (high < min)) ? 0 : \ + ((low > min) ? \ + (((u64) (value)) << (low - min)) : \ + (((u64) (value)) >> (min - low)))) + +#define EFX_INSERT_NATIVE32(min, max, low, high, value) \ + (((low > max) || (high < min)) ? 0 : \ + ((low > min) ? \ + (((u32) (value)) << (low - min)) : \ + (((u32) (value)) >> (min - low)))) + +#define EFX_INSERT_NATIVE(min, max, low, high, value) \ + ((((max - min) >= 32) || ((high - low) >= 32)) ? \ + EFX_INSERT_NATIVE64(min, max, low, high, value) : \ + EFX_INSERT_NATIVE32(min, max, low, high, value)) + +/* + * Construct bit field portion + * + * Creates the portion of the named bit field that lies within the + * range [min,max). + */ +#define EFX_INSERT_FIELD_NATIVE(min, max, field, value) \ + EFX_INSERT_NATIVE(min, max, EFX_LOW_BIT(field), \ + EFX_HIGH_BIT(field), value) + +/* + * Construct bit field + * + * Creates the portion of the named bit fields that lie within the + * range [min,max). + */ +#define EFX_INSERT_FIELDS_NATIVE(min, max, \ + field1, value1, \ + field2, value2, \ + field3, value3, \ + field4, value4, \ + field5, value5, \ + field6, value6, \ + field7, value7, \ + field8, value8, \ + field9, value9, \ + field10, value10) \ + (EFX_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field4, (value4)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field5, (value5)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field6, (value6)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10))) + +#define EFX_INSERT_FIELDS64(...) \ + cpu_to_le64(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__)) + +#define EFX_INSERT_FIELDS32(...) \ + cpu_to_le32(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__)) + +#define EFX_POPULATE_OWORD64(oword, ...) do { \ + (oword).u64[0] = EFX_INSERT_FIELDS64(0, 63, __VA_ARGS__); \ + (oword).u64[1] = EFX_INSERT_FIELDS64(64, 127, __VA_ARGS__); \ + } while (0) + +#define EFX_POPULATE_QWORD64(qword, ...) do { \ + (qword).u64[0] = EFX_INSERT_FIELDS64(0, 63, __VA_ARGS__); \ + } while (0) + +#define EFX_POPULATE_OWORD32(oword, ...) do { \ + (oword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \ + (oword).u32[1] = EFX_INSERT_FIELDS32(32, 63, __VA_ARGS__); \ + (oword).u32[2] = EFX_INSERT_FIELDS32(64, 95, __VA_ARGS__); \ + (oword).u32[3] = EFX_INSERT_FIELDS32(96, 127, __VA_ARGS__); \ + } while (0) + +#define EFX_POPULATE_QWORD32(qword, ...) do { \ + (qword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \ + (qword).u32[1] = EFX_INSERT_FIELDS32(32, 63, __VA_ARGS__); \ + } while (0) + +#define EFX_POPULATE_DWORD(dword, ...) do { \ + (dword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \ + } while (0) + +#if BITS_PER_LONG == 64 +#define EFX_POPULATE_OWORD EFX_POPULATE_OWORD64 +#define EFX_POPULATE_QWORD EFX_POPULATE_QWORD64 +#else +#define EFX_POPULATE_OWORD EFX_POPULATE_OWORD32 +#define EFX_POPULATE_QWORD EFX_POPULATE_QWORD32 +#endif + +/* Populate an octword field with various numbers of arguments */ +#define EFX_POPULATE_OWORD_10 EFX_POPULATE_OWORD +#define EFX_POPULATE_OWORD_9(oword, ...) \ + EFX_POPULATE_OWORD_10(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_8(oword, ...) \ + EFX_POPULATE_OWORD_9(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_7(oword, ...) \ + EFX_POPULATE_OWORD_8(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_6(oword, ...) \ + EFX_POPULATE_OWORD_7(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_5(oword, ...) \ + EFX_POPULATE_OWORD_6(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_4(oword, ...) \ + EFX_POPULATE_OWORD_5(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_3(oword, ...) \ + EFX_POPULATE_OWORD_4(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_2(oword, ...) \ + EFX_POPULATE_OWORD_3(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_1(oword, ...) \ + EFX_POPULATE_OWORD_2(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_ZERO_OWORD(oword) \ + EFX_POPULATE_OWORD_1(oword, EFX_DUMMY_FIELD, 0) +#define EFX_SET_OWORD(oword) \ + EFX_POPULATE_OWORD_4(oword, \ + EFX_DWORD_0, 0xffffffff, \ + EFX_DWORD_1, 0xffffffff, \ + EFX_DWORD_2, 0xffffffff, \ + EFX_DWORD_3, 0xffffffff) + +/* Populate a quadword field with various numbers of arguments */ +#define EFX_POPULATE_QWORD_10 EFX_POPULATE_QWORD +#define EFX_POPULATE_QWORD_9(qword, ...) \ + EFX_POPULATE_QWORD_10(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_8(qword, ...) \ + EFX_POPULATE_QWORD_9(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_7(qword, ...) \ + EFX_POPULATE_QWORD_8(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_6(qword, ...) \ + EFX_POPULATE_QWORD_7(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_5(qword, ...) \ + EFX_POPULATE_QWORD_6(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_4(qword, ...) \ + EFX_POPULATE_QWORD_5(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_3(qword, ...) \ + EFX_POPULATE_QWORD_4(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_2(qword, ...) \ + EFX_POPULATE_QWORD_3(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_1(qword, ...) \ + EFX_POPULATE_QWORD_2(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_ZERO_QWORD(qword) \ + EFX_POPULATE_QWORD_1(qword, EFX_DUMMY_FIELD, 0) +#define EFX_SET_QWORD(qword) \ + EFX_POPULATE_QWORD_2(qword, \ + EFX_DWORD_0, 0xffffffff, \ + EFX_DWORD_1, 0xffffffff) + +/* Populate a dword field with various numbers of arguments */ +#define EFX_POPULATE_DWORD_10 EFX_POPULATE_DWORD +#define EFX_POPULATE_DWORD_9(dword, ...) \ + EFX_POPULATE_DWORD_10(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_8(dword, ...) \ + EFX_POPULATE_DWORD_9(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_7(dword, ...) \ + EFX_POPULATE_DWORD_8(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_6(dword, ...) \ + EFX_POPULATE_DWORD_7(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_5(dword, ...) \ + EFX_POPULATE_DWORD_6(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_4(dword, ...) \ + EFX_POPULATE_DWORD_5(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_3(dword, ...) \ + EFX_POPULATE_DWORD_4(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_2(dword, ...) \ + EFX_POPULATE_DWORD_3(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_1(dword, ...) \ + EFX_POPULATE_DWORD_2(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_ZERO_DWORD(dword) \ + EFX_POPULATE_DWORD_1(dword, EFX_DUMMY_FIELD, 0) +#define EFX_SET_DWORD(dword) \ + EFX_POPULATE_DWORD_1(dword, EFX_DWORD_0, 0xffffffff) + +/* + * Modify a named field within an already-populated structure. Used + * for read-modify-write operations. + * + */ + +#define EFX_INVERT_OWORD(oword) do { \ + (oword).u64[0] = ~((oword).u64[0]); \ + (oword).u64[1] = ~((oword).u64[1]); \ + } while (0) + +#define EFX_INSERT_FIELD64(...) \ + cpu_to_le64(EFX_INSERT_FIELD_NATIVE(__VA_ARGS__)) + +#define EFX_INSERT_FIELD32(...) \ + cpu_to_le32(EFX_INSERT_FIELD_NATIVE(__VA_ARGS__)) + +#define EFX_INPLACE_MASK64(min, max, field) \ + EFX_INSERT_FIELD64(min, max, field, EFX_MASK64(field)) + +#define EFX_INPLACE_MASK32(min, max, field) \ + EFX_INSERT_FIELD32(min, max, field, EFX_MASK32(field)) + +#define EFX_SET_OWORD_FIELD64(oword, field, value) do { \ + (oword).u64[0] = (((oword).u64[0] \ + & ~EFX_INPLACE_MASK64(0, 63, field)) \ + | EFX_INSERT_FIELD64(0, 63, field, value)); \ + (oword).u64[1] = (((oword).u64[1] \ + & ~EFX_INPLACE_MASK64(64, 127, field)) \ + | EFX_INSERT_FIELD64(64, 127, field, value)); \ + } while (0) + +#define EFX_SET_QWORD_FIELD64(qword, field, value) do { \ + (qword).u64[0] = (((qword).u64[0] \ + & ~EFX_INPLACE_MASK64(0, 63, field)) \ + | EFX_INSERT_FIELD64(0, 63, field, value)); \ + } while (0) + +#define EFX_SET_OWORD_FIELD32(oword, field, value) do { \ + (oword).u32[0] = (((oword).u32[0] \ + & ~EFX_INPLACE_MASK32(0, 31, field)) \ + | EFX_INSERT_FIELD32(0, 31, field, value)); \ + (oword).u32[1] = (((oword).u32[1] \ + & ~EFX_INPLACE_MASK32(32, 63, field)) \ + | EFX_INSERT_FIELD32(32, 63, field, value)); \ + (oword).u32[2] = (((oword).u32[2] \ + & ~EFX_INPLACE_MASK32(64, 95, field)) \ + | EFX_INSERT_FIELD32(64, 95, field, value)); \ + (oword).u32[3] = (((oword).u32[3] \ + & ~EFX_INPLACE_MASK32(96, 127, field)) \ + | EFX_INSERT_FIELD32(96, 127, field, value)); \ + } while (0) + +#define EFX_SET_QWORD_FIELD32(qword, field, value) do { \ + (qword).u32[0] = (((qword).u32[0] \ + & ~EFX_INPLACE_MASK32(0, 31, field)) \ + | EFX_INSERT_FIELD32(0, 31, field, value)); \ + (qword).u32[1] = (((qword).u32[1] \ + & ~EFX_INPLACE_MASK32(32, 63, field)) \ + | EFX_INSERT_FIELD32(32, 63, field, value)); \ + } while (0) + +#define EFX_SET_DWORD_FIELD(dword, field, value) do { \ + (dword).u32[0] = (((dword).u32[0] \ + & ~EFX_INPLACE_MASK32(0, 31, field)) \ + | EFX_INSERT_FIELD32(0, 31, field, value)); \ + } while (0) + +#if BITS_PER_LONG == 64 +#define EFX_SET_OWORD_FIELD EFX_SET_OWORD_FIELD64 +#define EFX_SET_QWORD_FIELD EFX_SET_QWORD_FIELD64 +#else +#define EFX_SET_OWORD_FIELD EFX_SET_OWORD_FIELD32 +#define EFX_SET_QWORD_FIELD EFX_SET_QWORD_FIELD32 +#endif + +#define EFX_SET_OWORD_FIELD_VER(efx, oword, field, value) do { \ + if (FALCON_REV(efx) >= FALCON_REV_B0) { \ + EFX_SET_OWORD_FIELD((oword), field##_B0, (value)); \ + } else { \ + EFX_SET_OWORD_FIELD((oword), field##_A1, (value)); \ + } \ +} while (0) + +#define EFX_QWORD_FIELD_VER(efx, qword, field) \ + (FALCON_REV(efx) >= FALCON_REV_B0 ? \ + EFX_QWORD_FIELD((qword), field##_B0) : \ + EFX_QWORD_FIELD((qword), field##_A1)) + +/* Used to avoid compiler warnings about shift range exceeding width + * of the data types when dma_addr_t is only 32 bits wide. + */ +#define DMA_ADDR_T_WIDTH (8 * sizeof(dma_addr_t)) +#define EFX_DMA_TYPE_WIDTH(width) \ + (((width) < DMA_ADDR_T_WIDTH) ? (width) : DMA_ADDR_T_WIDTH) +#define EFX_DMA_MAX_MASK ((DMA_ADDR_T_WIDTH == 64) ? \ + ~((u64) 0) : ~((u32) 0)) +#define EFX_DMA_MASK(mask) ((mask) & EFX_DMA_MAX_MASK) + +#endif /* EFX_BITFIELD_H */ diff --git a/drivers/net/sfc/boards.c b/drivers/net/sfc/boards.c new file mode 100644 index 00000000000..eecaa6d5858 --- /dev/null +++ b/drivers/net/sfc/boards.c @@ -0,0 +1,167 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "phy.h" +#include "boards.h" +#include "efx.h" + +/* Macros for unpacking the board revision */ +/* The revision info is in host byte order. */ +#define BOARD_TYPE(_rev) (_rev >> 8) +#define BOARD_MAJOR(_rev) ((_rev >> 4) & 0xf) +#define BOARD_MINOR(_rev) (_rev & 0xf) + +/* Blink support. If the PHY has no auto-blink mode so we hang it off a timer */ +#define BLINK_INTERVAL (HZ/2) + +static void blink_led_timer(unsigned long context) +{ + struct efx_nic *efx = (struct efx_nic *)context; + struct efx_blinker *bl = &efx->board_info.blinker; + efx->board_info.set_fault_led(efx, bl->state); + bl->state = !bl->state; + if (bl->resubmit) { + bl->timer.expires = jiffies + BLINK_INTERVAL; + add_timer(&bl->timer); + } +} + +static void board_blink(struct efx_nic *efx, int blink) +{ + struct efx_blinker *blinker = &efx->board_info.blinker; + + /* The rtnl mutex serialises all ethtool ioctls, so + * nothing special needs doing here. */ + if (blink) { + blinker->resubmit = 1; + blinker->state = 0; + setup_timer(&blinker->timer, blink_led_timer, + (unsigned long)efx); + blinker->timer.expires = jiffies + BLINK_INTERVAL; + add_timer(&blinker->timer); + } else { + blinker->resubmit = 0; + if (blinker->timer.function) + del_timer_sync(&blinker->timer); + efx->board_info.set_fault_led(efx, 0); + } +} + +/***************************************************************************** + * Support for the SFE4002 + * + */ +/****************************************************************************/ +/* LED allocations. Note that on rev A0 boards the schematic and the reality + * differ: red and green are swapped. Below is the fixed (A1) layout (there + * are only 3 A0 boards in existence, so no real reason to make this + * conditional). + */ +#define SFE4002_FAULT_LED (2) /* Red */ +#define SFE4002_RX_LED (0) /* Green */ +#define SFE4002_TX_LED (1) /* Amber */ + +static int sfe4002_init_leds(struct efx_nic *efx) +{ + /* Set the TX and RX LEDs to reflect status and activity, and the + * fault LED off */ + xfp_set_led(efx, SFE4002_TX_LED, + QUAKE_LED_TXLINK | QUAKE_LED_LINK_ACTSTAT); + xfp_set_led(efx, SFE4002_RX_LED, + QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACTSTAT); + xfp_set_led(efx, SFE4002_FAULT_LED, QUAKE_LED_OFF); + efx->board_info.blinker.led_num = SFE4002_FAULT_LED; + return 0; +} + +static void sfe4002_fault_led(struct efx_nic *efx, int state) +{ + xfp_set_led(efx, SFE4002_FAULT_LED, state ? QUAKE_LED_ON : + QUAKE_LED_OFF); +} + +static int sfe4002_init(struct efx_nic *efx) +{ + efx->board_info.init_leds = sfe4002_init_leds; + efx->board_info.set_fault_led = sfe4002_fault_led; + efx->board_info.blink = board_blink; + return 0; +} + +/* This will get expanded as board-specific details get moved out of the + * PHY drivers. */ +struct efx_board_data { + const char *ref_model; + const char *gen_type; + int (*init) (struct efx_nic *nic); +}; + +static int dummy_init(struct efx_nic *nic) +{ + return 0; +} + +static struct efx_board_data board_data[] = { + [EFX_BOARD_INVALID] = + {NULL, NULL, dummy_init}, + [EFX_BOARD_SFE4001] = + {"SFE4001", "10GBASE-T adapter", sfe4001_poweron}, + [EFX_BOARD_SFE4002] = + {"SFE4002", "XFP adapter", sfe4002_init}, +}; + +int efx_set_board_info(struct efx_nic *efx, u16 revision_info) +{ + int rc = 0; + struct efx_board_data *data; + + if (BOARD_TYPE(revision_info) >= EFX_BOARD_MAX) { + EFX_ERR(efx, "squashing unknown board type %d\n", + BOARD_TYPE(revision_info)); + revision_info = 0; + } + + if (BOARD_TYPE(revision_info) == 0) { + efx->board_info.major = 0; + efx->board_info.minor = 0; + /* For early boards that don't have revision info. there is + * only 1 board for each PHY type, so we can work it out, with + * the exception of the PHY-less boards. */ + switch (efx->phy_type) { + case PHY_TYPE_10XPRESS: + efx->board_info.type = EFX_BOARD_SFE4001; + break; + case PHY_TYPE_XFP: + efx->board_info.type = EFX_BOARD_SFE4002; + break; + default: + efx->board_info.type = 0; + break; + } + } else { + efx->board_info.type = BOARD_TYPE(revision_info); + efx->board_info.major = BOARD_MAJOR(revision_info); + efx->board_info.minor = BOARD_MINOR(revision_info); + } + + data = &board_data[efx->board_info.type]; + + /* Report the board model number or generic type for recognisable + * boards. */ + if (efx->board_info.type != 0) + EFX_INFO(efx, "board is %s rev %c%d\n", + (efx->pci_dev->subsystem_vendor == EFX_VENDID_SFC) + ? data->ref_model : data->gen_type, + 'A' + efx->board_info.major, efx->board_info.minor); + + efx->board_info.init = data->init; + + return rc; +} diff --git a/drivers/net/sfc/boards.h b/drivers/net/sfc/boards.h new file mode 100644 index 00000000000..f56341d428e --- /dev/null +++ b/drivers/net/sfc/boards.h @@ -0,0 +1,26 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_BOARDS_H +#define EFX_BOARDS_H + +/* Board IDs (must fit in 8 bits) */ +enum efx_board_type { + EFX_BOARD_INVALID = 0, + EFX_BOARD_SFE4001 = 1, /* SFE4001 (10GBASE-T) */ + EFX_BOARD_SFE4002 = 2, + /* Insert new types before here */ + EFX_BOARD_MAX +}; + +extern int efx_set_board_info(struct efx_nic *efx, u16 revision_info); +extern int sfe4001_poweron(struct efx_nic *efx); +extern void sfe4001_poweroff(struct efx_nic *efx); + +#endif diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c new file mode 100644 index 00000000000..59edcf793c1 --- /dev/null +++ b/drivers/net/sfc/efx.c @@ -0,0 +1,2208 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "net_driver.h" +#include "gmii.h" +#include "ethtool.h" +#include "tx.h" +#include "rx.h" +#include "efx.h" +#include "mdio_10g.h" +#include "falcon.h" +#include "workarounds.h" +#include "mac.h" + +#define EFX_MAX_MTU (9 * 1024) + +/* RX slow fill workqueue. If memory allocation fails in the fast path, + * a work item is pushed onto this work queue to retry the allocation later, + * to avoid the NIC being starved of RX buffers. Since this is a per cpu + * workqueue, there is nothing to be gained in making it per NIC + */ +static struct workqueue_struct *refill_workqueue; + +/************************************************************************** + * + * Configurable values + * + *************************************************************************/ + +/* + * Enable large receive offload (LRO) aka soft segment reassembly (SSR) + * + * This sets the default for new devices. It can be controlled later + * using ethtool. + */ +static int lro = 1; +module_param(lro, int, 0644); +MODULE_PARM_DESC(lro, "Large receive offload acceleration"); + +/* + * Use separate channels for TX and RX events + * + * Set this to 1 to use separate channels for TX and RX. It allows us to + * apply a higher level of interrupt moderation to TX events. + * + * This is forced to 0 for MSI interrupt mode as the interrupt vector + * is not written + */ +static unsigned int separate_tx_and_rx_channels = 1; + +/* This is the weight assigned to each of the (per-channel) virtual + * NAPI devices. + */ +static int napi_weight = 64; + +/* This is the time (in jiffies) between invocations of the hardware + * monitor, which checks for known hardware bugs and resets the + * hardware and driver as necessary. + */ +unsigned int efx_monitor_interval = 1 * HZ; + +/* This controls whether or not the hardware monitor will trigger a + * reset when it detects an error condition. + */ +static unsigned int monitor_reset = 1; + +/* This controls whether or not the driver will initialise devices + * with invalid MAC addresses stored in the EEPROM or flash. If true, + * such devices will be initialised with a random locally-generated + * MAC address. This allows for loading the sfc_mtd driver to + * reprogram the flash, even if the flash contents (including the MAC + * address) have previously been erased. + */ +static unsigned int allow_bad_hwaddr; + +/* Initial interrupt moderation settings. They can be modified after + * module load with ethtool. + * + * The default for RX should strike a balance between increasing the + * round-trip latency and reducing overhead. + */ +static unsigned int rx_irq_mod_usec = 60; + +/* Initial interrupt moderation settings. They can be modified after + * module load with ethtool. + * + * This default is chosen to ensure that a 10G link does not go idle + * while a TX queue is stopped after it has become full. A queue is + * restarted when it drops below half full. The time this takes (assuming + * worst case 3 descriptors per packet and 1024 descriptors) is + * 512 / 3 * 1.2 = 205 usec. + */ +static unsigned int tx_irq_mod_usec = 150; + +/* This is the first interrupt mode to try out of: + * 0 => MSI-X + * 1 => MSI + * 2 => legacy + */ +static unsigned int interrupt_mode; + +/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), + * i.e. the number of CPUs among which we may distribute simultaneous + * interrupt handling. + * + * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. + * The default (0) means to assign an interrupt to each package (level II cache) + */ +static unsigned int rss_cpus; +module_param(rss_cpus, uint, 0444); +MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); + +/************************************************************************** + * + * Utility functions and prototypes + * + *************************************************************************/ +static void efx_remove_channel(struct efx_channel *channel); +static void efx_remove_port(struct efx_nic *efx); +static void efx_fini_napi(struct efx_nic *efx); +static void efx_fini_channels(struct efx_nic *efx); + +#define EFX_ASSERT_RESET_SERIALISED(efx) \ + do { \ + if ((efx->state == STATE_RUNNING) || \ + (efx->state == STATE_RESETTING)) \ + ASSERT_RTNL(); \ + } while (0) + +/************************************************************************** + * + * Event queue processing + * + *************************************************************************/ + +/* Process channel's event queue + * + * This function is responsible for processing the event queue of a + * single channel. The caller must guarantee that this function will + * never be concurrently called more than once on the same channel, + * though different channels may be being processed concurrently. + */ +static inline int efx_process_channel(struct efx_channel *channel, int rx_quota) +{ + int rxdmaqs; + struct efx_rx_queue *rx_queue; + + if (unlikely(channel->efx->reset_pending != RESET_TYPE_NONE || + !channel->enabled)) + return rx_quota; + + rxdmaqs = falcon_process_eventq(channel, &rx_quota); + + /* Deliver last RX packet. */ + if (channel->rx_pkt) { + __efx_rx_packet(channel, channel->rx_pkt, + channel->rx_pkt_csummed); + channel->rx_pkt = NULL; + } + + efx_flush_lro(channel); + efx_rx_strategy(channel); + + /* Refill descriptor rings as necessary */ + rx_queue = &channel->efx->rx_queue[0]; + while (rxdmaqs) { + if (rxdmaqs & 0x01) + efx_fast_push_rx_descriptors(rx_queue); + rx_queue++; + rxdmaqs >>= 1; + } + + return rx_quota; +} + +/* Mark channel as finished processing + * + * Note that since we will not receive further interrupts for this + * channel before we finish processing and call the eventq_read_ack() + * method, there is no need to use the interrupt hold-off timers. + */ +static inline void efx_channel_processed(struct efx_channel *channel) +{ + /* Write to EVQ_RPTR_REG. If a new event arrived in a race + * with finishing processing, a new interrupt will be raised. + */ + channel->work_pending = 0; + smp_wmb(); /* Ensure channel updated before any new interrupt. */ + falcon_eventq_read_ack(channel); +} + +/* NAPI poll handler + * + * NAPI guarantees serialisation of polls of the same device, which + * provides the guarantee required by efx_process_channel(). + */ +static int efx_poll(struct napi_struct *napi, int budget) +{ + struct efx_channel *channel = + container_of(napi, struct efx_channel, napi_str); + struct net_device *napi_dev = channel->napi_dev; + int unused; + int rx_packets; + + EFX_TRACE(channel->efx, "channel %d NAPI poll executing on CPU %d\n", + channel->channel, raw_smp_processor_id()); + + unused = efx_process_channel(channel, budget); + rx_packets = (budget - unused); + + if (rx_packets < budget) { + /* There is no race here; although napi_disable() will + * only wait for netif_rx_complete(), this isn't a problem + * since efx_channel_processed() will have no effect if + * interrupts have already been disabled. + */ + netif_rx_complete(napi_dev, napi); + efx_channel_processed(channel); + } + + return rx_packets; +} + +/* Process the eventq of the specified channel immediately on this CPU + * + * Disable hardware generated interrupts, wait for any existing + * processing to finish, then directly poll (and ack ) the eventq. + * Finally reenable NAPI and interrupts. + * + * Since we are touching interrupts the caller should hold the suspend lock + */ +void efx_process_channel_now(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + BUG_ON(!channel->used_flags); + BUG_ON(!channel->enabled); + + /* Disable interrupts and wait for ISRs to complete */ + falcon_disable_interrupts(efx); + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + if (channel->has_interrupt && channel->irq) + synchronize_irq(channel->irq); + + /* Wait for any NAPI processing to complete */ + napi_disable(&channel->napi_str); + + /* Poll the channel */ + (void) efx_process_channel(channel, efx->type->evq_size); + + /* Ack the eventq. This may cause an interrupt to be generated + * when they are reenabled */ + efx_channel_processed(channel); + + napi_enable(&channel->napi_str); + falcon_enable_interrupts(efx); +} + +/* Create event queue + * Event queue memory allocations are done only once. If the channel + * is reset, the memory buffer will be reused; this guards against + * errors during channel reset and also simplifies interrupt handling. + */ +static int efx_probe_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d create event queue\n", channel->channel); + + return falcon_probe_eventq(channel); +} + +/* Prepare channel's event queue */ +static int efx_init_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d init event queue\n", channel->channel); + + channel->eventq_read_ptr = 0; + + return falcon_init_eventq(channel); +} + +static void efx_fini_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d fini event queue\n", channel->channel); + + falcon_fini_eventq(channel); +} + +static void efx_remove_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d remove event queue\n", channel->channel); + + falcon_remove_eventq(channel); +} + +/************************************************************************** + * + * Channel handling + * + *************************************************************************/ + +/* Setup per-NIC RX buffer parameters. + * Calculate the rx buffer allocation parameters required to support + * the current MTU, including padding for header alignment and overruns. + */ +static void efx_calc_rx_buffer_params(struct efx_nic *efx) +{ + unsigned int order, len; + + len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + + /* Calculate page-order */ + for (order = 0; ((1u << order) * PAGE_SIZE) < len; ++order) + ; + + efx->rx_buffer_len = len; + efx->rx_buffer_order = order; +} + +static int efx_probe_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int rc; + + EFX_LOG(channel->efx, "creating channel %d\n", channel->channel); + + rc = efx_probe_eventq(channel); + if (rc) + goto fail1; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_probe_tx_queue(tx_queue); + if (rc) + goto fail2; + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_probe_rx_queue(rx_queue); + if (rc) + goto fail3; + } + + channel->n_rx_frm_trunc = 0; + + return 0; + + fail3: + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + fail2: + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + fail1: + return rc; +} + + +/* Channels are shutdown and reinitialised whilst the NIC is running + * to propagate configuration changes (mtu, checksum offload), or + * to clear hardware error conditions + */ +static int efx_init_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + int rc = 0; + + efx_calc_rx_buffer_params(efx); + + /* Initialise the channels */ + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "init chan %d\n", channel->channel); + + rc = efx_init_eventq(channel); + if (rc) + goto err; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_init_tx_queue(tx_queue); + if (rc) + goto err; + } + + /* The rx buffer allocation strategy is MTU dependent */ + efx_rx_strategy(channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_init_rx_queue(rx_queue); + if (rc) + goto err; + } + + WARN_ON(channel->rx_pkt != NULL); + efx_rx_strategy(channel); + } + + return 0; + + err: + EFX_ERR(efx, "failed to initialise channel %d\n", + channel ? channel->channel : -1); + efx_fini_channels(efx); + return rc; +} + +/* This enables event queue processing and packet transmission. + * + * Note that this function is not allowed to fail, since that would + * introduce too much complexity into the suspend/resume path. + */ +static void efx_start_channel(struct efx_channel *channel) +{ + struct efx_rx_queue *rx_queue; + + EFX_LOG(channel->efx, "starting chan %d\n", channel->channel); + + if (!(channel->efx->net_dev->flags & IFF_UP)) + netif_napi_add(channel->napi_dev, &channel->napi_str, + efx_poll, napi_weight); + + channel->work_pending = 0; + channel->enabled = 1; + smp_wmb(); /* ensure channel updated before first interrupt */ + + napi_enable(&channel->napi_str); + + /* Load up RX descriptors */ + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fast_push_rx_descriptors(rx_queue); +} + +/* This disables event queue processing and packet transmission. + * This function does not guarantee that all queue processing + * (e.g. RX refill) is complete. + */ +static void efx_stop_channel(struct efx_channel *channel) +{ + struct efx_rx_queue *rx_queue; + + if (!channel->enabled) + return; + + EFX_LOG(channel->efx, "stop chan %d\n", channel->channel); + + channel->enabled = 0; + napi_disable(&channel->napi_str); + + /* Ensure that any worker threads have exited or will be no-ops */ + efx_for_each_channel_rx_queue(rx_queue, channel) { + spin_lock_bh(&rx_queue->add_lock); + spin_unlock_bh(&rx_queue->add_lock); + } +} + +static void efx_fini_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->port_enabled); + + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "shut down chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fini_rx_queue(rx_queue); + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_fini_tx_queue(tx_queue); + } + + /* Do the event queues last so that we can handle flush events + * for all DMA queues. */ + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "shut down evq %d\n", channel->channel); + + efx_fini_eventq(channel); + } +} + +static void efx_remove_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + EFX_LOG(channel->efx, "destroy chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + efx_remove_eventq(channel); + + channel->used_flags = 0; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue, int delay) +{ + queue_delayed_work(refill_workqueue, &rx_queue->work, delay); +} + +/************************************************************************** + * + * Port handling + * + **************************************************************************/ + +/* This ensures that the kernel is kept informed (via + * netif_carrier_on/off) of the link status, and also maintains the + * link status's stop on the port's TX queue. + */ +static void efx_link_status_changed(struct efx_nic *efx) +{ + int carrier_ok; + + /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure + * that no events are triggered between unregister_netdev() and the + * driver unloading. A more general condition is that NETDEV_CHANGE + * can only be generated between NETDEV_UP and NETDEV_DOWN */ + if (!netif_running(efx->net_dev)) + return; + + carrier_ok = netif_carrier_ok(efx->net_dev) ? 1 : 0; + if (efx->link_up != carrier_ok) { + efx->n_link_state_changes++; + + if (efx->link_up) + netif_carrier_on(efx->net_dev); + else + netif_carrier_off(efx->net_dev); + } + + /* Status message for kernel log */ + if (efx->link_up) { + struct mii_if_info *gmii = &efx->mii; + unsigned adv, lpa; + /* NONE here means direct XAUI from the controller, with no + * MDIO-attached device we can query. */ + if (efx->phy_type != PHY_TYPE_NONE) { + adv = gmii_advertised(gmii); + lpa = gmii_lpa(gmii); + } else { + lpa = GM_LPA_10000 | LPA_DUPLEX; + adv = lpa; + } + EFX_INFO(efx, "link up at %dMbps %s-duplex " + "(adv %04x lpa %04x) (MTU %d)%s\n", + (efx->link_options & GM_LPA_10000 ? 10000 : + (efx->link_options & GM_LPA_1000 ? 1000 : + (efx->link_options & GM_LPA_100 ? 100 : + 10))), + (efx->link_options & GM_LPA_DUPLEX ? + "full" : "half"), + adv, lpa, + efx->net_dev->mtu, + (efx->promiscuous ? " [PROMISC]" : "")); + } else { + EFX_INFO(efx, "link down\n"); + } + +} + +/* This call reinitialises the MAC to pick up new PHY settings. The + * caller must hold the mac_lock */ +static void __efx_reconfigure_port(struct efx_nic *efx) +{ + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + EFX_LOG(efx, "reconfiguring MAC from PHY settings on CPU %d\n", + raw_smp_processor_id()); + + falcon_reconfigure_xmac(efx); + + /* Inform kernel of loss/gain of carrier */ + efx_link_status_changed(efx); +} + +/* Reinitialise the MAC to pick up new PHY settings, even if the port is + * disabled. */ +void efx_reconfigure_port(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +/* Asynchronous efx_reconfigure_port work item. To speed up efx_flush_all() + * we don't efx_reconfigure_port() if the port is disabled. Care is taken + * in efx_stop_all() and efx_start_port() to prevent PHY events being lost */ +static void efx_reconfigure_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + reconfigure_work); + + mutex_lock(&efx->mac_lock); + if (efx->port_enabled) + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +static int efx_probe_port(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "create port\n"); + + /* Connect up MAC/PHY operations table and read MAC address */ + rc = falcon_probe_port(efx); + if (rc) + goto err; + + /* Sanity check MAC address */ + if (is_valid_ether_addr(efx->mac_address)) { + memcpy(efx->net_dev->dev_addr, efx->mac_address, ETH_ALEN); + } else { + DECLARE_MAC_BUF(mac); + + EFX_ERR(efx, "invalid MAC address %s\n", + print_mac(mac, efx->mac_address)); + if (!allow_bad_hwaddr) { + rc = -EINVAL; + goto err; + } + random_ether_addr(efx->net_dev->dev_addr); + EFX_INFO(efx, "using locally-generated MAC %s\n", + print_mac(mac, efx->net_dev->dev_addr)); + } + + return 0; + + err: + efx_remove_port(efx); + return rc; +} + +static int efx_init_port(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "init port\n"); + + /* Initialise the MAC and PHY */ + rc = falcon_init_xmac(efx); + if (rc) + return rc; + + efx->port_initialized = 1; + + /* Reconfigure port to program MAC registers */ + falcon_reconfigure_xmac(efx); + + return 0; +} + +/* Allow efx_reconfigure_port() to be scheduled, and close the window + * between efx_stop_port and efx_flush_all whereby a previously scheduled + * efx_reconfigure_port() may have been cancelled */ +static void efx_start_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "start port\n"); + BUG_ON(efx->port_enabled); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = 1; + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +/* Prevent efx_reconfigure_work and efx_monitor() from executing, and + * efx_set_multicast_list() from scheduling efx_reconfigure_work. + * efx_reconfigure_work can still be scheduled via NAPI processing + * until efx_flush_all() is called */ +static void efx_stop_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "stop port\n"); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = 0; + mutex_unlock(&efx->mac_lock); + + /* Serialise against efx_set_multicast_list() */ + if (NET_DEV_REGISTERED(efx)) { + netif_tx_lock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); + } +} + +static void efx_fini_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "shut down port\n"); + + if (!efx->port_initialized) + return; + + falcon_fini_xmac(efx); + efx->port_initialized = 0; + + efx->link_up = 0; + efx_link_status_changed(efx); +} + +static void efx_remove_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "destroying port\n"); + + falcon_remove_port(efx); +} + +/************************************************************************** + * + * NIC handling + * + **************************************************************************/ + +/* This configures the PCI device to enable I/O and DMA. */ +static int efx_init_io(struct efx_nic *efx) +{ + struct pci_dev *pci_dev = efx->pci_dev; + dma_addr_t dma_mask = efx->type->max_dma_mask; + int rc; + + EFX_LOG(efx, "initialising I/O\n"); + + rc = pci_enable_device(pci_dev); + if (rc) { + EFX_ERR(efx, "failed to enable PCI device\n"); + goto fail1; + } + + pci_set_master(pci_dev); + + /* Set the PCI DMA mask. Try all possibilities from our + * genuine mask down to 32 bits, because some architectures + * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + * masks event though they reject 46 bit masks. + */ + while (dma_mask > 0x7fffffffUL) { + if (pci_dma_supported(pci_dev, dma_mask) && + ((rc = pci_set_dma_mask(pci_dev, dma_mask)) == 0)) + break; + dma_mask >>= 1; + } + if (rc) { + EFX_ERR(efx, "could not find a suitable DMA mask\n"); + goto fail2; + } + EFX_LOG(efx, "using DMA mask %llx\n", (unsigned long long) dma_mask); + rc = pci_set_consistent_dma_mask(pci_dev, dma_mask); + if (rc) { + /* pci_set_consistent_dma_mask() is not *allowed* to + * fail with a mask that pci_set_dma_mask() accepted, + * but just in case... + */ + EFX_ERR(efx, "failed to set consistent DMA mask\n"); + goto fail2; + } + + efx->membase_phys = pci_resource_start(efx->pci_dev, + efx->type->mem_bar); + rc = pci_request_region(pci_dev, efx->type->mem_bar, "sfc"); + if (rc) { + EFX_ERR(efx, "request for memory BAR failed\n"); + rc = -EIO; + goto fail3; + } + efx->membase = ioremap_nocache(efx->membase_phys, + efx->type->mem_map_size); + if (!efx->membase) { + EFX_ERR(efx, "could not map memory BAR %d at %lx+%x\n", + efx->type->mem_bar, efx->membase_phys, + efx->type->mem_map_size); + rc = -ENOMEM; + goto fail4; + } + EFX_LOG(efx, "memory BAR %u at %lx+%x (virtual %p)\n", + efx->type->mem_bar, efx->membase_phys, efx->type->mem_map_size, + efx->membase); + + return 0; + + fail4: + release_mem_region(efx->membase_phys, efx->type->mem_map_size); + fail3: + efx->membase_phys = 0UL; + fail2: + pci_disable_device(efx->pci_dev); + fail1: + return rc; +} + +static void efx_fini_io(struct efx_nic *efx) +{ + EFX_LOG(efx, "shutting down I/O\n"); + + if (efx->membase) { + iounmap(efx->membase); + efx->membase = NULL; + } + + if (efx->membase_phys) { + pci_release_region(efx->pci_dev, efx->type->mem_bar); + efx->membase_phys = 0UL; + } + + pci_disable_device(efx->pci_dev); +} + +/* Probe the number and type of interrupts we are able to obtain. */ +static void efx_probe_interrupts(struct efx_nic *efx) +{ + int max_channel = efx->type->phys_addr_channels - 1; + struct msix_entry xentries[EFX_MAX_CHANNELS]; + int rc, i; + + if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + BUG_ON(!pci_find_capability(efx->pci_dev, PCI_CAP_ID_MSIX)); + + efx->rss_queues = rss_cpus ? rss_cpus : num_online_cpus(); + efx->rss_queues = min(efx->rss_queues, max_channel + 1); + efx->rss_queues = min(efx->rss_queues, EFX_MAX_CHANNELS); + + /* Request maximum number of MSI interrupts, and fill out + * the channel interrupt information the allowed allocation */ + for (i = 0; i < efx->rss_queues; i++) + xentries[i].entry = i; + rc = pci_enable_msix(efx->pci_dev, xentries, efx->rss_queues); + if (rc > 0) { + EFX_BUG_ON_PARANOID(rc >= efx->rss_queues); + efx->rss_queues = rc; + rc = pci_enable_msix(efx->pci_dev, xentries, + efx->rss_queues); + } + + if (rc == 0) { + for (i = 0; i < efx->rss_queues; i++) { + efx->channel[i].has_interrupt = 1; + efx->channel[i].irq = xentries[i].vector; + } + } else { + /* Fall back to single channel MSI */ + efx->interrupt_mode = EFX_INT_MODE_MSI; + EFX_ERR(efx, "could not enable MSI-X\n"); + } + } + + /* Try single interrupt MSI */ + if (efx->interrupt_mode == EFX_INT_MODE_MSI) { + efx->rss_queues = 1; + rc = pci_enable_msi(efx->pci_dev); + if (rc == 0) { + efx->channel[0].irq = efx->pci_dev->irq; + efx->channel[0].has_interrupt = 1; + } else { + EFX_ERR(efx, "could not enable MSI\n"); + efx->interrupt_mode = EFX_INT_MODE_LEGACY; + } + } + + /* Assume legacy interrupts */ + if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { + efx->rss_queues = 1; + /* Every channel is interruptible */ + for (i = 0; i < EFX_MAX_CHANNELS; i++) + efx->channel[i].has_interrupt = 1; + efx->legacy_irq = efx->pci_dev->irq; + } +} + +static void efx_remove_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + /* Remove MSI/MSI-X interrupts */ + efx_for_each_channel_with_interrupt(channel, efx) + channel->irq = 0; + pci_disable_msi(efx->pci_dev); + pci_disable_msix(efx->pci_dev); + + /* Remove legacy interrupt */ + efx->legacy_irq = 0; +} + +/* Select number of used resources + * Should be called after probe_interrupts() + */ +static void efx_select_used(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int i; + + /* TX queues. One per port per channel with TX capability + * (more than one per port won't work on Linux, due to out + * of order issues... but will be fine on Solaris) + */ + tx_queue = &efx->tx_queue[0]; + + /* Perform this for each channel with TX capabilities. + * At the moment, we only support a single TX queue + */ + tx_queue->used = 1; + if ((!EFX_INT_MODE_USE_MSI(efx)) && separate_tx_and_rx_channels) + tx_queue->channel = &efx->channel[1]; + else + tx_queue->channel = &efx->channel[0]; + tx_queue->channel->used_flags |= EFX_USED_BY_TX; + tx_queue++; + + /* RX queues. Each has a dedicated channel. */ + for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { + rx_queue = &efx->rx_queue[i]; + + if (i < efx->rss_queues) { + rx_queue->used = 1; + /* If we allow multiple RX queues per channel + * we need to decide that here + */ + rx_queue->channel = &efx->channel[rx_queue->queue]; + rx_queue->channel->used_flags |= EFX_USED_BY_RX; + rx_queue++; + } + } +} + +static int efx_probe_nic(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "creating NIC\n"); + + /* Carry out hardware-type specific initialisation */ + rc = falcon_probe_nic(efx); + if (rc) + return rc; + + /* Determine the number of channels and RX queues by trying to hook + * in MSI-X interrupts. */ + efx_probe_interrupts(efx); + + /* Determine number of RX queues and TX queues */ + efx_select_used(efx); + + /* Initialise the interrupt moderation settings */ + efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec); + + return 0; +} + +static void efx_remove_nic(struct efx_nic *efx) +{ + EFX_LOG(efx, "destroying NIC\n"); + + efx_remove_interrupts(efx); + falcon_remove_nic(efx); +} + +/************************************************************************** + * + * NIC startup/shutdown + * + *************************************************************************/ + +static int efx_probe_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + /* Create NIC */ + rc = efx_probe_nic(efx); + if (rc) { + EFX_ERR(efx, "failed to create NIC\n"); + goto fail1; + } + + /* Create port */ + rc = efx_probe_port(efx); + if (rc) { + EFX_ERR(efx, "failed to create port\n"); + goto fail2; + } + + /* Create channels */ + efx_for_each_channel(channel, efx) { + rc = efx_probe_channel(channel); + if (rc) { + EFX_ERR(efx, "failed to create channel %d\n", + channel->channel); + goto fail3; + } + } + + return 0; + + fail3: + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + efx_remove_port(efx); + fail2: + efx_remove_nic(efx); + fail1: + return rc; +} + +/* Called after previous invocation(s) of efx_stop_all, restarts the + * port, kernel transmit queue, NAPI processing and hardware interrupts, + * and ensures that the port is scheduled to be reconfigured. + * This function is safe to call multiple times when the NIC is in any + * state. */ +static void efx_start_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + EFX_ASSERT_RESET_SERIALISED(efx); + + /* Check that it is appropriate to restart the interface. All + * of these flags are safe to read under just the rtnl lock */ + if (efx->port_enabled) + return; + if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) + return; + if (NET_DEV_REGISTERED(efx) && !netif_running(efx->net_dev)) + return; + + /* Mark the port as enabled so port reconfigurations can start, then + * restart the transmit interface early so the watchdog timer stops */ + efx_start_port(efx); + efx_wake_queue(efx); + + efx_for_each_channel(channel, efx) + efx_start_channel(channel); + + falcon_enable_interrupts(efx); + + /* Start hardware monitor if we're in RUNNING */ + if (efx->state == STATE_RUNNING) + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/* Flush all delayed work. Should only be called when no more delayed work + * will be scheduled. This doesn't flush pending online resets (efx_reset), + * since we're holding the rtnl_lock at this point. */ +static void efx_flush_all(struct efx_nic *efx) +{ + struct efx_rx_queue *rx_queue; + + /* Make sure the hardware monitor is stopped */ + cancel_delayed_work_sync(&efx->monitor_work); + + /* Ensure that all RX slow refills are complete. */ + efx_for_each_rx_queue(rx_queue, efx) { + cancel_delayed_work_sync(&rx_queue->work); + } + + /* Stop scheduled port reconfigurations */ + cancel_work_sync(&efx->reconfigure_work); + +} + +/* Quiesce hardware and software without bringing the link down. + * Safe to call multiple times, when the nic and interface is in any + * state. The caller is guaranteed to subsequently be in a position + * to modify any hardware and software state they see fit without + * taking locks. */ +static void efx_stop_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + EFX_ASSERT_RESET_SERIALISED(efx); + + /* port_enabled can be read safely under the rtnl lock */ + if (!efx->port_enabled) + return; + + /* Disable interrupts and wait for ISR to complete */ + falcon_disable_interrupts(efx); + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + efx_for_each_channel_with_interrupt(channel, efx) + if (channel->irq) + synchronize_irq(channel->irq); + + /* Stop all NAPI processing and synchronous rx refills */ + efx_for_each_channel(channel, efx) + efx_stop_channel(channel); + + /* Stop all asynchronous port reconfigurations. Since all + * event processing has already been stopped, there is no + * window to loose phy events */ + efx_stop_port(efx); + + /* Flush reconfigure_work, refill_workqueue, monitor_work */ + efx_flush_all(efx); + + /* Isolate the MAC from the TX and RX engines, so that queue + * flushes will complete in a timely fashion. */ + falcon_deconfigure_mac_wrapper(efx); + falcon_drain_tx_fifo(efx); + + /* Stop the kernel transmit interface late, so the watchdog + * timer isn't ticking over the flush */ + efx_stop_queue(efx); + if (NET_DEV_REGISTERED(efx)) { + netif_tx_lock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); + } +} + +static void efx_remove_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + efx_remove_port(efx); + efx_remove_nic(efx); +} + +/* A convinience function to safely flush all the queues */ +int efx_flush_queues(struct efx_nic *efx) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + efx_stop_all(efx); + + efx_fini_channels(efx); + rc = efx_init_channels(efx); + if (rc) { + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + return rc; + } + + efx_start_all(efx); + + return 0; +} + +/************************************************************************** + * + * Interrupt moderation + * + **************************************************************************/ + +/* Set interrupt moderation parameters */ +void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs, int rx_usecs) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + EFX_ASSERT_RESET_SERIALISED(efx); + + efx_for_each_tx_queue(tx_queue, efx) + tx_queue->channel->irq_moderation = tx_usecs; + + efx_for_each_rx_queue(rx_queue, efx) + rx_queue->channel->irq_moderation = rx_usecs; +} + +/************************************************************************** + * + * Hardware monitor + * + **************************************************************************/ + +/* Run periodically off the general workqueue. Serialised against + * efx_reconfigure_port via the mac_lock */ +static void efx_monitor(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + monitor_work.work); + int rc = 0; + + EFX_TRACE(efx, "hardware monitor executing on CPU %d\n", + raw_smp_processor_id()); + + + /* If the mac_lock is already held then it is likely a port + * reconfiguration is already in place, which will likely do + * most of the work of check_hw() anyway. */ + if (!mutex_trylock(&efx->mac_lock)) { + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); + return; + } + + if (efx->port_enabled) + rc = falcon_check_xmac(efx); + mutex_unlock(&efx->mac_lock); + + if (rc) { + if (monitor_reset) { + EFX_ERR(efx, "hardware monitor detected a fault: " + "triggering reset\n"); + efx_schedule_reset(efx, RESET_TYPE_MONITOR); + } else { + EFX_ERR(efx, "hardware monitor detected a fault, " + "skipping reset\n"); + } + } + + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/************************************************************************** + * + * ioctls + * + *************************************************************************/ + +/* Net device ioctl + * Context: process, rtnl_lock() held. + */ +static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) +{ + struct efx_nic *efx = net_dev->priv; + + EFX_ASSERT_RESET_SERIALISED(efx); + + return generic_mii_ioctl(&efx->mii, if_mii(ifr), cmd, NULL); +} + +/************************************************************************** + * + * NAPI interface + * + **************************************************************************/ + +static int efx_init_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + efx_for_each_channel(channel, efx) { + channel->napi_dev = efx->net_dev; + rc = efx_lro_init(&channel->lro_mgr, efx); + if (rc) + goto err; + } + return 0; + err: + efx_fini_napi(efx); + return rc; +} + +static void efx_fini_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + efx_lro_fini(&channel->lro_mgr); + channel->napi_dev = NULL; + } +} + +/************************************************************************** + * + * Kernel netpoll interface + * + *************************************************************************/ + +#ifdef CONFIG_NET_POLL_CONTROLLER + +/* Although in the common case interrupts will be disabled, this is not + * guaranteed. However, all our work happens inside the NAPI callback, + * so no locking is required. + */ +static void efx_netpoll(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + struct efx_channel *channel; + + efx_for_each_channel_with_interrupt(channel, efx) + efx_schedule_channel(channel); +} + +#endif + +/************************************************************************** + * + * Kernel net device interface + * + *************************************************************************/ + +/* Context: process, rtnl_lock() held. */ +static int efx_net_open(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + EFX_ASSERT_RESET_SERIALISED(efx); + + EFX_LOG(efx, "opening device %s on CPU %d\n", net_dev->name, + raw_smp_processor_id()); + + efx_start_all(efx); + return 0; +} + +/* Context: process, rtnl_lock() held. + * Note that the kernel will ignore our return code; this method + * should really be a void. + */ +static int efx_net_stop(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + int rc; + + EFX_LOG(efx, "closing %s on CPU %d\n", net_dev->name, + raw_smp_processor_id()); + + /* Stop the device and flush all the channels */ + efx_stop_all(efx); + efx_fini_channels(efx); + rc = efx_init_channels(efx); + if (rc) + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + + return 0; +} + +/* Context: process, dev_base_lock held, non-blocking. */ +static struct net_device_stats *efx_net_stats(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + struct efx_mac_stats *mac_stats = &efx->mac_stats; + struct net_device_stats *stats = &net_dev->stats; + + if (!spin_trylock(&efx->stats_lock)) + return stats; + if (efx->state == STATE_RUNNING) { + falcon_update_stats_xmac(efx); + falcon_update_nic_stats(efx); + } + spin_unlock(&efx->stats_lock); + + stats->rx_packets = mac_stats->rx_packets; + stats->tx_packets = mac_stats->tx_packets; + stats->rx_bytes = mac_stats->rx_bytes; + stats->tx_bytes = mac_stats->tx_bytes; + stats->multicast = mac_stats->rx_multicast; + stats->collisions = mac_stats->tx_collision; + stats->rx_length_errors = (mac_stats->rx_gtjumbo + + mac_stats->rx_length_error); + stats->rx_over_errors = efx->n_rx_nodesc_drop_cnt; + stats->rx_crc_errors = mac_stats->rx_bad; + stats->rx_frame_errors = mac_stats->rx_align_error; + stats->rx_fifo_errors = mac_stats->rx_overflow; + stats->rx_missed_errors = mac_stats->rx_missed; + stats->tx_window_errors = mac_stats->tx_late_collision; + + stats->rx_errors = (stats->rx_length_errors + + stats->rx_over_errors + + stats->rx_crc_errors + + stats->rx_frame_errors + + stats->rx_fifo_errors + + stats->rx_missed_errors + + mac_stats->rx_symbol_error); + stats->tx_errors = (stats->tx_window_errors + + mac_stats->tx_bad); + + return stats; +} + +/* Context: netif_tx_lock held, BHs disabled. */ +static void efx_watchdog(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + + EFX_ERR(efx, "TX stuck with stop_count=%d port_enabled=%d: %s\n", + atomic_read(&efx->netif_stop_count), efx->port_enabled, + monitor_reset ? "resetting channels" : "skipping reset"); + + if (monitor_reset) + efx_schedule_reset(efx, RESET_TYPE_MONITOR); +} + + +/* Context: process, rtnl_lock() held. */ +static int efx_change_mtu(struct net_device *net_dev, int new_mtu) +{ + struct efx_nic *efx = net_dev->priv; + int rc = 0; + + EFX_ASSERT_RESET_SERIALISED(efx); + + if (new_mtu > EFX_MAX_MTU) + return -EINVAL; + + efx_stop_all(efx); + + EFX_LOG(efx, "changing MTU to %d\n", new_mtu); + + efx_fini_channels(efx); + net_dev->mtu = new_mtu; + rc = efx_init_channels(efx); + if (rc) + goto fail; + + efx_start_all(efx); + return rc; + + fail: + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + return rc; +} + +static int efx_set_mac_address(struct net_device *net_dev, void *data) +{ + struct efx_nic *efx = net_dev->priv; + struct sockaddr *addr = data; + char *new_addr = addr->sa_data; + + EFX_ASSERT_RESET_SERIALISED(efx); + + if (!is_valid_ether_addr(new_addr)) { + DECLARE_MAC_BUF(mac); + EFX_ERR(efx, "invalid ethernet MAC address requested: %s\n", + print_mac(mac, new_addr)); + return -EINVAL; + } + + memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len); + + /* Reconfigure the MAC */ + efx_reconfigure_port(efx); + + return 0; +} + +/* Context: netif_tx_lock held, BHs disabled. */ +static void efx_set_multicast_list(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + struct dev_mc_list *mc_list = net_dev->mc_list; + union efx_multicast_hash *mc_hash = &efx->multicast_hash; + int promiscuous; + u32 crc; + int bit; + int i; + + /* Set per-MAC promiscuity flag and reconfigure MAC if necessary */ + promiscuous = (net_dev->flags & IFF_PROMISC) ? 1 : 0; + if (efx->promiscuous != promiscuous) { + efx->promiscuous = promiscuous; + /* Close the window between efx_stop_port() and efx_flush_all() + * by only queuing work when the port is enabled. */ + if (efx->port_enabled) + queue_work(efx->workqueue, &efx->reconfigure_work); + } + + /* Build multicast hash table */ + if (promiscuous || (net_dev->flags & IFF_ALLMULTI)) { + memset(mc_hash, 0xff, sizeof(*mc_hash)); + } else { + memset(mc_hash, 0x00, sizeof(*mc_hash)); + for (i = 0; i < net_dev->mc_count; i++) { + crc = ether_crc_le(ETH_ALEN, mc_list->dmi_addr); + bit = crc & (EFX_MCAST_HASH_ENTRIES - 1); + set_bit_le(bit, mc_hash->byte); + mc_list = mc_list->next; + } + } + + /* Create and activate new global multicast hash table */ + falcon_set_multicast_hash(efx); +} + +static int efx_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *net_dev = (struct net_device *)ptr; + + if (net_dev->open == efx_net_open && event == NETDEV_CHANGENAME) { + struct efx_nic *efx = net_dev->priv; + + strcpy(efx->name, net_dev->name); + } + + return NOTIFY_DONE; +} + +static struct notifier_block efx_netdev_notifier = { + .notifier_call = efx_netdev_event, +}; + +static int efx_register_netdev(struct efx_nic *efx) +{ + struct net_device *net_dev = efx->net_dev; + int rc; + + net_dev->watchdog_timeo = 5 * HZ; + net_dev->irq = efx->pci_dev->irq; + net_dev->open = efx_net_open; + net_dev->stop = efx_net_stop; + net_dev->get_stats = efx_net_stats; + net_dev->tx_timeout = &efx_watchdog; + net_dev->hard_start_xmit = efx_hard_start_xmit; + net_dev->do_ioctl = efx_ioctl; + net_dev->change_mtu = efx_change_mtu; + net_dev->set_mac_address = efx_set_mac_address; + net_dev->set_multicast_list = efx_set_multicast_list; +#ifdef CONFIG_NET_POLL_CONTROLLER + net_dev->poll_controller = efx_netpoll; +#endif + SET_NETDEV_DEV(net_dev, &efx->pci_dev->dev); + SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); + + /* Always start with carrier off; PHY events will detect the link */ + netif_carrier_off(efx->net_dev); + + /* Clear MAC statistics */ + falcon_update_stats_xmac(efx); + memset(&efx->mac_stats, 0, sizeof(efx->mac_stats)); + + rc = register_netdev(net_dev); + if (rc) { + EFX_ERR(efx, "could not register net dev\n"); + return rc; + } + strcpy(efx->name, net_dev->name); + + return 0; +} + +static void efx_unregister_netdev(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + + if (!efx->net_dev) + return; + + BUG_ON(efx->net_dev->priv != efx); + + /* Free up any skbs still remaining. This has to happen before + * we try to unregister the netdev as running their destructors + * may be needed to get the device ref. count to 0. */ + efx_for_each_tx_queue(tx_queue, efx) + efx_release_tx_buffers(tx_queue); + + if (NET_DEV_REGISTERED(efx)) { + strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); + unregister_netdev(efx->net_dev); + } +} + +/************************************************************************** + * + * Device reset and suspend + * + **************************************************************************/ + +/* The final hardware and software finalisation before reset. */ +static int efx_reset_down(struct efx_nic *efx, struct ethtool_cmd *ecmd) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + rc = falcon_xmac_get_settings(efx, ecmd); + if (rc) { + EFX_ERR(efx, "could not back up PHY settings\n"); + goto fail; + } + + efx_fini_channels(efx); + return 0; + + fail: + return rc; +} + +/* The first part of software initialisation after a hardware reset + * This function does not handle serialisation with the kernel, it + * assumes the caller has done this */ +static int efx_reset_up(struct efx_nic *efx, struct ethtool_cmd *ecmd) +{ + int rc; + + rc = efx_init_channels(efx); + if (rc) + goto fail1; + + /* Restore MAC and PHY settings. */ + rc = falcon_xmac_set_settings(efx, ecmd); + if (rc) { + EFX_ERR(efx, "could not restore PHY settings\n"); + goto fail2; + } + + return 0; + + fail2: + efx_fini_channels(efx); + fail1: + return rc; +} + +/* Reset the NIC as transparently as possible. Do not reset the PHY + * Note that the reset may fail, in which case the card will be left + * in a most-probably-unusable state. + * + * This function will sleep. You cannot reset from within an atomic + * state; use efx_schedule_reset() instead. + * + * Grabs the rtnl_lock. + */ +static int efx_reset(struct efx_nic *efx) +{ + struct ethtool_cmd ecmd; + enum reset_type method = efx->reset_pending; + int rc; + + /* Serialise with kernel interfaces */ + rtnl_lock(); + + /* If we're not RUNNING then don't reset. Leave the reset_pending + * flag set so that efx_pci_probe_main will be retried */ + if (efx->state != STATE_RUNNING) { + EFX_INFO(efx, "scheduled reset quenched. NIC not RUNNING\n"); + goto unlock_rtnl; + } + + efx->state = STATE_RESETTING; + EFX_INFO(efx, "resetting (%d)\n", method); + + /* The net_dev->get_stats handler is quite slow, and will fail + * if a fetch is pending over reset. Serialise against it. */ + spin_lock(&efx->stats_lock); + spin_unlock(&efx->stats_lock); + + efx_stop_all(efx); + mutex_lock(&efx->mac_lock); + + rc = efx_reset_down(efx, &ecmd); + if (rc) + goto fail1; + + rc = falcon_reset_hw(efx, method); + if (rc) { + EFX_ERR(efx, "failed to reset hardware\n"); + goto fail2; + } + + /* Allow resets to be rescheduled. */ + efx->reset_pending = RESET_TYPE_NONE; + + /* Reinitialise bus-mastering, which may have been turned off before + * the reset was scheduled. This is still appropriate, even in the + * RESET_TYPE_DISABLE since this driver generally assumes the hardware + * can respond to requests. */ + pci_set_master(efx->pci_dev); + + /* Reinitialise device. This is appropriate in the RESET_TYPE_DISABLE + * case so the driver can talk to external SRAM */ + rc = falcon_init_nic(efx); + if (rc) { + EFX_ERR(efx, "failed to initialise NIC\n"); + goto fail3; + } + + /* Leave device stopped if necessary */ + if (method == RESET_TYPE_DISABLE) { + /* Reinitialise the device anyway so the driver unload sequence + * can talk to the external SRAM */ + (void) falcon_init_nic(efx); + rc = -EIO; + goto fail4; + } + + rc = efx_reset_up(efx, &ecmd); + if (rc) + goto fail5; + + mutex_unlock(&efx->mac_lock); + EFX_LOG(efx, "reset complete\n"); + + efx->state = STATE_RUNNING; + efx_start_all(efx); + + unlock_rtnl: + rtnl_unlock(); + return 0; + + fail5: + fail4: + fail3: + fail2: + fail1: + EFX_ERR(efx, "has been disabled\n"); + efx->state = STATE_DISABLED; + + mutex_unlock(&efx->mac_lock); + rtnl_unlock(); + efx_unregister_netdev(efx); + efx_fini_port(efx); + return rc; +} + +/* The worker thread exists so that code that cannot sleep can + * schedule a reset for later. + */ +static void efx_reset_work(struct work_struct *data) +{ + struct efx_nic *nic = container_of(data, struct efx_nic, reset_work); + + efx_reset(nic); +} + +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) +{ + enum reset_type method; + + if (efx->reset_pending != RESET_TYPE_NONE) { + EFX_INFO(efx, "quenching already scheduled reset\n"); + return; + } + + switch (type) { + case RESET_TYPE_INVISIBLE: + case RESET_TYPE_ALL: + case RESET_TYPE_WORLD: + case RESET_TYPE_DISABLE: + method = type; + break; + case RESET_TYPE_RX_RECOVERY: + case RESET_TYPE_RX_DESC_FETCH: + case RESET_TYPE_TX_DESC_FETCH: + case RESET_TYPE_TX_SKIP: + method = RESET_TYPE_INVISIBLE; + break; + default: + method = RESET_TYPE_ALL; + break; + } + + if (method != type) + EFX_LOG(efx, "scheduling reset (%d:%d)\n", type, method); + else + EFX_LOG(efx, "scheduling reset (%d)\n", method); + + efx->reset_pending = method; + + queue_work(efx->workqueue, &efx->reset_work); +} + +/************************************************************************** + * + * List of NICs we support + * + **************************************************************************/ + +/* PCI device ID table */ +static struct pci_device_id efx_pci_table[] __devinitdata = { + {PCI_DEVICE(EFX_VENDID_SFC, FALCON_A_P_DEVID), + .driver_data = (unsigned long) &falcon_a_nic_type}, + {PCI_DEVICE(EFX_VENDID_SFC, FALCON_B_P_DEVID), + .driver_data = (unsigned long) &falcon_b_nic_type}, + {0} /* end of list */ +}; + +/************************************************************************** + * + * Dummy PHY/MAC/Board operations + * + * Can be used where the MAC does not implement this operation + * Needed so all function pointers are valid and do not have to be tested + * before use + * + **************************************************************************/ +int efx_port_dummy_op_int(struct efx_nic *efx) +{ + return 0; +} +void efx_port_dummy_op_void(struct efx_nic *efx) {} +void efx_port_dummy_op_blink(struct efx_nic *efx, int blink) {} + +static struct efx_phy_operations efx_dummy_phy_operations = { + .init = efx_port_dummy_op_int, + .reconfigure = efx_port_dummy_op_void, + .check_hw = efx_port_dummy_op_int, + .fini = efx_port_dummy_op_void, + .clear_interrupt = efx_port_dummy_op_void, + .reset_xaui = efx_port_dummy_op_void, +}; + +/* Dummy board operations */ +static int efx_nic_dummy_op_int(struct efx_nic *nic) +{ + return 0; +} + +static struct efx_board efx_dummy_board_info = { + .init = efx_nic_dummy_op_int, + .init_leds = efx_port_dummy_op_int, + .set_fault_led = efx_port_dummy_op_blink, +}; + +/************************************************************************** + * + * Data housekeeping + * + **************************************************************************/ + +/* This zeroes out and then fills in the invariants in a struct + * efx_nic (including all sub-structures). + */ +static int efx_init_struct(struct efx_nic *efx, struct efx_nic_type *type, + struct pci_dev *pci_dev, struct net_device *net_dev) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int i, rc; + + /* Initialise common structures */ + memset(efx, 0, sizeof(*efx)); + spin_lock_init(&efx->biu_lock); + spin_lock_init(&efx->phy_lock); + INIT_WORK(&efx->reset_work, efx_reset_work); + INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); + efx->pci_dev = pci_dev; + efx->state = STATE_INIT; + efx->reset_pending = RESET_TYPE_NONE; + strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); + efx->board_info = efx_dummy_board_info; + + efx->net_dev = net_dev; + efx->rx_checksum_enabled = 1; + spin_lock_init(&efx->netif_stop_lock); + spin_lock_init(&efx->stats_lock); + mutex_init(&efx->mac_lock); + efx->phy_op = &efx_dummy_phy_operations; + efx->mii.dev = net_dev; + INIT_WORK(&efx->reconfigure_work, efx_reconfigure_work); + atomic_set(&efx->netif_stop_count, 1); + + for (i = 0; i < EFX_MAX_CHANNELS; i++) { + channel = &efx->channel[i]; + channel->efx = efx; + channel->channel = i; + channel->evqnum = i; + channel->work_pending = 0; + } + for (i = 0; i < EFX_MAX_TX_QUEUES; i++) { + tx_queue = &efx->tx_queue[i]; + tx_queue->efx = efx; + tx_queue->queue = i; + tx_queue->buffer = NULL; + tx_queue->channel = &efx->channel[0]; /* for safety */ + } + for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { + rx_queue = &efx->rx_queue[i]; + rx_queue->efx = efx; + rx_queue->queue = i; + rx_queue->channel = &efx->channel[0]; /* for safety */ + rx_queue->buffer = NULL; + spin_lock_init(&rx_queue->add_lock); + INIT_DELAYED_WORK(&rx_queue->work, efx_rx_work); + } + + efx->type = type; + + /* Sanity-check NIC type */ + EFX_BUG_ON_PARANOID(efx->type->txd_ring_mask & + (efx->type->txd_ring_mask + 1)); + EFX_BUG_ON_PARANOID(efx->type->rxd_ring_mask & + (efx->type->rxd_ring_mask + 1)); + EFX_BUG_ON_PARANOID(efx->type->evq_size & + (efx->type->evq_size - 1)); + /* As close as we can get to guaranteeing that we don't overflow */ + EFX_BUG_ON_PARANOID(efx->type->evq_size < + (efx->type->txd_ring_mask + 1 + + efx->type->rxd_ring_mask + 1)); + EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); + + /* Higher numbered interrupt modes are less capable! */ + efx->interrupt_mode = max(efx->type->max_interrupt_mode, + interrupt_mode); + + efx->workqueue = create_singlethread_workqueue("sfc_work"); + if (!efx->workqueue) { + rc = -ENOMEM; + goto fail1; + } + + return 0; + + fail1: + return rc; +} + +static void efx_fini_struct(struct efx_nic *efx) +{ + if (efx->workqueue) { + destroy_workqueue(efx->workqueue); + efx->workqueue = NULL; + } +} + +/************************************************************************** + * + * PCI interface + * + **************************************************************************/ + +/* Main body of final NIC shutdown code + * This is called only at module unload (or hotplug removal). + */ +static void efx_pci_remove_main(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + /* Skip everything if we never obtained a valid membase */ + if (!efx->membase) + return; + + efx_fini_channels(efx); + efx_fini_port(efx); + + /* Shutdown the board, then the NIC and board state */ + falcon_fini_interrupt(efx); + + efx_fini_napi(efx); + efx_remove_all(efx); +} + +/* Final NIC shutdown + * This is called only at module unload (or hotplug removal). + */ +static void efx_pci_remove(struct pci_dev *pci_dev) +{ + struct efx_nic *efx; + + efx = pci_get_drvdata(pci_dev); + if (!efx) + return; + + /* Mark the NIC as fini, then stop the interface */ + rtnl_lock(); + efx->state = STATE_FINI; + dev_close(efx->net_dev); + + /* Allow any queued efx_resets() to complete */ + rtnl_unlock(); + + if (efx->membase == NULL) + goto out; + + efx_unregister_netdev(efx); + + /* Wait for any scheduled resets to complete. No more will be + * scheduled from this point because efx_stop_all() has been + * called, we are no longer registered with driverlink, and + * the net_device's have been removed. */ + flush_workqueue(efx->workqueue); + + efx_pci_remove_main(efx); + +out: + efx_fini_io(efx); + EFX_LOG(efx, "shutdown successful\n"); + + pci_set_drvdata(pci_dev, NULL); + efx_fini_struct(efx); + free_netdev(efx->net_dev); +}; + +/* Main body of NIC initialisation + * This is called at module load (or hotplug insertion, theoretically). + */ +static int efx_pci_probe_main(struct efx_nic *efx) +{ + int rc; + + /* Do start-of-day initialisation */ + rc = efx_probe_all(efx); + if (rc) + goto fail1; + + rc = efx_init_napi(efx); + if (rc) + goto fail2; + + /* Initialise the board */ + rc = efx->board_info.init(efx); + if (rc) { + EFX_ERR(efx, "failed to initialise board\n"); + goto fail3; + } + + rc = falcon_init_nic(efx); + if (rc) { + EFX_ERR(efx, "failed to initialise NIC\n"); + goto fail4; + } + + rc = efx_init_port(efx); + if (rc) { + EFX_ERR(efx, "failed to initialise port\n"); + goto fail5; + } + + rc = efx_init_channels(efx); + if (rc) + goto fail6; + + rc = falcon_init_interrupt(efx); + if (rc) + goto fail7; + + return 0; + + fail7: + efx_fini_channels(efx); + fail6: + efx_fini_port(efx); + fail5: + fail4: + fail3: + efx_fini_napi(efx); + fail2: + efx_remove_all(efx); + fail1: + return rc; +} + +/* NIC initialisation + * + * This is called at module load (or hotplug insertion, + * theoretically). It sets up PCI mappings, tests and resets the NIC, + * sets up and registers the network devices with the kernel and hooks + * the interrupt service routine. It does not prepare the device for + * transmission; this is left to the first time one of the network + * interfaces is brought up (i.e. efx_net_open). + */ +static int __devinit efx_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *entry) +{ + struct efx_nic_type *type = (struct efx_nic_type *) entry->driver_data; + struct net_device *net_dev; + struct efx_nic *efx; + int i, rc; + + /* Allocate and initialise a struct net_device and struct efx_nic */ + net_dev = alloc_etherdev(sizeof(*efx)); + if (!net_dev) + return -ENOMEM; + net_dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA; + if (lro) + net_dev->features |= NETIF_F_LRO; + efx = net_dev->priv; + pci_set_drvdata(pci_dev, efx); + rc = efx_init_struct(efx, type, pci_dev, net_dev); + if (rc) + goto fail1; + + EFX_INFO(efx, "Solarflare Communications NIC detected\n"); + + /* Set up basic I/O (BAR mappings etc) */ + rc = efx_init_io(efx); + if (rc) + goto fail2; + + /* No serialisation is required with the reset path because + * we're in STATE_INIT. */ + for (i = 0; i < 5; i++) { + rc = efx_pci_probe_main(efx); + if (rc == 0) + break; + + /* Serialise against efx_reset(). No more resets will be + * scheduled since efx_stop_all() has been called, and we + * have not and never have been registered with either + * the rtnetlink or driverlink layers. */ + cancel_work_sync(&efx->reset_work); + + /* Retry if a recoverably reset event has been scheduled */ + if ((efx->reset_pending != RESET_TYPE_INVISIBLE) && + (efx->reset_pending != RESET_TYPE_ALL)) + goto fail3; + + efx->reset_pending = RESET_TYPE_NONE; + } + + if (rc) { + EFX_ERR(efx, "Could not reset NIC\n"); + goto fail4; + } + + /* Switch to the running state before we expose the device to + * the OS. This is to ensure that the initial gathering of + * MAC stats succeeds. */ + rtnl_lock(); + efx->state = STATE_RUNNING; + rtnl_unlock(); + + rc = efx_register_netdev(efx); + if (rc) + goto fail5; + + EFX_LOG(efx, "initialisation successful\n"); + + return 0; + + fail5: + efx_pci_remove_main(efx); + fail4: + fail3: + efx_fini_io(efx); + fail2: + efx_fini_struct(efx); + fail1: + EFX_LOG(efx, "initialisation failed. rc=%d\n", rc); + free_netdev(net_dev); + return rc; +} + +static struct pci_driver efx_pci_driver = { + .name = EFX_DRIVER_NAME, + .id_table = efx_pci_table, + .probe = efx_pci_probe, + .remove = efx_pci_remove, +}; + +/************************************************************************** + * + * Kernel module interface + * + *************************************************************************/ + +module_param(interrupt_mode, uint, 0444); +MODULE_PARM_DESC(interrupt_mode, + "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); + +static int __init efx_init_module(void) +{ + int rc; + + printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n"); + + rc = register_netdevice_notifier(&efx_netdev_notifier); + if (rc) + goto err_notifier; + + refill_workqueue = create_workqueue("sfc_refill"); + if (!refill_workqueue) { + rc = -ENOMEM; + goto err_refill; + } + + rc = pci_register_driver(&efx_pci_driver); + if (rc < 0) + goto err_pci; + + return 0; + + err_pci: + destroy_workqueue(refill_workqueue); + err_refill: + unregister_netdevice_notifier(&efx_netdev_notifier); + err_notifier: + return rc; +} + +static void __exit efx_exit_module(void) +{ + printk(KERN_INFO "Solarflare NET driver unloading\n"); + + pci_unregister_driver(&efx_pci_driver); + destroy_workqueue(refill_workqueue); + unregister_netdevice_notifier(&efx_netdev_notifier); + +} + +module_init(efx_init_module); +module_exit(efx_exit_module); + +MODULE_AUTHOR("Michael Brown and " + "Solarflare Communications"); +MODULE_DESCRIPTION("Solarflare Communications network driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, efx_pci_table); diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h new file mode 100644 index 00000000000..3b2f69f4a9a --- /dev/null +++ b/drivers/net/sfc/efx.h @@ -0,0 +1,67 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_EFX_H +#define EFX_EFX_H + +#include "net_driver.h" + +/* PCI IDs */ +#define EFX_VENDID_SFC 0x1924 +#define FALCON_A_P_DEVID 0x0703 +#define FALCON_A_S_DEVID 0x6703 +#define FALCON_B_P_DEVID 0x0710 + +/* TX */ +extern int efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, struct sk_buff *skb); +extern void efx_stop_queue(struct efx_nic *efx); +extern void efx_wake_queue(struct efx_nic *efx); + +/* RX */ +extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); +extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, + unsigned int len, int checksummed, int discard); +extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue, int delay); + +/* Channels */ +extern void efx_process_channel_now(struct efx_channel *channel); +extern int efx_flush_queues(struct efx_nic *efx); + +/* Ports */ +extern void efx_reconfigure_port(struct efx_nic *efx); + +/* Global */ +extern void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); +extern void efx_suspend(struct efx_nic *efx); +extern void efx_resume(struct efx_nic *efx); +extern void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs, + int rx_usecs); +extern int efx_request_power(struct efx_nic *efx, int mw, const char *name); +extern void efx_hex_dump(const u8 *, unsigned int, const char *); + +/* Dummy PHY ops for PHY drivers */ +extern int efx_port_dummy_op_int(struct efx_nic *efx); +extern void efx_port_dummy_op_void(struct efx_nic *efx); +extern void efx_port_dummy_op_blink(struct efx_nic *efx, int blink); + + +extern unsigned int efx_monitor_interval; + +static inline void efx_schedule_channel(struct efx_channel *channel) +{ + EFX_TRACE(channel->efx, "channel %d scheduling NAPI poll on CPU%d\n", + channel->channel, raw_smp_processor_id()); + channel->work_pending = 1; + + netif_rx_schedule(channel->napi_dev, &channel->napi_str); +} + +#endif /* EFX_EFX_H */ diff --git a/drivers/net/sfc/enum.h b/drivers/net/sfc/enum.h new file mode 100644 index 00000000000..43663a4619d --- /dev/null +++ b/drivers/net/sfc/enum.h @@ -0,0 +1,50 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_ENUM_H +#define EFX_ENUM_H + +/*****************************************************************************/ + +/** + * enum reset_type - reset types + * + * %RESET_TYPE_INVSIBLE, %RESET_TYPE_ALL, %RESET_TYPE_WORLD and + * %RESET_TYPE_DISABLE specify the method/scope of the reset. The + * other valuesspecify reasons, which efx_schedule_reset() will choose + * a method for. + * + * @RESET_TYPE_INVISIBLE: don't reset the PHYs or interrupts + * @RESET_TYPE_ALL: reset everything but PCI core blocks + * @RESET_TYPE_WORLD: reset everything, save & restore PCI config + * @RESET_TYPE_DISABLE: disable NIC + * @RESET_TYPE_MONITOR: reset due to hardware monitor + * @RESET_TYPE_INT_ERROR: reset due to internal error + * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors + * @RESET_TYPE_RX_DESC_FETCH: pcie error during rx descriptor fetch + * @RESET_TYPE_TX_DESC_FETCH: pcie error during tx descriptor fetch + * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors + */ +enum reset_type { + RESET_TYPE_NONE = -1, + RESET_TYPE_INVISIBLE = 0, + RESET_TYPE_ALL = 1, + RESET_TYPE_WORLD = 2, + RESET_TYPE_DISABLE = 3, + RESET_TYPE_MAX_METHOD, + RESET_TYPE_MONITOR, + RESET_TYPE_INT_ERROR, + RESET_TYPE_RX_RECOVERY, + RESET_TYPE_RX_DESC_FETCH, + RESET_TYPE_TX_DESC_FETCH, + RESET_TYPE_TX_SKIP, + RESET_TYPE_MAX, +}; + +#endif /* EFX_ENUM_H */ diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c new file mode 100644 index 00000000000..ad541badbd9 --- /dev/null +++ b/drivers/net/sfc/ethtool.c @@ -0,0 +1,460 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include "net_driver.h" +#include "efx.h" +#include "ethtool.h" +#include "falcon.h" +#include "gmii.h" +#include "mac.h" + +static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable); + +struct ethtool_string { + char name[ETH_GSTRING_LEN]; +}; + +struct efx_ethtool_stat { + const char *name; + enum { + EFX_ETHTOOL_STAT_SOURCE_mac_stats, + EFX_ETHTOOL_STAT_SOURCE_nic, + EFX_ETHTOOL_STAT_SOURCE_channel + } source; + unsigned offset; + u64(*get_stat) (void *field); /* Reader function */ +}; + +/* Initialiser for a struct #efx_ethtool_stat with type-checking */ +#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ + get_stat_function) { \ + .name = #stat_name, \ + .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ + .offset = ((((field_type *) 0) == \ + &((struct efx_##source_name *)0)->field) ? \ + offsetof(struct efx_##source_name, field) : \ + offsetof(struct efx_##source_name, field)), \ + .get_stat = get_stat_function, \ +} + +static u64 efx_get_uint_stat(void *field) +{ + return *(unsigned int *)field; +} + +static u64 efx_get_ulong_stat(void *field) +{ + return *(unsigned long *)field; +} + +static u64 efx_get_u64_stat(void *field) +{ + return *(u64 *) field; +} + +static u64 efx_get_atomic_stat(void *field) +{ + return atomic_read((atomic_t *) field); +} + +#define EFX_ETHTOOL_ULONG_MAC_STAT(field) \ + EFX_ETHTOOL_STAT(field, mac_stats, field, \ + unsigned long, efx_get_ulong_stat) + +#define EFX_ETHTOOL_U64_MAC_STAT(field) \ + EFX_ETHTOOL_STAT(field, mac_stats, field, \ + u64, efx_get_u64_stat) + +#define EFX_ETHTOOL_UINT_NIC_STAT(name) \ + EFX_ETHTOOL_STAT(name, nic, n_##name, \ + unsigned int, efx_get_uint_stat) + +#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ + EFX_ETHTOOL_STAT(field, nic, field, \ + atomic_t, efx_get_atomic_stat) + +#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ + EFX_ETHTOOL_STAT(field, channel, n_##field, \ + unsigned int, efx_get_uint_stat) + +static struct efx_ethtool_stat efx_ethtool_stats[] = { + EFX_ETHTOOL_U64_MAC_STAT(tx_bytes), + EFX_ETHTOOL_U64_MAC_STAT(tx_good_bytes), + EFX_ETHTOOL_U64_MAC_STAT(tx_bad_bytes), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_packets), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_bad), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_pause), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_control), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_unicast), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_multicast), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_broadcast), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_lt64), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_64), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_65_to_127), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_128_to_255), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_256_to_511), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_512_to_1023), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_1024_to_15xx), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_15xx_to_jumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_gtjumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_collision), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_single_collision), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_multiple_collision), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_excessive_collision), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_deferred), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_late_collision), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_excessive_deferred), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_non_tcpudp), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_mac_src_error), + EFX_ETHTOOL_ULONG_MAC_STAT(tx_ip_src_error), + EFX_ETHTOOL_U64_MAC_STAT(rx_bytes), + EFX_ETHTOOL_U64_MAC_STAT(rx_good_bytes), + EFX_ETHTOOL_U64_MAC_STAT(rx_bad_bytes), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_packets), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_good), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_pause), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_control), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_unicast), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_multicast), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_broadcast), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_lt64), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_64), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_65_to_127), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_128_to_255), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_256_to_511), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_512_to_1023), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_1024_to_15xx), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_15xx_to_jumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_gtjumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_lt64), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_64_to_15xx), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_15xx_to_jumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_gtjumbo), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_overflow), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_missed), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_false_carrier), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_symbol_error), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_align_error), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_length_error), + EFX_ETHTOOL_ULONG_MAC_STAT(rx_internal_error), + EFX_ETHTOOL_UINT_NIC_STAT(rx_nodesc_drop_cnt), + EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), +}; + +/* Number of ethtool statistics */ +#define EFX_ETHTOOL_NUM_STATS ARRAY_SIZE(efx_ethtool_stats) + +/************************************************************************** + * + * Ethtool operations + * + ************************************************************************** + */ + +/* Identify device by flashing LEDs */ +static int efx_ethtool_phys_id(struct net_device *net_dev, u32 seconds) +{ + struct efx_nic *efx = net_dev->priv; + + efx->board_info.blink(efx, 1); + schedule_timeout_interruptible(seconds * HZ); + efx->board_info.blink(efx, 0); + return 0; +} + +/* This must be called with rtnl_lock held. */ +int efx_ethtool_get_settings(struct net_device *net_dev, + struct ethtool_cmd *ecmd) +{ + struct efx_nic *efx = net_dev->priv; + int rc; + + mutex_lock(&efx->mac_lock); + rc = falcon_xmac_get_settings(efx, ecmd); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/* This must be called with rtnl_lock held. */ +int efx_ethtool_set_settings(struct net_device *net_dev, + struct ethtool_cmd *ecmd) +{ + struct efx_nic *efx = net_dev->priv; + int rc; + + mutex_lock(&efx->mac_lock); + rc = falcon_xmac_set_settings(efx, ecmd); + mutex_unlock(&efx->mac_lock); + if (!rc) + efx_reconfigure_port(efx); + + return rc; +} + +static void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info) +{ + struct efx_nic *efx = net_dev->priv; + + strlcpy(info->driver, EFX_DRIVER_NAME, sizeof(info->driver)); + strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); +} + +static int efx_ethtool_get_stats_count(struct net_device *net_dev) +{ + return EFX_ETHTOOL_NUM_STATS; +} + +static void efx_ethtool_get_strings(struct net_device *net_dev, + u32 string_set, u8 *strings) +{ + struct ethtool_string *ethtool_strings = + (struct ethtool_string *)strings; + int i; + + if (string_set == ETH_SS_STATS) + for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) + strncpy(ethtool_strings[i].name, + efx_ethtool_stats[i].name, + sizeof(ethtool_strings[i].name)); +} + +static void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct efx_nic *efx = net_dev->priv; + struct efx_mac_stats *mac_stats = &efx->mac_stats; + struct efx_ethtool_stat *stat; + struct efx_channel *channel; + int i; + + EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS); + + /* Update MAC and NIC statistics */ + net_dev->get_stats(net_dev); + + /* Fill detailed statistics buffer */ + for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) { + stat = &efx_ethtool_stats[i]; + switch (stat->source) { + case EFX_ETHTOOL_STAT_SOURCE_mac_stats: + data[i] = stat->get_stat((void *)mac_stats + + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_nic: + data[i] = stat->get_stat((void *)efx + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_channel: + data[i] = 0; + efx_for_each_channel(channel, efx) + data[i] += stat->get_stat((void *)channel + + stat->offset); + break; + } + } +} + +static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable) +{ + struct efx_nic *efx = net_dev->priv; + int rc; + + rc = ethtool_op_set_tx_csum(net_dev, enable); + if (rc) + return rc; + + efx_flush_queues(efx); + + return 0; +} + +static int efx_ethtool_set_rx_csum(struct net_device *net_dev, u32 enable) +{ + struct efx_nic *efx = net_dev->priv; + + /* No way to stop the hardware doing the checks; we just + * ignore the result. + */ + efx->rx_checksum_enabled = (enable ? 1 : 0); + + return 0; +} + +static u32 efx_ethtool_get_rx_csum(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + + return efx->rx_checksum_enabled; +} + +/* Restart autonegotiation */ +static int efx_ethtool_nway_reset(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + + return mii_nway_restart(&efx->mii); +} + +static u32 efx_ethtool_get_link(struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + + return efx->link_up; +} + +static int efx_ethtool_get_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct efx_nic *efx = net_dev->priv; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + + memset(coalesce, 0, sizeof(*coalesce)); + + /* Find lowest IRQ moderation across all used TX queues */ + coalesce->tx_coalesce_usecs_irq = ~((u32) 0); + efx_for_each_tx_queue(tx_queue, efx) { + channel = tx_queue->channel; + if (channel->irq_moderation < coalesce->tx_coalesce_usecs_irq) { + if (channel->used_flags != EFX_USED_BY_RX_TX) + coalesce->tx_coalesce_usecs_irq = + channel->irq_moderation; + else + coalesce->tx_coalesce_usecs_irq = 0; + } + } + + /* Find lowest IRQ moderation across all used RX queues */ + coalesce->rx_coalesce_usecs_irq = ~((u32) 0); + efx_for_each_rx_queue(rx_queue, efx) { + channel = rx_queue->channel; + if (channel->irq_moderation < coalesce->rx_coalesce_usecs_irq) + coalesce->rx_coalesce_usecs_irq = + channel->irq_moderation; + } + + return 0; +} + +/* Set coalescing parameters + * The difficulties occur for shared channels + */ +static int efx_ethtool_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct efx_nic *efx = net_dev->priv; + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + unsigned tx_usecs, rx_usecs; + + if (coalesce->use_adaptive_rx_coalesce || + coalesce->use_adaptive_tx_coalesce) + return -EOPNOTSUPP; + + if (coalesce->rx_coalesce_usecs || coalesce->tx_coalesce_usecs) { + EFX_ERR(efx, "invalid coalescing setting. " + "Only rx/tx_coalesce_usecs_irq are supported\n"); + return -EOPNOTSUPP; + } + + rx_usecs = coalesce->rx_coalesce_usecs_irq; + tx_usecs = coalesce->tx_coalesce_usecs_irq; + + /* If the channel is shared only allow RX parameters to be set */ + efx_for_each_tx_queue(tx_queue, efx) { + if ((tx_queue->channel->used_flags == EFX_USED_BY_RX_TX) && + tx_usecs) { + EFX_ERR(efx, "Channel is shared. " + "Only RX coalescing may be set\n"); + return -EOPNOTSUPP; + } + } + + efx_init_irq_moderation(efx, tx_usecs, rx_usecs); + + /* Reset channel to pick up new moderation value. Note that + * this may change the value of the irq_moderation field + * (e.g. to allow for hardware timer granularity). + */ + efx_for_each_channel(channel, efx) + falcon_set_int_moderation(channel); + + return 0; +} + +static int efx_ethtool_set_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause) +{ + struct efx_nic *efx = net_dev->priv; + enum efx_fc_type flow_control = efx->flow_control; + int rc; + + flow_control &= ~(EFX_FC_RX | EFX_FC_TX | EFX_FC_AUTO); + flow_control |= pause->rx_pause ? EFX_FC_RX : 0; + flow_control |= pause->tx_pause ? EFX_FC_TX : 0; + flow_control |= pause->autoneg ? EFX_FC_AUTO : 0; + + /* Try to push the pause parameters */ + mutex_lock(&efx->mac_lock); + rc = falcon_xmac_set_pause(efx, flow_control); + mutex_unlock(&efx->mac_lock); + + if (!rc) + efx_reconfigure_port(efx); + + return rc; +} + +static void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause) +{ + struct efx_nic *efx = net_dev->priv; + + pause->rx_pause = (efx->flow_control & EFX_FC_RX) ? 1 : 0; + pause->tx_pause = (efx->flow_control & EFX_FC_TX) ? 1 : 0; + pause->autoneg = (efx->flow_control & EFX_FC_AUTO) ? 1 : 0; +} + + +struct ethtool_ops efx_ethtool_ops = { + .get_settings = efx_ethtool_get_settings, + .set_settings = efx_ethtool_set_settings, + .get_drvinfo = efx_ethtool_get_drvinfo, + .nway_reset = efx_ethtool_nway_reset, + .get_link = efx_ethtool_get_link, + .get_coalesce = efx_ethtool_get_coalesce, + .set_coalesce = efx_ethtool_set_coalesce, + .get_pauseparam = efx_ethtool_get_pauseparam, + .set_pauseparam = efx_ethtool_set_pauseparam, + .get_rx_csum = efx_ethtool_get_rx_csum, + .set_rx_csum = efx_ethtool_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = efx_ethtool_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = efx_ethtool_get_strings, + .phys_id = efx_ethtool_phys_id, + .get_stats_count = efx_ethtool_get_stats_count, + .get_ethtool_stats = efx_ethtool_get_stats, +}; diff --git a/drivers/net/sfc/ethtool.h b/drivers/net/sfc/ethtool.h new file mode 100644 index 00000000000..3628e43df14 --- /dev/null +++ b/drivers/net/sfc/ethtool.h @@ -0,0 +1,27 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_ETHTOOL_H +#define EFX_ETHTOOL_H + +#include "net_driver.h" + +/* + * Ethtool support + */ + +extern int efx_ethtool_get_settings(struct net_device *net_dev, + struct ethtool_cmd *ecmd); +extern int efx_ethtool_set_settings(struct net_device *net_dev, + struct ethtool_cmd *ecmd); + +extern struct ethtool_ops efx_ethtool_ops; + +#endif /* EFX_ETHTOOL_H */ diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c new file mode 100644 index 00000000000..46db549ce58 --- /dev/null +++ b/drivers/net/sfc/falcon.c @@ -0,0 +1,2722 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include "net_driver.h" +#include "bitfield.h" +#include "efx.h" +#include "mac.h" +#include "gmii.h" +#include "spi.h" +#include "falcon.h" +#include "falcon_hwdefs.h" +#include "falcon_io.h" +#include "mdio_10g.h" +#include "phy.h" +#include "boards.h" +#include "workarounds.h" + +/* Falcon hardware control. + * Falcon is the internal codename for the SFC4000 controller that is + * present in SFE400X evaluation boards + */ + +/** + * struct falcon_nic_data - Falcon NIC state + * @next_buffer_table: First available buffer table id + * @pci_dev2: The secondary PCI device if present + */ +struct falcon_nic_data { + unsigned next_buffer_table; + struct pci_dev *pci_dev2; +}; + +/************************************************************************** + * + * Configurable values + * + ************************************************************************** + */ + +static int disable_dma_stats; + +/* This is set to 16 for a good reason. In summary, if larger than + * 16, the descriptor cache holds more than a default socket + * buffer's worth of packets (for UDP we can only have at most one + * socket buffer's worth outstanding). This combined with the fact + * that we only get 1 TX event per descriptor cache means the NIC + * goes idle. + */ +#define TX_DC_ENTRIES 16 +#define TX_DC_ENTRIES_ORDER 0 +#define TX_DC_BASE 0x130000 + +#define RX_DC_ENTRIES 64 +#define RX_DC_ENTRIES_ORDER 2 +#define RX_DC_BASE 0x100000 + +/* RX FIFO XOFF watermark + * + * When the amount of the RX FIFO increases used increases past this + * watermark send XOFF. Only used if RX flow control is enabled (ethtool -A) + * This also has an effect on RX/TX arbitration + */ +static int rx_xoff_thresh_bytes = -1; +module_param(rx_xoff_thresh_bytes, int, 0644); +MODULE_PARM_DESC(rx_xoff_thresh_bytes, "RX fifo XOFF threshold"); + +/* RX FIFO XON watermark + * + * When the amount of the RX FIFO used decreases below this + * watermark send XON. Only used if TX flow control is enabled (ethtool -A) + * This also has an effect on RX/TX arbitration + */ +static int rx_xon_thresh_bytes = -1; +module_param(rx_xon_thresh_bytes, int, 0644); +MODULE_PARM_DESC(rx_xon_thresh_bytes, "RX fifo XON threshold"); + +/* TX descriptor ring size - min 512 max 4k */ +#define FALCON_TXD_RING_ORDER TX_DESCQ_SIZE_1K +#define FALCON_TXD_RING_SIZE 1024 +#define FALCON_TXD_RING_MASK (FALCON_TXD_RING_SIZE - 1) + +/* RX descriptor ring size - min 512 max 4k */ +#define FALCON_RXD_RING_ORDER RX_DESCQ_SIZE_1K +#define FALCON_RXD_RING_SIZE 1024 +#define FALCON_RXD_RING_MASK (FALCON_RXD_RING_SIZE - 1) + +/* Event queue size - max 32k */ +#define FALCON_EVQ_ORDER EVQ_SIZE_4K +#define FALCON_EVQ_SIZE 4096 +#define FALCON_EVQ_MASK (FALCON_EVQ_SIZE - 1) + +/* Max number of internal errors. After this resets will not be performed */ +#define FALCON_MAX_INT_ERRORS 4 + +/* Maximum period that we wait for flush events. If the flush event + * doesn't arrive in this period of time then we check if the queue + * was disabled anyway. */ +#define FALCON_FLUSH_TIMEOUT 10 /* 10ms */ + +/************************************************************************** + * + * Falcon constants + * + ************************************************************************** + */ + +/* DMA address mask (up to 46-bit, avoiding compiler warnings) + * + * Note that it is possible to have a platform with 64-bit longs and + * 32-bit DMA addresses, or vice versa. EFX_DMA_MASK takes care of the + * platform DMA mask. + */ +#if BITS_PER_LONG == 64 +#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffUL) +#else +#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffULL) +#endif + +/* TX DMA length mask (13-bit) */ +#define FALCON_TX_DMA_MASK (4096 - 1) + +/* Size and alignment of special buffers (4KB) */ +#define FALCON_BUF_SIZE 4096 + +/* Dummy SRAM size code */ +#define SRM_NB_BSZ_ONCHIP_ONLY (-1) + +/* Be nice if these (or equiv.) were in linux/pci_regs.h, but they're not. */ +#define PCI_EXP_DEVCAP_PWR_VAL_LBN 18 +#define PCI_EXP_DEVCAP_PWR_SCL_LBN 26 +#define PCI_EXP_DEVCTL_PAYLOAD_LBN 5 +#define PCI_EXP_LNKSTA_LNK_WID 0x3f0 +#define PCI_EXP_LNKSTA_LNK_WID_LBN 4 + +#define FALCON_IS_DUAL_FUNC(efx) \ + (FALCON_REV(efx) < FALCON_REV_B0) + +/************************************************************************** + * + * Falcon hardware access + * + **************************************************************************/ + +/* Read the current event from the event queue */ +static inline efx_qword_t *falcon_event(struct efx_channel *channel, + unsigned int index) +{ + return (((efx_qword_t *) (channel->eventq.addr)) + index); +} + +/* See if an event is present + * + * We check both the high and low dword of the event for all ones. We + * wrote all ones when we cleared the event, and no valid event can + * have all ones in either its high or low dwords. This approach is + * robust against reordering. + * + * Note that using a single 64-bit comparison is incorrect; even + * though the CPU read will be atomic, the DMA write may not be. + */ +static inline int falcon_event_present(efx_qword_t *event) +{ + return (!(EFX_DWORD_IS_ALL_ONES(event->dword[0]) | + EFX_DWORD_IS_ALL_ONES(event->dword[1]))); +} + +/************************************************************************** + * + * I2C bus - this is a bit-bashing interface using GPIO pins + * Note that it uses the output enables to tristate the outputs + * SDA is the data pin and SCL is the clock + * + ************************************************************************** + */ +static void falcon_setsdascl(struct efx_i2c_interface *i2c) +{ + efx_oword_t reg; + + falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); + EFX_SET_OWORD_FIELD(reg, GPIO0_OEN, (i2c->scl ? 0 : 1)); + EFX_SET_OWORD_FIELD(reg, GPIO3_OEN, (i2c->sda ? 0 : 1)); + falcon_write(i2c->efx, ®, GPIO_CTL_REG_KER); +} + +static int falcon_getsda(struct efx_i2c_interface *i2c) +{ + efx_oword_t reg; + + falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); + return EFX_OWORD_FIELD(reg, GPIO3_IN); +} + +static int falcon_getscl(struct efx_i2c_interface *i2c) +{ + efx_oword_t reg; + + falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); + return EFX_DWORD_FIELD(reg, GPIO0_IN); +} + +static struct efx_i2c_bit_operations falcon_i2c_bit_operations = { + .setsda = falcon_setsdascl, + .setscl = falcon_setsdascl, + .getsda = falcon_getsda, + .getscl = falcon_getscl, + .udelay = 100, + .mdelay = 10, +}; + +/************************************************************************** + * + * Falcon special buffer handling + * Special buffers are used for event queues and the TX and RX + * descriptor rings. + * + *************************************************************************/ + +/* + * Initialise a Falcon special buffer + * + * This will define a buffer (previously allocated via + * falcon_alloc_special_buffer()) in Falcon's buffer table, allowing + * it to be used for event queues, descriptor rings etc. + */ +static int +falcon_init_special_buffer(struct efx_nic *efx, + struct efx_special_buffer *buffer) +{ + efx_qword_t buf_desc; + int index; + dma_addr_t dma_addr; + int i; + + EFX_BUG_ON_PARANOID(!buffer->addr); + + /* Write buffer descriptors to NIC */ + for (i = 0; i < buffer->entries; i++) { + index = buffer->index + i; + dma_addr = buffer->dma_addr + (i * 4096); + EFX_LOG(efx, "mapping special buffer %d at %llx\n", + index, (unsigned long long)dma_addr); + EFX_POPULATE_QWORD_4(buf_desc, + IP_DAT_BUF_SIZE, IP_DAT_BUF_SIZE_4K, + BUF_ADR_REGION, 0, + BUF_ADR_FBUF, (dma_addr >> 12), + BUF_OWNER_ID_FBUF, 0); + falcon_write_sram(efx, &buf_desc, index); + } + + return 0; +} + +/* Unmaps a buffer from Falcon and clears the buffer table entries */ +static void +falcon_fini_special_buffer(struct efx_nic *efx, + struct efx_special_buffer *buffer) +{ + efx_oword_t buf_tbl_upd; + unsigned int start = buffer->index; + unsigned int end = (buffer->index + buffer->entries - 1); + + if (!buffer->entries) + return; + + EFX_LOG(efx, "unmapping special buffers %d-%d\n", + buffer->index, buffer->index + buffer->entries - 1); + + EFX_POPULATE_OWORD_4(buf_tbl_upd, + BUF_UPD_CMD, 0, + BUF_CLR_CMD, 1, + BUF_CLR_END_ID, end, + BUF_CLR_START_ID, start); + falcon_write(efx, &buf_tbl_upd, BUF_TBL_UPD_REG_KER); +} + +/* + * Allocate a new Falcon special buffer + * + * This allocates memory for a new buffer, clears it and allocates a + * new buffer ID range. It does not write into Falcon's buffer table. + * + * This call will allocate 4KB buffers, since Falcon can't use 8KB + * buffers for event queues and descriptor rings. + */ +static int falcon_alloc_special_buffer(struct efx_nic *efx, + struct efx_special_buffer *buffer, + unsigned int len) +{ + struct falcon_nic_data *nic_data = efx->nic_data; + + len = ALIGN(len, FALCON_BUF_SIZE); + + buffer->addr = pci_alloc_consistent(efx->pci_dev, len, + &buffer->dma_addr); + if (!buffer->addr) + return -ENOMEM; + buffer->len = len; + buffer->entries = len / FALCON_BUF_SIZE; + BUG_ON(buffer->dma_addr & (FALCON_BUF_SIZE - 1)); + + /* All zeros is a potentially valid event so memset to 0xff */ + memset(buffer->addr, 0xff, len); + + /* Select new buffer ID */ + buffer->index = nic_data->next_buffer_table; + nic_data->next_buffer_table += buffer->entries; + + EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " + "(virt %p phys %lx)\n", buffer->index, + buffer->index + buffer->entries - 1, + (unsigned long long)buffer->dma_addr, len, + buffer->addr, virt_to_phys(buffer->addr)); + + return 0; +} + +static void falcon_free_special_buffer(struct efx_nic *efx, + struct efx_special_buffer *buffer) +{ + if (!buffer->addr) + return; + + EFX_LOG(efx, "deallocating special buffers %d-%d at %llx+%x " + "(virt %p phys %lx)\n", buffer->index, + buffer->index + buffer->entries - 1, + (unsigned long long)buffer->dma_addr, buffer->len, + buffer->addr, virt_to_phys(buffer->addr)); + + pci_free_consistent(efx->pci_dev, buffer->len, buffer->addr, + buffer->dma_addr); + buffer->addr = NULL; + buffer->entries = 0; +} + +/************************************************************************** + * + * Falcon generic buffer handling + * These buffers are used for interrupt status and MAC stats + * + **************************************************************************/ + +static int falcon_alloc_buffer(struct efx_nic *efx, + struct efx_buffer *buffer, unsigned int len) +{ + buffer->addr = pci_alloc_consistent(efx->pci_dev, len, + &buffer->dma_addr); + if (!buffer->addr) + return -ENOMEM; + buffer->len = len; + memset(buffer->addr, 0, len); + return 0; +} + +static void falcon_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer) +{ + if (buffer->addr) { + pci_free_consistent(efx->pci_dev, buffer->len, + buffer->addr, buffer->dma_addr); + buffer->addr = NULL; + } +} + +/************************************************************************** + * + * Falcon TX path + * + **************************************************************************/ + +/* Returns a pointer to the specified transmit descriptor in the TX + * descriptor queue belonging to the specified channel. + */ +static inline efx_qword_t *falcon_tx_desc(struct efx_tx_queue *tx_queue, + unsigned int index) +{ + return (((efx_qword_t *) (tx_queue->txd.addr)) + index); +} + +/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ +static inline void falcon_notify_tx_desc(struct efx_tx_queue *tx_queue) +{ + unsigned write_ptr; + efx_dword_t reg; + + write_ptr = tx_queue->write_count & FALCON_TXD_RING_MASK; + EFX_POPULATE_DWORD_1(reg, TX_DESC_WPTR_DWORD, write_ptr); + falcon_writel_page(tx_queue->efx, ®, + TX_DESC_UPD_REG_KER_DWORD, tx_queue->queue); +} + + +/* For each entry inserted into the software descriptor ring, create a + * descriptor in the hardware TX descriptor ring (in host memory), and + * write a doorbell. + */ +void falcon_push_buffers(struct efx_tx_queue *tx_queue) +{ + + struct efx_tx_buffer *buffer; + efx_qword_t *txd; + unsigned write_ptr; + + BUG_ON(tx_queue->write_count == tx_queue->insert_count); + + do { + write_ptr = tx_queue->write_count & FALCON_TXD_RING_MASK; + buffer = &tx_queue->buffer[write_ptr]; + txd = falcon_tx_desc(tx_queue, write_ptr); + ++tx_queue->write_count; + + /* Create TX descriptor ring entry */ + EFX_POPULATE_QWORD_5(*txd, + TX_KER_PORT, 0, + TX_KER_CONT, buffer->continuation, + TX_KER_BYTE_CNT, buffer->len, + TX_KER_BUF_REGION, 0, + TX_KER_BUF_ADR, buffer->dma_addr); + } while (tx_queue->write_count != tx_queue->insert_count); + + wmb(); /* Ensure descriptors are written before they are fetched */ + falcon_notify_tx_desc(tx_queue); +} + +/* Allocate hardware resources for a TX queue */ +int falcon_probe_tx(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + return falcon_alloc_special_buffer(efx, &tx_queue->txd, + FALCON_TXD_RING_SIZE * + sizeof(efx_qword_t)); +} + +int falcon_init_tx(struct efx_tx_queue *tx_queue) +{ + efx_oword_t tx_desc_ptr; + struct efx_nic *efx = tx_queue->efx; + int rc; + + /* Pin TX descriptor ring */ + rc = falcon_init_special_buffer(efx, &tx_queue->txd); + if (rc) + return rc; + + /* Push TX descriptor ring to card */ + EFX_POPULATE_OWORD_10(tx_desc_ptr, + TX_DESCQ_EN, 1, + TX_ISCSI_DDIG_EN, 0, + TX_ISCSI_HDIG_EN, 0, + TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index, + TX_DESCQ_EVQ_ID, tx_queue->channel->evqnum, + TX_DESCQ_OWNER_ID, 0, + TX_DESCQ_LABEL, tx_queue->queue, + TX_DESCQ_SIZE, FALCON_TXD_RING_ORDER, + TX_DESCQ_TYPE, 0, + TX_NON_IP_DROP_DIS_B0, 1); + + if (FALCON_REV(efx) >= FALCON_REV_B0) { + int csum = !(efx->net_dev->features & NETIF_F_IP_CSUM); + EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_IP_CHKSM_DIS_B0, csum); + EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_TCP_CHKSM_DIS_B0, csum); + } + + falcon_write_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base, + tx_queue->queue); + + if (FALCON_REV(efx) < FALCON_REV_B0) { + efx_oword_t reg; + + BUG_ON(tx_queue->queue >= 128); /* HW limit */ + + falcon_read(efx, ®, TX_CHKSM_CFG_REG_KER_A1); + if (efx->net_dev->features & NETIF_F_IP_CSUM) + clear_bit_le(tx_queue->queue, (void *)®); + else + set_bit_le(tx_queue->queue, (void *)®); + falcon_write(efx, ®, TX_CHKSM_CFG_REG_KER_A1); + } + + return 0; +} + +static int falcon_flush_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_channel *channel = &efx->channel[0]; + efx_oword_t tx_flush_descq; + unsigned int read_ptr, i; + + /* Post a flush command */ + EFX_POPULATE_OWORD_2(tx_flush_descq, + TX_FLUSH_DESCQ_CMD, 1, + TX_FLUSH_DESCQ, tx_queue->queue); + falcon_write(efx, &tx_flush_descq, TX_FLUSH_DESCQ_REG_KER); + msleep(FALCON_FLUSH_TIMEOUT); + + if (EFX_WORKAROUND_7803(efx)) + return 0; + + /* Look for a flush completed event */ + read_ptr = channel->eventq_read_ptr; + for (i = 0; i < FALCON_EVQ_SIZE; ++i) { + efx_qword_t *event = falcon_event(channel, read_ptr); + int ev_code, ev_sub_code, ev_queue; + if (!falcon_event_present(event)) + break; + + ev_code = EFX_QWORD_FIELD(*event, EV_CODE); + ev_sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE); + ev_queue = EFX_QWORD_FIELD(*event, DRIVER_EV_TX_DESCQ_ID); + if ((ev_sub_code == TX_DESCQ_FLS_DONE_EV_DECODE) && + (ev_queue == tx_queue->queue)) { + EFX_LOG(efx, "tx queue %d flush command succesful\n", + tx_queue->queue); + return 0; + } + + read_ptr = (read_ptr + 1) & FALCON_EVQ_MASK; + } + + if (EFX_WORKAROUND_11557(efx)) { + efx_oword_t reg; + int enabled; + + falcon_read_table(efx, ®, efx->type->txd_ptr_tbl_base, + tx_queue->queue); + enabled = EFX_OWORD_FIELD(reg, TX_DESCQ_EN); + if (!enabled) { + EFX_LOG(efx, "tx queue %d disabled without a " + "flush event seen\n", tx_queue->queue); + return 0; + } + } + + EFX_ERR(efx, "tx queue %d flush command timed out\n", tx_queue->queue); + return -ETIMEDOUT; +} + +void falcon_fini_tx(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + efx_oword_t tx_desc_ptr; + + /* Stop the hardware using the queue */ + if (falcon_flush_tx_queue(tx_queue)) + EFX_ERR(efx, "failed to flush tx queue %d\n", tx_queue->queue); + + /* Remove TX descriptor ring from card */ + EFX_ZERO_OWORD(tx_desc_ptr); + falcon_write_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base, + tx_queue->queue); + + /* Unpin TX descriptor ring */ + falcon_fini_special_buffer(efx, &tx_queue->txd); +} + +/* Free buffers backing TX queue */ +void falcon_remove_tx(struct efx_tx_queue *tx_queue) +{ + falcon_free_special_buffer(tx_queue->efx, &tx_queue->txd); +} + +/************************************************************************** + * + * Falcon RX path + * + **************************************************************************/ + +/* Returns a pointer to the specified descriptor in the RX descriptor queue */ +static inline efx_qword_t *falcon_rx_desc(struct efx_rx_queue *rx_queue, + unsigned int index) +{ + return (((efx_qword_t *) (rx_queue->rxd.addr)) + index); +} + +/* This creates an entry in the RX descriptor queue */ +static inline void falcon_build_rx_desc(struct efx_rx_queue *rx_queue, + unsigned index) +{ + struct efx_rx_buffer *rx_buf; + efx_qword_t *rxd; + + rxd = falcon_rx_desc(rx_queue, index); + rx_buf = efx_rx_buffer(rx_queue, index); + EFX_POPULATE_QWORD_3(*rxd, + RX_KER_BUF_SIZE, + rx_buf->len - + rx_queue->efx->type->rx_buffer_padding, + RX_KER_BUF_REGION, 0, + RX_KER_BUF_ADR, rx_buf->dma_addr); +} + +/* This writes to the RX_DESC_WPTR register for the specified receive + * descriptor ring. + */ +void falcon_notify_rx_desc(struct efx_rx_queue *rx_queue) +{ + efx_dword_t reg; + unsigned write_ptr; + + while (rx_queue->notified_count != rx_queue->added_count) { + falcon_build_rx_desc(rx_queue, + rx_queue->notified_count & + FALCON_RXD_RING_MASK); + ++rx_queue->notified_count; + } + + wmb(); + write_ptr = rx_queue->added_count & FALCON_RXD_RING_MASK; + EFX_POPULATE_DWORD_1(reg, RX_DESC_WPTR_DWORD, write_ptr); + falcon_writel_page(rx_queue->efx, ®, + RX_DESC_UPD_REG_KER_DWORD, rx_queue->queue); +} + +int falcon_probe_rx(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + return falcon_alloc_special_buffer(efx, &rx_queue->rxd, + FALCON_RXD_RING_SIZE * + sizeof(efx_qword_t)); +} + +int falcon_init_rx(struct efx_rx_queue *rx_queue) +{ + efx_oword_t rx_desc_ptr; + struct efx_nic *efx = rx_queue->efx; + int rc; + int is_b0 = FALCON_REV(efx) >= FALCON_REV_B0; + int iscsi_digest_en = is_b0; + + EFX_LOG(efx, "RX queue %d ring in special buffers %d-%d\n", + rx_queue->queue, rx_queue->rxd.index, + rx_queue->rxd.index + rx_queue->rxd.entries - 1); + + /* Pin RX descriptor ring */ + rc = falcon_init_special_buffer(efx, &rx_queue->rxd); + if (rc) + return rc; + + /* Push RX descriptor ring to card */ + EFX_POPULATE_OWORD_10(rx_desc_ptr, + RX_ISCSI_DDIG_EN, iscsi_digest_en, + RX_ISCSI_HDIG_EN, iscsi_digest_en, + RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index, + RX_DESCQ_EVQ_ID, rx_queue->channel->evqnum, + RX_DESCQ_OWNER_ID, 0, + RX_DESCQ_LABEL, rx_queue->queue, + RX_DESCQ_SIZE, FALCON_RXD_RING_ORDER, + RX_DESCQ_TYPE, 0 /* kernel queue */ , + /* For >=B0 this is scatter so disable */ + RX_DESCQ_JUMBO, !is_b0, + RX_DESCQ_EN, 1); + falcon_write_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, + rx_queue->queue); + return 0; +} + +static int falcon_flush_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_channel *channel = &efx->channel[0]; + unsigned int read_ptr, i; + efx_oword_t rx_flush_descq; + + /* Post a flush command */ + EFX_POPULATE_OWORD_2(rx_flush_descq, + RX_FLUSH_DESCQ_CMD, 1, + RX_FLUSH_DESCQ, rx_queue->queue); + falcon_write(efx, &rx_flush_descq, RX_FLUSH_DESCQ_REG_KER); + msleep(FALCON_FLUSH_TIMEOUT); + + if (EFX_WORKAROUND_7803(efx)) + return 0; + + /* Look for a flush completed event */ + read_ptr = channel->eventq_read_ptr; + for (i = 0; i < FALCON_EVQ_SIZE; ++i) { + efx_qword_t *event = falcon_event(channel, read_ptr); + int ev_code, ev_sub_code, ev_queue, ev_failed; + if (!falcon_event_present(event)) + break; + + ev_code = EFX_QWORD_FIELD(*event, EV_CODE); + ev_sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE); + ev_queue = EFX_QWORD_FIELD(*event, DRIVER_EV_RX_DESCQ_ID); + ev_failed = EFX_QWORD_FIELD(*event, DRIVER_EV_RX_FLUSH_FAIL); + + if ((ev_sub_code == RX_DESCQ_FLS_DONE_EV_DECODE) && + (ev_queue == rx_queue->queue)) { + if (ev_failed) { + EFX_INFO(efx, "rx queue %d flush command " + "failed\n", rx_queue->queue); + return -EAGAIN; + } else { + EFX_LOG(efx, "rx queue %d flush command " + "succesful\n", rx_queue->queue); + return 0; + } + } + + read_ptr = (read_ptr + 1) & FALCON_EVQ_MASK; + } + + if (EFX_WORKAROUND_11557(efx)) { + efx_oword_t reg; + int enabled; + + falcon_read_table(efx, ®, efx->type->rxd_ptr_tbl_base, + rx_queue->queue); + enabled = EFX_OWORD_FIELD(reg, RX_DESCQ_EN); + if (!enabled) { + EFX_LOG(efx, "rx queue %d disabled without a " + "flush event seen\n", rx_queue->queue); + return 0; + } + } + + EFX_ERR(efx, "rx queue %d flush command timed out\n", rx_queue->queue); + return -ETIMEDOUT; +} + +void falcon_fini_rx(struct efx_rx_queue *rx_queue) +{ + efx_oword_t rx_desc_ptr; + struct efx_nic *efx = rx_queue->efx; + int i, rc; + + /* Try and flush the rx queue. This may need to be repeated */ + for (i = 0; i < 5; i++) { + rc = falcon_flush_rx_queue(rx_queue); + if (rc == -EAGAIN) + continue; + break; + } + if (rc) + EFX_ERR(efx, "failed to flush rx queue %d\n", rx_queue->queue); + + /* Remove RX descriptor ring from card */ + EFX_ZERO_OWORD(rx_desc_ptr); + falcon_write_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, + rx_queue->queue); + + /* Unpin RX descriptor ring */ + falcon_fini_special_buffer(efx, &rx_queue->rxd); +} + +/* Free buffers backing RX queue */ +void falcon_remove_rx(struct efx_rx_queue *rx_queue) +{ + falcon_free_special_buffer(rx_queue->efx, &rx_queue->rxd); +} + +/************************************************************************** + * + * Falcon event queue processing + * Event queues are processed by per-channel tasklets. + * + **************************************************************************/ + +/* Update a channel's event queue's read pointer (RPTR) register + * + * This writes the EVQ_RPTR_REG register for the specified channel's + * event queue. + * + * Note that EVQ_RPTR_REG contains the index of the "last read" event, + * whereas channel->eventq_read_ptr contains the index of the "next to + * read" event. + */ +void falcon_eventq_read_ack(struct efx_channel *channel) +{ + efx_dword_t reg; + struct efx_nic *efx = channel->efx; + + EFX_POPULATE_DWORD_1(reg, EVQ_RPTR_DWORD, channel->eventq_read_ptr); + falcon_writel_table(efx, ®, efx->type->evq_rptr_tbl_base, + channel->evqnum); +} + +/* Use HW to insert a SW defined event */ +void falcon_generate_event(struct efx_channel *channel, efx_qword_t *event) +{ + efx_oword_t drv_ev_reg; + + EFX_POPULATE_OWORD_2(drv_ev_reg, + DRV_EV_QID, channel->evqnum, + DRV_EV_DATA, + EFX_QWORD_FIELD64(*event, WHOLE_EVENT)); + falcon_write(channel->efx, &drv_ev_reg, DRV_EV_REG_KER); +} + +/* Handle a transmit completion event + * + * Falcon batches TX completion events; the message we receive is of + * the form "complete all TX events up to this index". + */ +static inline void falcon_handle_tx_event(struct efx_channel *channel, + efx_qword_t *event) +{ + unsigned int tx_ev_desc_ptr; + unsigned int tx_ev_q_label; + struct efx_tx_queue *tx_queue; + struct efx_nic *efx = channel->efx; + + if (likely(EFX_QWORD_FIELD(*event, TX_EV_COMP))) { + /* Transmit completion */ + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, TX_EV_DESC_PTR); + tx_ev_q_label = EFX_QWORD_FIELD(*event, TX_EV_Q_LABEL); + tx_queue = &efx->tx_queue[tx_ev_q_label]; + efx_xmit_done(tx_queue, tx_ev_desc_ptr); + } else if (EFX_QWORD_FIELD(*event, TX_EV_WQ_FF_FULL)) { + /* Rewrite the FIFO write pointer */ + tx_ev_q_label = EFX_QWORD_FIELD(*event, TX_EV_Q_LABEL); + tx_queue = &efx->tx_queue[tx_ev_q_label]; + + if (NET_DEV_REGISTERED(efx)) + netif_tx_lock(efx->net_dev); + falcon_notify_tx_desc(tx_queue); + if (NET_DEV_REGISTERED(efx)) + netif_tx_unlock(efx->net_dev); + } else if (EFX_QWORD_FIELD(*event, TX_EV_PKT_ERR) && + EFX_WORKAROUND_10727(efx)) { + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + } else { + EFX_ERR(efx, "channel %d unexpected TX event " + EFX_QWORD_FMT"\n", channel->channel, + EFX_QWORD_VAL(*event)); + } +} + +/* Check received packet's destination MAC address. */ +static int check_dest_mac(struct efx_rx_queue *rx_queue, + const efx_qword_t *event) +{ + struct efx_rx_buffer *rx_buf; + struct efx_nic *efx = rx_queue->efx; + int rx_ev_desc_ptr; + struct ethhdr *eh; + + if (efx->promiscuous) + return 1; + + rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, RX_EV_DESC_PTR); + rx_buf = efx_rx_buffer(rx_queue, rx_ev_desc_ptr); + eh = (struct ethhdr *)rx_buf->data; + if (memcmp(eh->h_dest, efx->net_dev->dev_addr, ETH_ALEN)) + return 0; + return 1; +} + +/* Detect errors included in the rx_evt_pkt_ok bit. */ +static void falcon_handle_rx_not_ok(struct efx_rx_queue *rx_queue, + const efx_qword_t *event, + unsigned *rx_ev_pkt_ok, + int *discard, int byte_count) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err; + unsigned rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err; + unsigned rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc; + unsigned rx_ev_pkt_type, rx_ev_other_err, rx_ev_pause_frm; + unsigned rx_ev_ip_frag_err, rx_ev_hdr_type, rx_ev_mcast_pkt; + int snap, non_ip; + + rx_ev_hdr_type = EFX_QWORD_FIELD(*event, RX_EV_HDR_TYPE); + rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, RX_EV_MCAST_PKT); + rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, RX_EV_TOBE_DISC); + rx_ev_pkt_type = EFX_QWORD_FIELD(*event, RX_EV_PKT_TYPE); + rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event, + RX_EV_BUF_OWNER_ID_ERR); + rx_ev_ip_frag_err = EFX_QWORD_FIELD(*event, RX_EV_IF_FRAG_ERR); + rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event, + RX_EV_IP_HDR_CHKSUM_ERR); + rx_ev_tcp_udp_chksum_err = EFX_QWORD_FIELD(*event, + RX_EV_TCP_UDP_CHKSUM_ERR); + rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, RX_EV_ETH_CRC_ERR); + rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, RX_EV_FRM_TRUNC); + rx_ev_drib_nib = ((FALCON_REV(efx) >= FALCON_REV_B0) ? + 0 : EFX_QWORD_FIELD(*event, RX_EV_DRIB_NIB)); + rx_ev_pause_frm = EFX_QWORD_FIELD(*event, RX_EV_PAUSE_FRM_ERR); + + /* Every error apart from tobe_disc and pause_frm */ + rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err | + rx_ev_buf_owner_id_err | rx_ev_eth_crc_err | + rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err); + + snap = (rx_ev_pkt_type == RX_EV_PKT_TYPE_LLC_DECODE) || + (rx_ev_pkt_type == RX_EV_PKT_TYPE_VLAN_LLC_DECODE); + non_ip = (rx_ev_hdr_type == RX_EV_HDR_TYPE_NON_IP_DECODE); + + /* SFC bug 5475/8970: The Falcon XMAC incorrectly calculates the + * length field of an LLC frame, which sets TOBE_DISC. We could set + * PASS_LEN_ERR, but we want the MAC to filter out short frames (to + * protect the RX block). + * + * bug5475 - LLC/SNAP: Falcon identifies SNAP packets. + * bug8970 - LLC/noSNAP: Falcon does not provide an LLC flag. + * LLC can't encapsulate IP, so by definition + * these packets are NON_IP. + * + * Unicast mismatch will also cause TOBE_DISC, so the driver needs + * to check this. + */ + if (EFX_WORKAROUND_5475(efx) && rx_ev_tobe_disc && (snap || non_ip)) { + /* If all the other flags are zero then we can state the + * entire packet is ok, which will flag to the kernel not + * to recalculate checksums. + */ + if (!(non_ip | rx_ev_other_err | rx_ev_pause_frm)) + *rx_ev_pkt_ok = 1; + + rx_ev_tobe_disc = 0; + + /* TOBE_DISC is set for unicast mismatch. But given that + * we can't trust TOBE_DISC here, we must validate the dest + * MAC address ourselves. + */ + if (!rx_ev_mcast_pkt && !check_dest_mac(rx_queue, event)) + rx_ev_tobe_disc = 1; + } + + /* Count errors that are not in MAC stats. */ + if (rx_ev_frm_trunc) + ++rx_queue->channel->n_rx_frm_trunc; + else if (rx_ev_tobe_disc) + ++rx_queue->channel->n_rx_tobe_disc; + else if (rx_ev_ip_hdr_chksum_err) + ++rx_queue->channel->n_rx_ip_hdr_chksum_err; + else if (rx_ev_tcp_udp_chksum_err) + ++rx_queue->channel->n_rx_tcp_udp_chksum_err; + if (rx_ev_ip_frag_err) + ++rx_queue->channel->n_rx_ip_frag_err; + + /* The frame must be discarded if any of these are true. */ + *discard = (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib | + rx_ev_tobe_disc | rx_ev_pause_frm); + + /* TOBE_DISC is expected on unicast mismatches; don't print out an + * error message. FRM_TRUNC indicates RXDP dropped the packet due + * to a FIFO overflow. + */ +#ifdef EFX_ENABLE_DEBUG + if (rx_ev_other_err) { + EFX_INFO_RL(efx, " RX queue %d unexpected RX event " + EFX_QWORD_FMT "%s%s%s%s%s%s%s%s%s\n", + rx_queue->queue, EFX_QWORD_VAL(*event), + rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "", + rx_ev_ip_hdr_chksum_err ? + " [IP_HDR_CHKSUM_ERR]" : "", + rx_ev_tcp_udp_chksum_err ? + " [TCP_UDP_CHKSUM_ERR]" : "", + rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "", + rx_ev_frm_trunc ? " [FRM_TRUNC]" : "", + rx_ev_drib_nib ? " [DRIB_NIB]" : "", + rx_ev_tobe_disc ? " [TOBE_DISC]" : "", + rx_ev_pause_frm ? " [PAUSE]" : "", + snap ? " [SNAP/LLC]" : ""); + } +#endif + + if (unlikely(rx_ev_eth_crc_err && EFX_WORKAROUND_10750(efx) && + efx->phy_type == PHY_TYPE_10XPRESS)) + tenxpress_crc_err(efx); +} + +/* Handle receive events that are not in-order. */ +static void falcon_handle_rx_bad_index(struct efx_rx_queue *rx_queue, + unsigned index) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned expected, dropped; + + expected = rx_queue->removed_count & FALCON_RXD_RING_MASK; + dropped = ((index + FALCON_RXD_RING_SIZE - expected) & + FALCON_RXD_RING_MASK); + EFX_INFO(efx, "dropped %d events (index=%d expected=%d)\n", + dropped, index, expected); + + efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? + RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); +} + +/* Handle a packet received event + * + * Falcon silicon gives a "discard" flag if it's a unicast packet with the + * wrong destination address + * Also "is multicast" and "matches multicast filter" flags can be used to + * discard non-matching multicast packets. + */ +static inline int falcon_handle_rx_event(struct efx_channel *channel, + const efx_qword_t *event) +{ + unsigned int rx_ev_q_label, rx_ev_desc_ptr, rx_ev_byte_cnt; + unsigned int rx_ev_pkt_ok, rx_ev_hdr_type, rx_ev_mcast_pkt; + unsigned expected_ptr; + int discard = 0, checksummed; + struct efx_rx_queue *rx_queue; + struct efx_nic *efx = channel->efx; + + /* Basic packet information */ + rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, RX_EV_BYTE_CNT); + rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, RX_EV_PKT_OK); + rx_ev_hdr_type = EFX_QWORD_FIELD(*event, RX_EV_HDR_TYPE); + WARN_ON(EFX_QWORD_FIELD(*event, RX_EV_JUMBO_CONT)); + WARN_ON(EFX_QWORD_FIELD(*event, RX_EV_SOP) != 1); + + rx_ev_q_label = EFX_QWORD_FIELD(*event, RX_EV_Q_LABEL); + rx_queue = &efx->rx_queue[rx_ev_q_label]; + + rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, RX_EV_DESC_PTR); + expected_ptr = rx_queue->removed_count & FALCON_RXD_RING_MASK; + if (unlikely(rx_ev_desc_ptr != expected_ptr)) { + falcon_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr); + return rx_ev_q_label; + } + + if (likely(rx_ev_pkt_ok)) { + /* If packet is marked as OK and packet type is TCP/IPv4 or + * UDP/IPv4, then we can rely on the hardware checksum. + */ + checksummed = RX_EV_HDR_TYPE_HAS_CHECKSUMS(rx_ev_hdr_type); + } else { + falcon_handle_rx_not_ok(rx_queue, event, &rx_ev_pkt_ok, + &discard, rx_ev_byte_cnt); + checksummed = 0; + } + + /* Detect multicast packets that didn't match the filter */ + rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, RX_EV_MCAST_PKT); + if (rx_ev_mcast_pkt) { + unsigned int rx_ev_mcast_hash_match = + EFX_QWORD_FIELD(*event, RX_EV_MCAST_HASH_MATCH); + + if (unlikely(!rx_ev_mcast_hash_match)) + discard = 1; + } + + /* Handle received packet */ + efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, + checksummed, discard); + + return rx_ev_q_label; +} + +/* Global events are basically PHY events */ +static void falcon_handle_global_event(struct efx_channel *channel, + efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + int is_phy_event = 0, handled = 0; + + /* Check for interrupt on either port. Some boards have a + * single PHY wired to the interrupt line for port 1. */ + if (EFX_QWORD_FIELD(*event, G_PHY0_INTR) || + EFX_QWORD_FIELD(*event, G_PHY1_INTR) || + EFX_QWORD_FIELD(*event, XG_PHY_INTR)) + is_phy_event = 1; + + if ((FALCON_REV(efx) >= FALCON_REV_B0) && + EFX_OWORD_FIELD(*event, XG_MNT_INTR_B0)) + is_phy_event = 1; + + if (is_phy_event) { + efx->phy_op->clear_interrupt(efx); + queue_work(efx->workqueue, &efx->reconfigure_work); + handled = 1; + } + + if (EFX_QWORD_FIELD_VER(efx, *event, RX_RECOVERY)) { + EFX_ERR(efx, "channel %d seen global RX_RESET " + "event. Resetting.\n", channel->channel); + + atomic_inc(&efx->rx_reset); + efx_schedule_reset(efx, EFX_WORKAROUND_6555(efx) ? + RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); + handled = 1; + } + + if (!handled) + EFX_ERR(efx, "channel %d unknown global event " + EFX_QWORD_FMT "\n", channel->channel, + EFX_QWORD_VAL(*event)); +} + +static void falcon_handle_driver_event(struct efx_channel *channel, + efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + unsigned int ev_sub_code; + unsigned int ev_sub_data; + + ev_sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE); + ev_sub_data = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_DATA); + + switch (ev_sub_code) { + case TX_DESCQ_FLS_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d TXQ %d flushed\n", + channel->channel, ev_sub_data); + break; + case RX_DESCQ_FLS_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d RXQ %d flushed\n", + channel->channel, ev_sub_data); + break; + case EVQ_INIT_DONE_EV_DECODE: + EFX_LOG(efx, "channel %d EVQ %d initialised\n", + channel->channel, ev_sub_data); + break; + case SRM_UPD_DONE_EV_DECODE: + EFX_TRACE(efx, "channel %d SRAM update done\n", + channel->channel); + break; + case WAKE_UP_EV_DECODE: + EFX_TRACE(efx, "channel %d RXQ %d wakeup event\n", + channel->channel, ev_sub_data); + break; + case TIMER_EV_DECODE: + EFX_TRACE(efx, "channel %d RX queue %d timer expired\n", + channel->channel, ev_sub_data); + break; + case RX_RECOVERY_EV_DECODE: + EFX_ERR(efx, "channel %d seen DRIVER RX_RESET event. " + "Resetting.\n", channel->channel); + efx_schedule_reset(efx, + EFX_WORKAROUND_6555(efx) ? + RESET_TYPE_RX_RECOVERY : + RESET_TYPE_DISABLE); + break; + case RX_DSC_ERROR_EV_DECODE: + EFX_ERR(efx, "RX DMA Q %d reports descriptor fetch error." + " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); + break; + case TX_DSC_ERROR_EV_DECODE: + EFX_ERR(efx, "TX DMA Q %d reports descriptor fetch error." + " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + break; + default: + EFX_TRACE(efx, "channel %d unknown driver event code %d " + "data %04x\n", channel->channel, ev_sub_code, + ev_sub_data); + break; + } +} + +int falcon_process_eventq(struct efx_channel *channel, int *rx_quota) +{ + unsigned int read_ptr; + efx_qword_t event, *p_event; + int ev_code; + int rxq; + int rxdmaqs = 0; + + read_ptr = channel->eventq_read_ptr; + + do { + p_event = falcon_event(channel, read_ptr); + event = *p_event; + + if (!falcon_event_present(&event)) + /* End of events */ + break; + + EFX_TRACE(channel->efx, "channel %d event is "EFX_QWORD_FMT"\n", + channel->channel, EFX_QWORD_VAL(event)); + + /* Clear this event by marking it all ones */ + EFX_SET_QWORD(*p_event); + + ev_code = EFX_QWORD_FIELD(event, EV_CODE); + + switch (ev_code) { + case RX_IP_EV_DECODE: + rxq = falcon_handle_rx_event(channel, &event); + rxdmaqs |= (1 << rxq); + (*rx_quota)--; + break; + case TX_IP_EV_DECODE: + falcon_handle_tx_event(channel, &event); + break; + case DRV_GEN_EV_DECODE: + channel->eventq_magic + = EFX_QWORD_FIELD(event, EVQ_MAGIC); + EFX_LOG(channel->efx, "channel %d received generated " + "event "EFX_QWORD_FMT"\n", channel->channel, + EFX_QWORD_VAL(event)); + break; + case GLOBAL_EV_DECODE: + falcon_handle_global_event(channel, &event); + break; + case DRIVER_EV_DECODE: + falcon_handle_driver_event(channel, &event); + break; + default: + EFX_ERR(channel->efx, "channel %d unknown event type %d" + " (data " EFX_QWORD_FMT ")\n", channel->channel, + ev_code, EFX_QWORD_VAL(event)); + } + + /* Increment read pointer */ + read_ptr = (read_ptr + 1) & FALCON_EVQ_MASK; + + } while (*rx_quota); + + channel->eventq_read_ptr = read_ptr; + return rxdmaqs; +} + +void falcon_set_int_moderation(struct efx_channel *channel) +{ + efx_dword_t timer_cmd; + struct efx_nic *efx = channel->efx; + + /* Set timer register */ + if (channel->irq_moderation) { + /* Round to resolution supported by hardware. The value we + * program is based at 0. So actual interrupt moderation + * achieved is ((x + 1) * res). + */ + unsigned int res = 5; + channel->irq_moderation -= (channel->irq_moderation % res); + if (channel->irq_moderation < res) + channel->irq_moderation = res; + EFX_POPULATE_DWORD_2(timer_cmd, + TIMER_MODE, TIMER_MODE_INT_HLDOFF, + TIMER_VAL, + (channel->irq_moderation / res) - 1); + } else { + EFX_POPULATE_DWORD_2(timer_cmd, + TIMER_MODE, TIMER_MODE_DIS, + TIMER_VAL, 0); + } + falcon_writel_page_locked(efx, &timer_cmd, TIMER_CMD_REG_KER, + channel->evqnum); + +} + +/* Allocate buffer table entries for event queue */ +int falcon_probe_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned int evq_size; + + evq_size = FALCON_EVQ_SIZE * sizeof(efx_qword_t); + return falcon_alloc_special_buffer(efx, &channel->eventq, evq_size); +} + +int falcon_init_eventq(struct efx_channel *channel) +{ + efx_oword_t evq_ptr; + struct efx_nic *efx = channel->efx; + int rc; + + EFX_LOG(efx, "channel %d event queue in special buffers %d-%d\n", + channel->channel, channel->eventq.index, + channel->eventq.index + channel->eventq.entries - 1); + + /* Pin event queue buffer */ + rc = falcon_init_special_buffer(efx, &channel->eventq); + if (rc) + return rc; + + /* Fill event queue with all ones (i.e. empty events) */ + memset(channel->eventq.addr, 0xff, channel->eventq.len); + + /* Push event queue to card */ + EFX_POPULATE_OWORD_3(evq_ptr, + EVQ_EN, 1, + EVQ_SIZE, FALCON_EVQ_ORDER, + EVQ_BUF_BASE_ID, channel->eventq.index); + falcon_write_table(efx, &evq_ptr, efx->type->evq_ptr_tbl_base, + channel->evqnum); + + falcon_set_int_moderation(channel); + + return 0; +} + +void falcon_fini_eventq(struct efx_channel *channel) +{ + efx_oword_t eventq_ptr; + struct efx_nic *efx = channel->efx; + + /* Remove event queue from card */ + EFX_ZERO_OWORD(eventq_ptr); + falcon_write_table(efx, &eventq_ptr, efx->type->evq_ptr_tbl_base, + channel->evqnum); + + /* Unpin event queue */ + falcon_fini_special_buffer(efx, &channel->eventq); +} + +/* Free buffers backing event queue */ +void falcon_remove_eventq(struct efx_channel *channel) +{ + falcon_free_special_buffer(channel->efx, &channel->eventq); +} + + +/* Generates a test event on the event queue. A subsequent call to + * process_eventq() should pick up the event and place the value of + * "magic" into channel->eventq_magic; + */ +void falcon_generate_test_event(struct efx_channel *channel, unsigned int magic) +{ + efx_qword_t test_event; + + EFX_POPULATE_QWORD_2(test_event, + EV_CODE, DRV_GEN_EV_DECODE, + EVQ_MAGIC, magic); + falcon_generate_event(channel, &test_event); +} + + +/************************************************************************** + * + * Falcon hardware interrupts + * The hardware interrupt handler does very little work; all the event + * queue processing is carried out by per-channel tasklets. + * + **************************************************************************/ + +/* Enable/disable/generate Falcon interrupts */ +static inline void falcon_interrupts(struct efx_nic *efx, int enabled, + int force) +{ + efx_oword_t int_en_reg_ker; + + EFX_POPULATE_OWORD_2(int_en_reg_ker, + KER_INT_KER, force, + DRV_INT_EN_KER, enabled); + falcon_write(efx, &int_en_reg_ker, INT_EN_REG_KER); +} + +void falcon_enable_interrupts(struct efx_nic *efx) +{ + efx_oword_t int_adr_reg_ker; + struct efx_channel *channel; + + EFX_ZERO_OWORD(*((efx_oword_t *) efx->irq_status.addr)); + wmb(); /* Ensure interrupt vector is clear before interrupts enabled */ + + /* Program address */ + EFX_POPULATE_OWORD_2(int_adr_reg_ker, + NORM_INT_VEC_DIS_KER, EFX_INT_MODE_USE_MSI(efx), + INT_ADR_KER, efx->irq_status.dma_addr); + falcon_write(efx, &int_adr_reg_ker, INT_ADR_REG_KER); + + /* Enable interrupts */ + falcon_interrupts(efx, 1, 0); + + /* Force processing of all the channels to get the EVQ RPTRs up to + date */ + efx_for_each_channel_with_interrupt(channel, efx) + efx_schedule_channel(channel); +} + +void falcon_disable_interrupts(struct efx_nic *efx) +{ + /* Disable interrupts */ + falcon_interrupts(efx, 0, 0); +} + +/* Generate a Falcon test interrupt + * Interrupt must already have been enabled, otherwise nasty things + * may happen. + */ +void falcon_generate_interrupt(struct efx_nic *efx) +{ + falcon_interrupts(efx, 1, 1); +} + +/* Acknowledge a legacy interrupt from Falcon + * + * This acknowledges a legacy (not MSI) interrupt via INT_ACK_KER_REG. + * + * Due to SFC bug 3706 (silicon revision <=A1) reads can be duplicated in the + * BIU. Interrupt acknowledge is read sensitive so must write instead + * (then read to ensure the BIU collector is flushed) + * + * NB most hardware supports MSI interrupts + */ +static inline void falcon_irq_ack_a1(struct efx_nic *efx) +{ + efx_dword_t reg; + + EFX_POPULATE_DWORD_1(reg, INT_ACK_DUMMY_DATA, 0xb7eb7e); + falcon_writel(efx, ®, INT_ACK_REG_KER_A1); + falcon_readl(efx, ®, WORK_AROUND_BROKEN_PCI_READS_REG_KER_A1); +} + +/* Process a fatal interrupt + * Disable bus mastering ASAP and schedule a reset + */ +static irqreturn_t falcon_fatal_interrupt(struct efx_nic *efx) +{ + struct falcon_nic_data *nic_data = efx->nic_data; + efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + efx_oword_t fatal_intr; + int error, mem_perr; + static int n_int_errors; + + falcon_read(efx, &fatal_intr, FATAL_INTR_REG_KER); + error = EFX_OWORD_FIELD(fatal_intr, INT_KER_ERROR); + + EFX_ERR(efx, "SYSTEM ERROR " EFX_OWORD_FMT " status " + EFX_OWORD_FMT ": %s\n", EFX_OWORD_VAL(*int_ker), + EFX_OWORD_VAL(fatal_intr), + error ? "disabling bus mastering" : "no recognised error"); + if (error == 0) + goto out; + + /* If this is a memory parity error dump which blocks are offending */ + mem_perr = EFX_OWORD_FIELD(fatal_intr, MEM_PERR_INT_KER); + if (mem_perr) { + efx_oword_t reg; + falcon_read(efx, ®, MEM_STAT_REG_KER); + EFX_ERR(efx, "SYSTEM ERROR: memory parity error " + EFX_OWORD_FMT "\n", EFX_OWORD_VAL(reg)); + } + + /* Disable DMA bus mastering on both devices */ + pci_disable_device(efx->pci_dev); + if (FALCON_IS_DUAL_FUNC(efx)) + pci_disable_device(nic_data->pci_dev2); + + if (++n_int_errors < FALCON_MAX_INT_ERRORS) { + EFX_ERR(efx, "SYSTEM ERROR - reset scheduled\n"); + efx_schedule_reset(efx, RESET_TYPE_INT_ERROR); + } else { + EFX_ERR(efx, "SYSTEM ERROR - max number of errors seen." + "NIC will be disabled\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + } +out: + return IRQ_HANDLED; +} + +/* Handle a legacy interrupt from Falcon + * Acknowledges the interrupt and schedule event queue processing. + */ +static irqreturn_t falcon_legacy_interrupt_b0(int irq, void *dev_id) +{ + struct efx_nic *efx = (struct efx_nic *)dev_id; + efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + struct efx_channel *channel; + efx_dword_t reg; + u32 queues; + int syserr; + + /* Read the ISR which also ACKs the interrupts */ + falcon_readl(efx, ®, INT_ISR0_B0); + queues = EFX_EXTRACT_DWORD(reg, 0, 31); + + /* Check to see if we have a serious error condition */ + syserr = EFX_OWORD_FIELD(*int_ker, FATAL_INT); + if (unlikely(syserr)) + return falcon_fatal_interrupt(efx); + + if (queues == 0) + return IRQ_NONE; + + efx->last_irq_cpu = raw_smp_processor_id(); + EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n", + irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg)); + + /* Schedule processing of any interrupting queues */ + channel = &efx->channel[0]; + while (queues) { + if (queues & 0x01) + efx_schedule_channel(channel); + channel++; + queues >>= 1; + } + + return IRQ_HANDLED; +} + + +static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) +{ + struct efx_nic *efx = (struct efx_nic *)dev_id; + efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + struct efx_channel *channel; + int syserr; + int queues; + + /* Check to see if this is our interrupt. If it isn't, we + * exit without having touched the hardware. + */ + if (unlikely(EFX_OWORD_IS_ZERO(*int_ker))) { + EFX_TRACE(efx, "IRQ %d on CPU %d not for me\n", irq, + raw_smp_processor_id()); + return IRQ_NONE; + } + efx->last_irq_cpu = raw_smp_processor_id(); + EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n", + irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker)); + + /* Check to see if we have a serious error condition */ + syserr = EFX_OWORD_FIELD(*int_ker, FATAL_INT); + if (unlikely(syserr)) + return falcon_fatal_interrupt(efx); + + /* Determine interrupting queues, clear interrupt status + * register and acknowledge the device interrupt. + */ + BUILD_BUG_ON(INT_EVQS_WIDTH > EFX_MAX_CHANNELS); + queues = EFX_OWORD_FIELD(*int_ker, INT_EVQS); + EFX_ZERO_OWORD(*int_ker); + wmb(); /* Ensure the vector is cleared before interrupt ack */ + falcon_irq_ack_a1(efx); + + /* Schedule processing of any interrupting queues */ + channel = &efx->channel[0]; + while (queues) { + if (queues & 0x01) + efx_schedule_channel(channel); + channel++; + queues >>= 1; + } + + return IRQ_HANDLED; +} + +/* Handle an MSI interrupt from Falcon + * + * Handle an MSI hardware interrupt. This routine schedules event + * queue processing. No interrupt acknowledgement cycle is necessary. + * Also, we never need to check that the interrupt is for us, since + * MSI interrupts cannot be shared. + */ +static irqreturn_t falcon_msi_interrupt(int irq, void *dev_id) +{ + struct efx_channel *channel = (struct efx_channel *)dev_id; + struct efx_nic *efx = channel->efx; + efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + int syserr; + + efx->last_irq_cpu = raw_smp_processor_id(); + EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n", + irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker)); + + /* Check to see if we have a serious error condition */ + syserr = EFX_OWORD_FIELD(*int_ker, FATAL_INT); + if (unlikely(syserr)) + return falcon_fatal_interrupt(efx); + + /* Schedule processing of the channel */ + efx_schedule_channel(channel); + + return IRQ_HANDLED; +} + + +/* Setup RSS indirection table. + * This maps from the hash value of the packet to RXQ + */ +static void falcon_setup_rss_indir_table(struct efx_nic *efx) +{ + int i = 0; + unsigned long offset; + efx_dword_t dword; + + if (FALCON_REV(efx) < FALCON_REV_B0) + return; + + for (offset = RX_RSS_INDIR_TBL_B0; + offset < RX_RSS_INDIR_TBL_B0 + 0x800; + offset += 0x10) { + EFX_POPULATE_DWORD_1(dword, RX_RSS_INDIR_ENT_B0, + i % efx->rss_queues); + falcon_writel(efx, &dword, offset); + i++; + } +} + +/* Hook interrupt handler(s) + * Try MSI and then legacy interrupts. + */ +int falcon_init_interrupt(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + if (!EFX_INT_MODE_USE_MSI(efx)) { + irq_handler_t handler; + if (FALCON_REV(efx) >= FALCON_REV_B0) + handler = falcon_legacy_interrupt_b0; + else + handler = falcon_legacy_interrupt_a1; + + rc = request_irq(efx->legacy_irq, handler, IRQF_SHARED, + efx->name, efx); + if (rc) { + EFX_ERR(efx, "failed to hook legacy IRQ %d\n", + efx->pci_dev->irq); + goto fail1; + } + return 0; + } + + /* Hook MSI or MSI-X interrupt */ + efx_for_each_channel_with_interrupt(channel, efx) { + rc = request_irq(channel->irq, falcon_msi_interrupt, + IRQF_PROBE_SHARED, /* Not shared */ + efx->name, channel); + if (rc) { + EFX_ERR(efx, "failed to hook IRQ %d\n", channel->irq); + goto fail2; + } + } + + return 0; + + fail2: + efx_for_each_channel_with_interrupt(channel, efx) + free_irq(channel->irq, channel); + fail1: + return rc; +} + +void falcon_fini_interrupt(struct efx_nic *efx) +{ + struct efx_channel *channel; + efx_oword_t reg; + + /* Disable MSI/MSI-X interrupts */ + efx_for_each_channel_with_interrupt(channel, efx) + if (channel->irq) + free_irq(channel->irq, channel); + + /* ACK legacy interrupt */ + if (FALCON_REV(efx) >= FALCON_REV_B0) + falcon_read(efx, ®, INT_ISR0_B0); + else + falcon_irq_ack_a1(efx); + + /* Disable legacy interrupt */ + if (efx->legacy_irq) + free_irq(efx->legacy_irq, efx); +} + +/************************************************************************** + * + * EEPROM/flash + * + ************************************************************************** + */ + +#define FALCON_SPI_MAX_LEN sizeof(efx_oword_t) + +/* Wait for SPI command completion */ +static int falcon_spi_wait(struct efx_nic *efx) +{ + efx_oword_t reg; + int cmd_en, timer_active; + int count; + + count = 0; + do { + falcon_read(efx, ®, EE_SPI_HCMD_REG_KER); + cmd_en = EFX_OWORD_FIELD(reg, EE_SPI_HCMD_CMD_EN); + timer_active = EFX_OWORD_FIELD(reg, EE_WR_TIMER_ACTIVE); + if (!cmd_en && !timer_active) + return 0; + udelay(10); + } while (++count < 10000); /* wait upto 100msec */ + EFX_ERR(efx, "timed out waiting for SPI\n"); + return -ETIMEDOUT; +} + +static int +falcon_spi_read(struct efx_nic *efx, int device_id, unsigned int command, + unsigned int address, unsigned int addr_len, + void *data, unsigned int len) +{ + efx_oword_t reg; + int rc; + + BUG_ON(len > FALCON_SPI_MAX_LEN); + + /* Check SPI not currently being accessed */ + rc = falcon_spi_wait(efx); + if (rc) + return rc; + + /* Program address register */ + EFX_POPULATE_OWORD_1(reg, EE_SPI_HADR_ADR, address); + falcon_write(efx, ®, EE_SPI_HADR_REG_KER); + + /* Issue read command */ + EFX_POPULATE_OWORD_7(reg, + EE_SPI_HCMD_CMD_EN, 1, + EE_SPI_HCMD_SF_SEL, device_id, + EE_SPI_HCMD_DABCNT, len, + EE_SPI_HCMD_READ, EE_SPI_READ, + EE_SPI_HCMD_DUBCNT, 0, + EE_SPI_HCMD_ADBCNT, addr_len, + EE_SPI_HCMD_ENC, command); + falcon_write(efx, ®, EE_SPI_HCMD_REG_KER); + + /* Wait for read to complete */ + rc = falcon_spi_wait(efx); + if (rc) + return rc; + + /* Read data */ + falcon_read(efx, ®, EE_SPI_HDATA_REG_KER); + memcpy(data, ®, len); + return 0; +} + +/************************************************************************** + * + * MAC wrapper + * + ************************************************************************** + */ +void falcon_drain_tx_fifo(struct efx_nic *efx) +{ + efx_oword_t temp; + int count; + + if (FALCON_REV(efx) < FALCON_REV_B0) + return; + + falcon_read(efx, &temp, MAC0_CTRL_REG_KER); + /* There is no point in draining more than once */ + if (EFX_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0)) + return; + + /* MAC stats will fail whilst the TX fifo is draining. Serialise + * the drain sequence with the statistics fetch */ + spin_lock(&efx->stats_lock); + + EFX_SET_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0, 1); + falcon_write(efx, &temp, MAC0_CTRL_REG_KER); + + /* Reset the MAC and EM block. */ + falcon_read(efx, &temp, GLB_CTL_REG_KER); + EFX_SET_OWORD_FIELD(temp, RST_XGTX, 1); + EFX_SET_OWORD_FIELD(temp, RST_XGRX, 1); + EFX_SET_OWORD_FIELD(temp, RST_EM, 1); + falcon_write(efx, &temp, GLB_CTL_REG_KER); + + count = 0; + while (1) { + falcon_read(efx, &temp, GLB_CTL_REG_KER); + if (!EFX_OWORD_FIELD(temp, RST_XGTX) && + !EFX_OWORD_FIELD(temp, RST_XGRX) && + !EFX_OWORD_FIELD(temp, RST_EM)) { + EFX_LOG(efx, "Completed MAC reset after %d loops\n", + count); + break; + } + if (count > 20) { + EFX_ERR(efx, "MAC reset failed\n"); + break; + } + count++; + udelay(10); + } + + spin_unlock(&efx->stats_lock); + + /* If we've reset the EM block and the link is up, then + * we'll have to kick the XAUI link so the PHY can recover */ + if (efx->link_up && EFX_WORKAROUND_5147(efx)) + falcon_reset_xaui(efx); +} + +void falcon_deconfigure_mac_wrapper(struct efx_nic *efx) +{ + efx_oword_t temp; + + if (FALCON_REV(efx) < FALCON_REV_B0) + return; + + /* Isolate the MAC -> RX */ + falcon_read(efx, &temp, RX_CFG_REG_KER); + EFX_SET_OWORD_FIELD(temp, RX_INGR_EN_B0, 0); + falcon_write(efx, &temp, RX_CFG_REG_KER); + + if (!efx->link_up) + falcon_drain_tx_fifo(efx); +} + +void falcon_reconfigure_mac_wrapper(struct efx_nic *efx) +{ + efx_oword_t reg; + int link_speed; + unsigned int tx_fc; + + if (efx->link_options & GM_LPA_10000) + link_speed = 0x3; + else if (efx->link_options & GM_LPA_1000) + link_speed = 0x2; + else if (efx->link_options & GM_LPA_100) + link_speed = 0x1; + else + link_speed = 0x0; + /* MAC_LINK_STATUS controls MAC backpressure but doesn't work + * as advertised. Disable to ensure packets are not + * indefinitely held and TX queue can be flushed at any point + * while the link is down. */ + EFX_POPULATE_OWORD_5(reg, + MAC_XOFF_VAL, 0xffff /* max pause time */, + MAC_BCAD_ACPT, 1, + MAC_UC_PROM, efx->promiscuous, + MAC_LINK_STATUS, 1, /* always set */ + MAC_SPEED, link_speed); + /* On B0, MAC backpressure can be disabled and packets get + * discarded. */ + if (FALCON_REV(efx) >= FALCON_REV_B0) { + EFX_SET_OWORD_FIELD(reg, TXFIFO_DRAIN_EN_B0, + !efx->link_up); + } + + falcon_write(efx, ®, MAC0_CTRL_REG_KER); + + /* Restore the multicast hash registers. */ + falcon_set_multicast_hash(efx); + + /* Transmission of pause frames when RX crosses the threshold is + * covered by RX_XOFF_MAC_EN and XM_TX_CFG_REG:XM_FCNTL. + * Action on receipt of pause frames is controller by XM_DIS_FCNTL */ + tx_fc = (efx->flow_control & EFX_FC_TX) ? 1 : 0; + falcon_read(efx, ®, RX_CFG_REG_KER); + EFX_SET_OWORD_FIELD_VER(efx, reg, RX_XOFF_MAC_EN, tx_fc); + + /* Unisolate the MAC -> RX */ + if (FALCON_REV(efx) >= FALCON_REV_B0) + EFX_SET_OWORD_FIELD(reg, RX_INGR_EN_B0, 1); + falcon_write(efx, ®, RX_CFG_REG_KER); +} + +int falcon_dma_stats(struct efx_nic *efx, unsigned int done_offset) +{ + efx_oword_t reg; + u32 *dma_done; + int i; + + if (disable_dma_stats) + return 0; + + /* Statistics fetch will fail if the MAC is in TX drain */ + if (FALCON_REV(efx) >= FALCON_REV_B0) { + efx_oword_t temp; + falcon_read(efx, &temp, MAC0_CTRL_REG_KER); + if (EFX_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0)) + return 0; + } + + dma_done = (efx->stats_buffer.addr + done_offset); + *dma_done = FALCON_STATS_NOT_DONE; + wmb(); /* ensure done flag is clear */ + + /* Initiate DMA transfer of stats */ + EFX_POPULATE_OWORD_2(reg, + MAC_STAT_DMA_CMD, 1, + MAC_STAT_DMA_ADR, + efx->stats_buffer.dma_addr); + falcon_write(efx, ®, MAC0_STAT_DMA_REG_KER); + + /* Wait for transfer to complete */ + for (i = 0; i < 400; i++) { + if (*(volatile u32 *)dma_done == FALCON_STATS_DONE) + return 0; + udelay(10); + } + + EFX_ERR(efx, "timed out waiting for statistics\n"); + return -ETIMEDOUT; +} + +/************************************************************************** + * + * PHY access via GMII + * + ************************************************************************** + */ + +/* Use the top bit of the MII PHY id to indicate the PHY type + * (1G/10G), with the remaining bits as the actual PHY id. + * + * This allows us to avoid leaking information from the mii_if_info + * structure into other data structures. + */ +#define FALCON_PHY_ID_ID_WIDTH EFX_WIDTH(MD_PRT_DEV_ADR) +#define FALCON_PHY_ID_ID_MASK ((1 << FALCON_PHY_ID_ID_WIDTH) - 1) +#define FALCON_PHY_ID_WIDTH (FALCON_PHY_ID_ID_WIDTH + 1) +#define FALCON_PHY_ID_MASK ((1 << FALCON_PHY_ID_WIDTH) - 1) +#define FALCON_PHY_ID_10G (1 << (FALCON_PHY_ID_WIDTH - 1)) + + +/* Packing the clause 45 port and device fields into a single value */ +#define MD_PRT_ADR_COMP_LBN (MD_PRT_ADR_LBN - MD_DEV_ADR_LBN) +#define MD_PRT_ADR_COMP_WIDTH MD_PRT_ADR_WIDTH +#define MD_DEV_ADR_COMP_LBN 0 +#define MD_DEV_ADR_COMP_WIDTH MD_DEV_ADR_WIDTH + + +/* Wait for GMII access to complete */ +static int falcon_gmii_wait(struct efx_nic *efx) +{ + efx_dword_t md_stat; + int count; + + for (count = 0; count < 1000; count++) { /* wait upto 10ms */ + falcon_readl(efx, &md_stat, MD_STAT_REG_KER); + if (EFX_DWORD_FIELD(md_stat, MD_BSY) == 0) { + if (EFX_DWORD_FIELD(md_stat, MD_LNFL) != 0 || + EFX_DWORD_FIELD(md_stat, MD_BSERR) != 0) { + EFX_ERR(efx, "error from GMII access " + EFX_DWORD_FMT"\n", + EFX_DWORD_VAL(md_stat)); + return -EIO; + } + return 0; + } + udelay(10); + } + EFX_ERR(efx, "timed out waiting for GMII\n"); + return -ETIMEDOUT; +} + +/* Writes a GMII register of a PHY connected to Falcon using MDIO. */ +static void falcon_mdio_write(struct net_device *net_dev, int phy_id, + int addr, int value) +{ + struct efx_nic *efx = (struct efx_nic *)net_dev->priv; + unsigned int phy_id2 = phy_id & FALCON_PHY_ID_ID_MASK; + efx_oword_t reg; + + /* The 'generic' prt/dev packing in mdio_10g.h is conveniently + * chosen so that the only current user, Falcon, can take the + * packed value and use them directly. + * Fail to build if this assumption is broken. + */ + BUILD_BUG_ON(FALCON_PHY_ID_10G != MDIO45_XPRT_ID_IS10G); + BUILD_BUG_ON(FALCON_PHY_ID_ID_WIDTH != MDIO45_PRT_DEV_WIDTH); + BUILD_BUG_ON(MD_PRT_ADR_COMP_LBN != MDIO45_PRT_ID_COMP_LBN); + BUILD_BUG_ON(MD_DEV_ADR_COMP_LBN != MDIO45_DEV_ID_COMP_LBN); + + if (phy_id2 == PHY_ADDR_INVALID) + return; + + /* See falcon_mdio_read for an explanation. */ + if (!(phy_id & FALCON_PHY_ID_10G)) { + int mmd = ffs(efx->phy_op->mmds) - 1; + EFX_TRACE(efx, "Fixing erroneous clause22 write\n"); + phy_id2 = mdio_clause45_pack(phy_id2, mmd) + & FALCON_PHY_ID_ID_MASK; + } + + EFX_REGDUMP(efx, "writing GMII %d register %02x with %04x\n", phy_id, + addr, value); + + spin_lock_bh(&efx->phy_lock); + + /* Check MII not currently being accessed */ + if (falcon_gmii_wait(efx) != 0) + goto out; + + /* Write the address/ID register */ + EFX_POPULATE_OWORD_1(reg, MD_PHY_ADR, addr); + falcon_write(efx, ®, MD_PHY_ADR_REG_KER); + + EFX_POPULATE_OWORD_1(reg, MD_PRT_DEV_ADR, phy_id2); + falcon_write(efx, ®, MD_ID_REG_KER); + + /* Write data */ + EFX_POPULATE_OWORD_1(reg, MD_TXD, value); + falcon_write(efx, ®, MD_TXD_REG_KER); + + EFX_POPULATE_OWORD_2(reg, + MD_WRC, 1, + MD_GC, 0); + falcon_write(efx, ®, MD_CS_REG_KER); + + /* Wait for data to be written */ + if (falcon_gmii_wait(efx) != 0) { + /* Abort the write operation */ + EFX_POPULATE_OWORD_2(reg, + MD_WRC, 0, + MD_GC, 1); + falcon_write(efx, ®, MD_CS_REG_KER); + udelay(10); + } + + out: + spin_unlock_bh(&efx->phy_lock); +} + +/* Reads a GMII register from a PHY connected to Falcon. If no value + * could be read, -1 will be returned. */ +static int falcon_mdio_read(struct net_device *net_dev, int phy_id, int addr) +{ + struct efx_nic *efx = (struct efx_nic *)net_dev->priv; + unsigned int phy_addr = phy_id & FALCON_PHY_ID_ID_MASK; + efx_oword_t reg; + int value = -1; + + if (phy_addr == PHY_ADDR_INVALID) + return -1; + + /* Our PHY code knows whether it needs to talk clause 22(1G) or 45(10G) + * but the generic Linux code does not make any distinction or have + * any state for this. + * We spot the case where someone tried to talk 22 to a 45 PHY and + * redirect the request to the lowest numbered MMD as a clause45 + * request. This is enough to allow simple queries like id and link + * state to succeed. TODO: We may need to do more in future. + */ + if (!(phy_id & FALCON_PHY_ID_10G)) { + int mmd = ffs(efx->phy_op->mmds) - 1; + EFX_TRACE(efx, "Fixing erroneous clause22 read\n"); + phy_addr = mdio_clause45_pack(phy_addr, mmd) + & FALCON_PHY_ID_ID_MASK; + } + + spin_lock_bh(&efx->phy_lock); + + /* Check MII not currently being accessed */ + if (falcon_gmii_wait(efx) != 0) + goto out; + + EFX_POPULATE_OWORD_1(reg, MD_PHY_ADR, addr); + falcon_write(efx, ®, MD_PHY_ADR_REG_KER); + + EFX_POPULATE_OWORD_1(reg, MD_PRT_DEV_ADR, phy_addr); + falcon_write(efx, ®, MD_ID_REG_KER); + + /* Request data to be read */ + EFX_POPULATE_OWORD_2(reg, MD_RDC, 1, MD_GC, 0); + falcon_write(efx, ®, MD_CS_REG_KER); + + /* Wait for data to become available */ + value = falcon_gmii_wait(efx); + if (value == 0) { + falcon_read(efx, ®, MD_RXD_REG_KER); + value = EFX_OWORD_FIELD(reg, MD_RXD); + EFX_REGDUMP(efx, "read from GMII %d register %02x, got %04x\n", + phy_id, addr, value); + } else { + /* Abort the read operation */ + EFX_POPULATE_OWORD_2(reg, + MD_RIC, 0, + MD_GC, 1); + falcon_write(efx, ®, MD_CS_REG_KER); + + EFX_LOG(efx, "read from GMII 0x%x register %02x, got " + "error %d\n", phy_id, addr, value); + } + + out: + spin_unlock_bh(&efx->phy_lock); + + return value; +} + +static void falcon_init_mdio(struct mii_if_info *gmii) +{ + gmii->mdio_read = falcon_mdio_read; + gmii->mdio_write = falcon_mdio_write; + gmii->phy_id_mask = FALCON_PHY_ID_MASK; + gmii->reg_num_mask = ((1 << EFX_WIDTH(MD_PHY_ADR)) - 1); +} + +static int falcon_probe_phy(struct efx_nic *efx) +{ + switch (efx->phy_type) { + case PHY_TYPE_10XPRESS: + efx->phy_op = &falcon_tenxpress_phy_ops; + break; + case PHY_TYPE_XFP: + efx->phy_op = &falcon_xfp_phy_ops; + break; + default: + EFX_ERR(efx, "Unknown PHY type %d\n", + efx->phy_type); + return -1; + } + return 0; +} + +/* This call is responsible for hooking in the MAC and PHY operations */ +int falcon_probe_port(struct efx_nic *efx) +{ + int rc; + + /* Hook in PHY operations table */ + rc = falcon_probe_phy(efx); + if (rc) + return rc; + + /* Set up GMII structure for PHY */ + efx->mii.supports_gmii = 1; + falcon_init_mdio(&efx->mii); + + /* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */ + if (FALCON_REV(efx) >= FALCON_REV_B0) + efx->flow_control = EFX_FC_RX | EFX_FC_TX; + else + efx->flow_control = EFX_FC_RX; + + /* Allocate buffer for stats */ + rc = falcon_alloc_buffer(efx, &efx->stats_buffer, + FALCON_MAC_STATS_SIZE); + if (rc) + return rc; + EFX_LOG(efx, "stats buffer at %llx (virt %p phys %lx)\n", + (unsigned long long)efx->stats_buffer.dma_addr, + efx->stats_buffer.addr, + virt_to_phys(efx->stats_buffer.addr)); + + return 0; +} + +void falcon_remove_port(struct efx_nic *efx) +{ + falcon_free_buffer(efx, &efx->stats_buffer); +} + +/************************************************************************** + * + * Multicast filtering + * + ************************************************************************** + */ + +void falcon_set_multicast_hash(struct efx_nic *efx) +{ + union efx_multicast_hash *mc_hash = &efx->multicast_hash; + + /* Broadcast packets go through the multicast hash filter. + * ether_crc_le() of the broadcast address is 0xbe2612ff + * so we always add bit 0xff to the mask. + */ + set_bit_le(0xff, mc_hash->byte); + + falcon_write(efx, &mc_hash->oword[0], MAC_MCAST_HASH_REG0_KER); + falcon_write(efx, &mc_hash->oword[1], MAC_MCAST_HASH_REG1_KER); +} + +/************************************************************************** + * + * Device reset + * + ************************************************************************** + */ + +/* Resets NIC to known state. This routine must be called in process + * context and is allowed to sleep. */ +int falcon_reset_hw(struct efx_nic *efx, enum reset_type method) +{ + struct falcon_nic_data *nic_data = efx->nic_data; + efx_oword_t glb_ctl_reg_ker; + int rc; + + EFX_LOG(efx, "performing hardware reset (%d)\n", method); + + /* Initiate device reset */ + if (method == RESET_TYPE_WORLD) { + rc = pci_save_state(efx->pci_dev); + if (rc) { + EFX_ERR(efx, "failed to backup PCI state of primary " + "function prior to hardware reset\n"); + goto fail1; + } + if (FALCON_IS_DUAL_FUNC(efx)) { + rc = pci_save_state(nic_data->pci_dev2); + if (rc) { + EFX_ERR(efx, "failed to backup PCI state of " + "secondary function prior to " + "hardware reset\n"); + goto fail2; + } + } + + EFX_POPULATE_OWORD_2(glb_ctl_reg_ker, + EXT_PHY_RST_DUR, 0x7, + SWRST, 1); + } else { + int reset_phy = (method == RESET_TYPE_INVISIBLE ? + EXCLUDE_FROM_RESET : 0); + + EFX_POPULATE_OWORD_7(glb_ctl_reg_ker, + EXT_PHY_RST_CTL, reset_phy, + PCIE_CORE_RST_CTL, EXCLUDE_FROM_RESET, + PCIE_NSTCK_RST_CTL, EXCLUDE_FROM_RESET, + PCIE_SD_RST_CTL, EXCLUDE_FROM_RESET, + EE_RST_CTL, EXCLUDE_FROM_RESET, + EXT_PHY_RST_DUR, 0x7 /* 10ms */, + SWRST, 1); + } + falcon_write(efx, &glb_ctl_reg_ker, GLB_CTL_REG_KER); + + EFX_LOG(efx, "waiting for hardware reset\n"); + schedule_timeout_uninterruptible(HZ / 20); + + /* Restore PCI configuration if needed */ + if (method == RESET_TYPE_WORLD) { + if (FALCON_IS_DUAL_FUNC(efx)) { + rc = pci_restore_state(nic_data->pci_dev2); + if (rc) { + EFX_ERR(efx, "failed to restore PCI config for " + "the secondary function\n"); + goto fail3; + } + } + rc = pci_restore_state(efx->pci_dev); + if (rc) { + EFX_ERR(efx, "failed to restore PCI config for the " + "primary function\n"); + goto fail4; + } + EFX_LOG(efx, "successfully restored PCI config\n"); + } + + /* Assert that reset complete */ + falcon_read(efx, &glb_ctl_reg_ker, GLB_CTL_REG_KER); + if (EFX_OWORD_FIELD(glb_ctl_reg_ker, SWRST) != 0) { + rc = -ETIMEDOUT; + EFX_ERR(efx, "timed out waiting for hardware reset\n"); + goto fail5; + } + EFX_LOG(efx, "hardware reset complete\n"); + + return 0; + + /* pci_save_state() and pci_restore_state() MUST be called in pairs */ +fail2: +fail3: + pci_restore_state(efx->pci_dev); +fail1: +fail4: +fail5: + return rc; +} + +/* Zeroes out the SRAM contents. This routine must be called in + * process context and is allowed to sleep. + */ +static int falcon_reset_sram(struct efx_nic *efx) +{ + efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker; + int count; + + /* Set the SRAM wake/sleep GPIO appropriately. */ + falcon_read(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER); + EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OEN, 1); + EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, 1); + falcon_write(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER); + + /* Initiate SRAM reset */ + EFX_POPULATE_OWORD_2(srm_cfg_reg_ker, + SRAM_OOB_BT_INIT_EN, 1, + SRM_NUM_BANKS_AND_BANK_SIZE, 0); + falcon_write(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER); + + /* Wait for SRAM reset to complete */ + count = 0; + do { + EFX_LOG(efx, "waiting for SRAM reset (attempt %d)...\n", count); + + /* SRAM reset is slow; expect around 16ms */ + schedule_timeout_uninterruptible(HZ / 50); + + /* Check for reset complete */ + falcon_read(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER); + if (!EFX_OWORD_FIELD(srm_cfg_reg_ker, SRAM_OOB_BT_INIT_EN)) { + EFX_LOG(efx, "SRAM reset complete\n"); + + return 0; + } + } while (++count < 20); /* wait upto 0.4 sec */ + + EFX_ERR(efx, "timed out waiting for SRAM reset\n"); + return -ETIMEDOUT; +} + +/* Extract non-volatile configuration */ +static int falcon_probe_nvconfig(struct efx_nic *efx) +{ + struct falcon_nvconfig *nvconfig; + efx_oword_t nic_stat; + int device_id; + unsigned addr_len; + size_t offset, len; + int magic_num, struct_ver, board_rev; + int rc; + + /* Find the boot device. */ + falcon_read(efx, &nic_stat, NIC_STAT_REG); + if (EFX_OWORD_FIELD(nic_stat, SF_PRST)) { + device_id = EE_SPI_FLASH; + addr_len = 3; + } else if (EFX_OWORD_FIELD(nic_stat, EE_PRST)) { + device_id = EE_SPI_EEPROM; + addr_len = 2; + } else { + return -ENODEV; + } + + nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL); + + /* Read the whole configuration structure into memory. */ + for (offset = 0; offset < sizeof(*nvconfig); offset += len) { + len = min(sizeof(*nvconfig) - offset, + (size_t) FALCON_SPI_MAX_LEN); + rc = falcon_spi_read(efx, device_id, SPI_READ, + NVCONFIG_BASE + offset, addr_len, + (char *)nvconfig + offset, len); + if (rc) + goto out; + } + + /* Read the MAC addresses */ + memcpy(efx->mac_address, nvconfig->mac_address[0], ETH_ALEN); + + /* Read the board configuration. */ + magic_num = le16_to_cpu(nvconfig->board_magic_num); + struct_ver = le16_to_cpu(nvconfig->board_struct_ver); + + if (magic_num != NVCONFIG_BOARD_MAGIC_NUM || struct_ver < 2) { + EFX_ERR(efx, "Non volatile memory bad magic=%x ver=%x " + "therefore using defaults\n", magic_num, struct_ver); + efx->phy_type = PHY_TYPE_NONE; + efx->mii.phy_id = PHY_ADDR_INVALID; + board_rev = 0; + } else { + struct falcon_nvconfig_board_v2 *v2 = &nvconfig->board_v2; + + efx->phy_type = v2->port0_phy_type; + efx->mii.phy_id = v2->port0_phy_addr; + board_rev = le16_to_cpu(v2->board_revision); + } + + EFX_LOG(efx, "PHY is %d phy_id %d\n", efx->phy_type, efx->mii.phy_id); + + efx_set_board_info(efx, board_rev); + + out: + kfree(nvconfig); + return rc; +} + +/* Probe the NIC variant (revision, ASIC vs FPGA, function count, port + * count, port speed). Set workaround and feature flags accordingly. + */ +static int falcon_probe_nic_variant(struct efx_nic *efx) +{ + efx_oword_t altera_build; + + falcon_read(efx, &altera_build, ALTERA_BUILD_REG_KER); + if (EFX_OWORD_FIELD(altera_build, VER_ALL)) { + EFX_ERR(efx, "Falcon FPGA not supported\n"); + return -ENODEV; + } + + switch (FALCON_REV(efx)) { + case FALCON_REV_A0: + case 0xff: + EFX_ERR(efx, "Falcon rev A0 not supported\n"); + return -ENODEV; + + case FALCON_REV_A1:{ + efx_oword_t nic_stat; + + falcon_read(efx, &nic_stat, NIC_STAT_REG); + + if (EFX_OWORD_FIELD(nic_stat, STRAP_PCIE) == 0) { + EFX_ERR(efx, "Falcon rev A1 PCI-X not supported\n"); + return -ENODEV; + } + if (!EFX_OWORD_FIELD(nic_stat, STRAP_10G)) { + EFX_ERR(efx, "1G mode not supported\n"); + return -ENODEV; + } + break; + } + + case FALCON_REV_B0: + break; + + default: + EFX_ERR(efx, "Unknown Falcon rev %d\n", FALCON_REV(efx)); + return -ENODEV; + } + + return 0; +} + +int falcon_probe_nic(struct efx_nic *efx) +{ + struct falcon_nic_data *nic_data; + int rc; + + /* Initialise I2C interface state */ + efx->i2c.efx = efx; + efx->i2c.op = &falcon_i2c_bit_operations; + efx->i2c.sda = 1; + efx->i2c.scl = 1; + + /* Allocate storage for hardware specific data */ + nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL); + efx->nic_data = (void *) nic_data; + + /* Determine number of ports etc. */ + rc = falcon_probe_nic_variant(efx); + if (rc) + goto fail1; + + /* Probe secondary function if expected */ + if (FALCON_IS_DUAL_FUNC(efx)) { + struct pci_dev *dev = pci_dev_get(efx->pci_dev); + + while ((dev = pci_get_device(EFX_VENDID_SFC, FALCON_A_S_DEVID, + dev))) { + if (dev->bus == efx->pci_dev->bus && + dev->devfn == efx->pci_dev->devfn + 1) { + nic_data->pci_dev2 = dev; + break; + } + } + if (!nic_data->pci_dev2) { + EFX_ERR(efx, "failed to find secondary function\n"); + rc = -ENODEV; + goto fail2; + } + } + + /* Now we can reset the NIC */ + rc = falcon_reset_hw(efx, RESET_TYPE_ALL); + if (rc) { + EFX_ERR(efx, "failed to reset NIC\n"); + goto fail3; + } + + /* Allocate memory for INT_KER */ + rc = falcon_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t)); + if (rc) + goto fail4; + BUG_ON(efx->irq_status.dma_addr & 0x0f); + + EFX_LOG(efx, "INT_KER at %llx (virt %p phys %lx)\n", + (unsigned long long)efx->irq_status.dma_addr, + efx->irq_status.addr, virt_to_phys(efx->irq_status.addr)); + + /* Read in the non-volatile configuration */ + rc = falcon_probe_nvconfig(efx); + if (rc) + goto fail5; + + return 0; + + fail5: + falcon_free_buffer(efx, &efx->irq_status); + fail4: + /* fall-thru */ + fail3: + if (nic_data->pci_dev2) { + pci_dev_put(nic_data->pci_dev2); + nic_data->pci_dev2 = NULL; + } + fail2: + /* fall-thru */ + fail1: + kfree(efx->nic_data); + return rc; +} + +/* This call performs hardware-specific global initialisation, such as + * defining the descriptor cache sizes and number of RSS channels. + * It does not set up any buffers, descriptor rings or event queues. + */ +int falcon_init_nic(struct efx_nic *efx) +{ + struct falcon_nic_data *data; + efx_oword_t temp; + unsigned thresh; + int rc; + + data = (struct falcon_nic_data *)efx->nic_data; + + /* Set up the address region register. This is only needed + * for the B0 FPGA, but since we are just pushing in the + * reset defaults this may as well be unconditional. */ + EFX_POPULATE_OWORD_4(temp, ADR_REGION0, 0, + ADR_REGION1, (1 << 16), + ADR_REGION2, (2 << 16), + ADR_REGION3, (3 << 16)); + falcon_write(efx, &temp, ADR_REGION_REG_KER); + + /* Use on-chip SRAM */ + falcon_read(efx, &temp, NIC_STAT_REG); + EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, 1); + falcon_write(efx, &temp, NIC_STAT_REG); + + /* Set buffer table mode */ + EFX_POPULATE_OWORD_1(temp, BUF_TBL_MODE, BUF_TBL_MODE_FULL); + falcon_write(efx, &temp, BUF_TBL_CFG_REG_KER); + + rc = falcon_reset_sram(efx); + if (rc) + return rc; + + /* Set positions of descriptor caches in SRAM. */ + EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, TX_DC_BASE / 8); + falcon_write(efx, &temp, SRM_TX_DC_CFG_REG_KER); + EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, RX_DC_BASE / 8); + falcon_write(efx, &temp, SRM_RX_DC_CFG_REG_KER); + + /* Set TX descriptor cache size. */ + BUILD_BUG_ON(TX_DC_ENTRIES != (16 << TX_DC_ENTRIES_ORDER)); + EFX_POPULATE_OWORD_1(temp, TX_DC_SIZE, TX_DC_ENTRIES_ORDER); + falcon_write(efx, &temp, TX_DC_CFG_REG_KER); + + /* Set RX descriptor cache size. Set low watermark to size-8, as + * this allows most efficient prefetching. + */ + BUILD_BUG_ON(RX_DC_ENTRIES != (16 << RX_DC_ENTRIES_ORDER)); + EFX_POPULATE_OWORD_1(temp, RX_DC_SIZE, RX_DC_ENTRIES_ORDER); + falcon_write(efx, &temp, RX_DC_CFG_REG_KER); + EFX_POPULATE_OWORD_1(temp, RX_DC_PF_LWM, RX_DC_ENTRIES - 8); + falcon_write(efx, &temp, RX_DC_PF_WM_REG_KER); + + /* Clear the parity enables on the TX data fifos as + * they produce false parity errors because of timing issues + */ + if (EFX_WORKAROUND_5129(efx)) { + falcon_read(efx, &temp, SPARE_REG_KER); + EFX_SET_OWORD_FIELD(temp, MEM_PERR_EN_TX_DATA, 0); + falcon_write(efx, &temp, SPARE_REG_KER); + } + + /* Enable all the genuinely fatal interrupts. (They are still + * masked by the overall interrupt mask, controlled by + * falcon_interrupts()). + * + * Note: All other fatal interrupts are enabled + */ + EFX_POPULATE_OWORD_3(temp, + ILL_ADR_INT_KER_EN, 1, + RBUF_OWN_INT_KER_EN, 1, + TBUF_OWN_INT_KER_EN, 1); + EFX_INVERT_OWORD(temp); + falcon_write(efx, &temp, FATAL_INTR_REG_KER); + + /* Set number of RSS queues for receive path. */ + falcon_read(efx, &temp, RX_FILTER_CTL_REG); + if (FALCON_REV(efx) >= FALCON_REV_B0) + EFX_SET_OWORD_FIELD(temp, NUM_KER, 0); + else + EFX_SET_OWORD_FIELD(temp, NUM_KER, efx->rss_queues - 1); + if (EFX_WORKAROUND_7244(efx)) { + EFX_SET_OWORD_FIELD(temp, UDP_FULL_SRCH_LIMIT, 8); + EFX_SET_OWORD_FIELD(temp, UDP_WILD_SRCH_LIMIT, 8); + EFX_SET_OWORD_FIELD(temp, TCP_FULL_SRCH_LIMIT, 8); + EFX_SET_OWORD_FIELD(temp, TCP_WILD_SRCH_LIMIT, 8); + } + falcon_write(efx, &temp, RX_FILTER_CTL_REG); + + falcon_setup_rss_indir_table(efx); + + /* Setup RX. Wait for descriptor is broken and must + * be disabled. RXDP recovery shouldn't be needed, but is. + */ + falcon_read(efx, &temp, RX_SELF_RST_REG_KER); + EFX_SET_OWORD_FIELD(temp, RX_NODESC_WAIT_DIS, 1); + EFX_SET_OWORD_FIELD(temp, RX_RECOVERY_EN, 1); + if (EFX_WORKAROUND_5583(efx)) + EFX_SET_OWORD_FIELD(temp, RX_ISCSI_DIS, 1); + falcon_write(efx, &temp, RX_SELF_RST_REG_KER); + + /* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be + * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q. + */ + falcon_read(efx, &temp, TX_CFG2_REG_KER); + EFX_SET_OWORD_FIELD(temp, TX_RX_SPACER, 0xfe); + EFX_SET_OWORD_FIELD(temp, TX_RX_SPACER_EN, 1); + EFX_SET_OWORD_FIELD(temp, TX_ONE_PKT_PER_Q, 1); + EFX_SET_OWORD_FIELD(temp, TX_CSR_PUSH_EN, 0); + EFX_SET_OWORD_FIELD(temp, TX_DIS_NON_IP_EV, 1); + /* Enable SW_EV to inherit in char driver - assume harmless here */ + EFX_SET_OWORD_FIELD(temp, TX_SW_EV_EN, 1); + /* Prefetch threshold 2 => fetch when descriptor cache half empty */ + EFX_SET_OWORD_FIELD(temp, TX_PREF_THRESHOLD, 2); + /* Squash TX of packets of 16 bytes or less */ + if (FALCON_REV(efx) >= FALCON_REV_B0 && EFX_WORKAROUND_9141(efx)) + EFX_SET_OWORD_FIELD(temp, TX_FLUSH_MIN_LEN_EN_B0, 1); + falcon_write(efx, &temp, TX_CFG2_REG_KER); + + /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16 + * descriptors (which is bad). + */ + falcon_read(efx, &temp, TX_CFG_REG_KER); + EFX_SET_OWORD_FIELD(temp, TX_NO_EOP_DISC_EN, 0); + falcon_write(efx, &temp, TX_CFG_REG_KER); + + /* RX config */ + falcon_read(efx, &temp, RX_CFG_REG_KER); + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_DESC_PUSH_EN, 0); + if (EFX_WORKAROUND_7575(efx)) + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_USR_BUF_SIZE, + (3 * 4096) / 32); + if (FALCON_REV(efx) >= FALCON_REV_B0) + EFX_SET_OWORD_FIELD(temp, RX_INGR_EN_B0, 1); + + /* RX FIFO flow control thresholds */ + thresh = ((rx_xon_thresh_bytes >= 0) ? + rx_xon_thresh_bytes : efx->type->rx_xon_thresh); + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XON_MAC_TH, thresh / 256); + thresh = ((rx_xoff_thresh_bytes >= 0) ? + rx_xoff_thresh_bytes : efx->type->rx_xoff_thresh); + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XOFF_MAC_TH, thresh / 256); + /* RX control FIFO thresholds [32 entries] */ + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XON_TX_TH, 25); + EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XOFF_TX_TH, 20); + falcon_write(efx, &temp, RX_CFG_REG_KER); + + /* Set destination of both TX and RX Flush events */ + if (FALCON_REV(efx) >= FALCON_REV_B0) { + EFX_POPULATE_OWORD_1(temp, FLS_EVQ_ID, 0); + falcon_write(efx, &temp, DP_CTRL_REG); + } + + return 0; +} + +void falcon_remove_nic(struct efx_nic *efx) +{ + struct falcon_nic_data *nic_data = efx->nic_data; + + falcon_free_buffer(efx, &efx->irq_status); + + (void) falcon_reset_hw(efx, RESET_TYPE_ALL); + + /* Release the second function after the reset */ + if (nic_data->pci_dev2) { + pci_dev_put(nic_data->pci_dev2); + nic_data->pci_dev2 = NULL; + } + + /* Tear down the private nic state */ + kfree(efx->nic_data); + efx->nic_data = NULL; +} + +void falcon_update_nic_stats(struct efx_nic *efx) +{ + efx_oword_t cnt; + + falcon_read(efx, &cnt, RX_NODESC_DROP_REG_KER); + efx->n_rx_nodesc_drop_cnt += EFX_OWORD_FIELD(cnt, RX_NODESC_DROP_CNT); +} + +/************************************************************************** + * + * Revision-dependent attributes used by efx.c + * + ************************************************************************** + */ + +struct efx_nic_type falcon_a_nic_type = { + .mem_bar = 2, + .mem_map_size = 0x20000, + .txd_ptr_tbl_base = TX_DESC_PTR_TBL_KER_A1, + .rxd_ptr_tbl_base = RX_DESC_PTR_TBL_KER_A1, + .buf_tbl_base = BUF_TBL_KER_A1, + .evq_ptr_tbl_base = EVQ_PTR_TBL_KER_A1, + .evq_rptr_tbl_base = EVQ_RPTR_REG_KER_A1, + .txd_ring_mask = FALCON_TXD_RING_MASK, + .rxd_ring_mask = FALCON_RXD_RING_MASK, + .evq_size = FALCON_EVQ_SIZE, + .max_dma_mask = FALCON_DMA_MASK, + .tx_dma_mask = FALCON_TX_DMA_MASK, + .bug5391_mask = 0xf, + .rx_xoff_thresh = 2048, + .rx_xon_thresh = 512, + .rx_buffer_padding = 0x24, + .max_interrupt_mode = EFX_INT_MODE_MSI, + .phys_addr_channels = 4, +}; + +struct efx_nic_type falcon_b_nic_type = { + .mem_bar = 2, + /* Map everything up to and including the RSS indirection + * table. Don't map MSI-X table, MSI-X PBA since Linux + * requires that they not be mapped. */ + .mem_map_size = RX_RSS_INDIR_TBL_B0 + 0x800, + .txd_ptr_tbl_base = TX_DESC_PTR_TBL_KER_B0, + .rxd_ptr_tbl_base = RX_DESC_PTR_TBL_KER_B0, + .buf_tbl_base = BUF_TBL_KER_B0, + .evq_ptr_tbl_base = EVQ_PTR_TBL_KER_B0, + .evq_rptr_tbl_base = EVQ_RPTR_REG_KER_B0, + .txd_ring_mask = FALCON_TXD_RING_MASK, + .rxd_ring_mask = FALCON_RXD_RING_MASK, + .evq_size = FALCON_EVQ_SIZE, + .max_dma_mask = FALCON_DMA_MASK, + .tx_dma_mask = FALCON_TX_DMA_MASK, + .bug5391_mask = 0, + .rx_xoff_thresh = 54272, /* ~80Kb - 3*max MTU */ + .rx_xon_thresh = 27648, /* ~3*max MTU */ + .rx_buffer_padding = 0, + .max_interrupt_mode = EFX_INT_MODE_MSIX, + .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy + * interrupt handler only supports 32 + * channels */ +}; + diff --git a/drivers/net/sfc/falcon.h b/drivers/net/sfc/falcon.h new file mode 100644 index 00000000000..6117403b0c0 --- /dev/null +++ b/drivers/net/sfc/falcon.h @@ -0,0 +1,130 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_FALCON_H +#define EFX_FALCON_H + +#include "net_driver.h" + +/* + * Falcon hardware control + */ + +enum falcon_revision { + FALCON_REV_A0 = 0, + FALCON_REV_A1 = 1, + FALCON_REV_B0 = 2, +}; + +#define FALCON_REV(efx) ((efx)->pci_dev->revision) + +extern struct efx_nic_type falcon_a_nic_type; +extern struct efx_nic_type falcon_b_nic_type; + +/************************************************************************** + * + * Externs + * + ************************************************************************** + */ + +/* TX data path */ +extern int falcon_probe_tx(struct efx_tx_queue *tx_queue); +extern int falcon_init_tx(struct efx_tx_queue *tx_queue); +extern void falcon_fini_tx(struct efx_tx_queue *tx_queue); +extern void falcon_remove_tx(struct efx_tx_queue *tx_queue); +extern void falcon_push_buffers(struct efx_tx_queue *tx_queue); + +/* RX data path */ +extern int falcon_probe_rx(struct efx_rx_queue *rx_queue); +extern int falcon_init_rx(struct efx_rx_queue *rx_queue); +extern void falcon_fini_rx(struct efx_rx_queue *rx_queue); +extern void falcon_remove_rx(struct efx_rx_queue *rx_queue); +extern void falcon_notify_rx_desc(struct efx_rx_queue *rx_queue); + +/* Event data path */ +extern int falcon_probe_eventq(struct efx_channel *channel); +extern int falcon_init_eventq(struct efx_channel *channel); +extern void falcon_fini_eventq(struct efx_channel *channel); +extern void falcon_remove_eventq(struct efx_channel *channel); +extern int falcon_process_eventq(struct efx_channel *channel, int *rx_quota); +extern void falcon_eventq_read_ack(struct efx_channel *channel); + +/* Ports */ +extern int falcon_probe_port(struct efx_nic *efx); +extern void falcon_remove_port(struct efx_nic *efx); + +/* MAC/PHY */ +extern int falcon_xaui_link_ok(struct efx_nic *efx); +extern int falcon_dma_stats(struct efx_nic *efx, + unsigned int done_offset); +extern void falcon_drain_tx_fifo(struct efx_nic *efx); +extern void falcon_deconfigure_mac_wrapper(struct efx_nic *efx); +extern void falcon_reconfigure_mac_wrapper(struct efx_nic *efx); + +/* Interrupts and test events */ +extern int falcon_init_interrupt(struct efx_nic *efx); +extern void falcon_enable_interrupts(struct efx_nic *efx); +extern void falcon_generate_test_event(struct efx_channel *channel, + unsigned int magic); +extern void falcon_generate_interrupt(struct efx_nic *efx); +extern void falcon_set_int_moderation(struct efx_channel *channel); +extern void falcon_disable_interrupts(struct efx_nic *efx); +extern void falcon_fini_interrupt(struct efx_nic *efx); + +/* Global Resources */ +extern int falcon_probe_nic(struct efx_nic *efx); +extern int falcon_probe_resources(struct efx_nic *efx); +extern int falcon_init_nic(struct efx_nic *efx); +extern int falcon_reset_hw(struct efx_nic *efx, enum reset_type method); +extern void falcon_remove_resources(struct efx_nic *efx); +extern void falcon_remove_nic(struct efx_nic *efx); +extern void falcon_update_nic_stats(struct efx_nic *efx); +extern void falcon_set_multicast_hash(struct efx_nic *efx); +extern int falcon_reset_xaui(struct efx_nic *efx); + +/************************************************************************** + * + * Falcon MAC stats + * + ************************************************************************** + */ + +#define FALCON_STAT_OFFSET(falcon_stat) EFX_VAL(falcon_stat, offset) +#define FALCON_STAT_WIDTH(falcon_stat) EFX_VAL(falcon_stat, WIDTH) + +/* Retrieve statistic from statistics block */ +#define FALCON_STAT(efx, falcon_stat, efx_stat) do { \ + if (FALCON_STAT_WIDTH(falcon_stat) == 16) \ + (efx)->mac_stats.efx_stat += le16_to_cpu( \ + *((__force __le16 *) \ + (efx->stats_buffer.addr + \ + FALCON_STAT_OFFSET(falcon_stat)))); \ + else if (FALCON_STAT_WIDTH(falcon_stat) == 32) \ + (efx)->mac_stats.efx_stat += le32_to_cpu( \ + *((__force __le32 *) \ + (efx->stats_buffer.addr + \ + FALCON_STAT_OFFSET(falcon_stat)))); \ + else \ + (efx)->mac_stats.efx_stat += le64_to_cpu( \ + *((__force __le64 *) \ + (efx->stats_buffer.addr + \ + FALCON_STAT_OFFSET(falcon_stat)))); \ + } while (0) + +#define FALCON_MAC_STATS_SIZE 0x100 + +#define MAC_DATA_LBN 0 +#define MAC_DATA_WIDTH 32 + +extern void falcon_generate_event(struct efx_channel *channel, + efx_qword_t *event); + +#endif /* EFX_FALCON_H */ diff --git a/drivers/net/sfc/falcon_hwdefs.h b/drivers/net/sfc/falcon_hwdefs.h new file mode 100644 index 00000000000..0485a63eaff --- /dev/null +++ b/drivers/net/sfc/falcon_hwdefs.h @@ -0,0 +1,1135 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_FALCON_HWDEFS_H +#define EFX_FALCON_HWDEFS_H + +/* + * Falcon hardware value definitions. + * Falcon is the internal codename for the SFC4000 controller that is + * present in SFE400X evaluation boards + */ + +/************************************************************************** + * + * Falcon registers + * + ************************************************************************** + */ + +/* Address region register */ +#define ADR_REGION_REG_KER 0x00 +#define ADR_REGION0_LBN 0 +#define ADR_REGION0_WIDTH 18 +#define ADR_REGION1_LBN 32 +#define ADR_REGION1_WIDTH 18 +#define ADR_REGION2_LBN 64 +#define ADR_REGION2_WIDTH 18 +#define ADR_REGION3_LBN 96 +#define ADR_REGION3_WIDTH 18 + +/* Interrupt enable register */ +#define INT_EN_REG_KER 0x0010 +#define KER_INT_KER_LBN 3 +#define KER_INT_KER_WIDTH 1 +#define DRV_INT_EN_KER_LBN 0 +#define DRV_INT_EN_KER_WIDTH 1 + +/* Interrupt status address register */ +#define INT_ADR_REG_KER 0x0030 +#define NORM_INT_VEC_DIS_KER_LBN 64 +#define NORM_INT_VEC_DIS_KER_WIDTH 1 +#define INT_ADR_KER_LBN 0 +#define INT_ADR_KER_WIDTH EFX_DMA_TYPE_WIDTH(64) /* not 46 for this one */ + +/* Interrupt status register (B0 only) */ +#define INT_ISR0_B0 0x90 +#define INT_ISR1_B0 0xA0 + +/* Interrupt acknowledge register (A0/A1 only) */ +#define INT_ACK_REG_KER_A1 0x0050 +#define INT_ACK_DUMMY_DATA_LBN 0 +#define INT_ACK_DUMMY_DATA_WIDTH 32 + +/* Interrupt acknowledge work-around register (A0/A1 only )*/ +#define WORK_AROUND_BROKEN_PCI_READS_REG_KER_A1 0x0070 + +/* SPI host command register */ +#define EE_SPI_HCMD_REG_KER 0x0100 +#define EE_SPI_HCMD_CMD_EN_LBN 31 +#define EE_SPI_HCMD_CMD_EN_WIDTH 1 +#define EE_WR_TIMER_ACTIVE_LBN 28 +#define EE_WR_TIMER_ACTIVE_WIDTH 1 +#define EE_SPI_HCMD_SF_SEL_LBN 24 +#define EE_SPI_HCMD_SF_SEL_WIDTH 1 +#define EE_SPI_EEPROM 0 +#define EE_SPI_FLASH 1 +#define EE_SPI_HCMD_DABCNT_LBN 16 +#define EE_SPI_HCMD_DABCNT_WIDTH 5 +#define EE_SPI_HCMD_READ_LBN 15 +#define EE_SPI_HCMD_READ_WIDTH 1 +#define EE_SPI_READ 1 +#define EE_SPI_WRITE 0 +#define EE_SPI_HCMD_DUBCNT_LBN 12 +#define EE_SPI_HCMD_DUBCNT_WIDTH 2 +#define EE_SPI_HCMD_ADBCNT_LBN 8 +#define EE_SPI_HCMD_ADBCNT_WIDTH 2 +#define EE_SPI_HCMD_ENC_LBN 0 +#define EE_SPI_HCMD_ENC_WIDTH 8 + +/* SPI host address register */ +#define EE_SPI_HADR_REG_KER 0x0110 +#define EE_SPI_HADR_ADR_LBN 0 +#define EE_SPI_HADR_ADR_WIDTH 24 + +/* SPI host data register */ +#define EE_SPI_HDATA_REG_KER 0x0120 + +/* PCIE CORE ACCESS REG */ +#define PCIE_CORE_ADDR_PCIE_DEVICE_CTRL_STAT 0x68 +#define PCIE_CORE_ADDR_PCIE_LINK_CTRL_STAT 0x70 +#define PCIE_CORE_ADDR_ACK_RPL_TIMER 0x700 +#define PCIE_CORE_ADDR_ACK_FREQ 0x70C + +/* NIC status register */ +#define NIC_STAT_REG 0x0200 +#define ONCHIP_SRAM_LBN 16 +#define ONCHIP_SRAM_WIDTH 1 +#define SF_PRST_LBN 9 +#define SF_PRST_WIDTH 1 +#define EE_PRST_LBN 8 +#define EE_PRST_WIDTH 1 +/* See pic_mode_t for decoding of this field */ +/* These bit definitions are extrapolated from the list of numerical + * values for STRAP_PINS. + */ +#define STRAP_10G_LBN 2 +#define STRAP_10G_WIDTH 1 +#define STRAP_PCIE_LBN 0 +#define STRAP_PCIE_WIDTH 1 + +/* GPIO control register */ +#define GPIO_CTL_REG_KER 0x0210 +#define GPIO_OUTPUTS_LBN (16) +#define GPIO_OUTPUTS_WIDTH (4) +#define GPIO_INPUTS_LBN (8) +#define GPIO_DIRECTION_LBN (24) +#define GPIO_DIRECTION_WIDTH (4) +#define GPIO_DIRECTION_OUT (1) +#define GPIO_SRAM_SLEEP (1 << 1) + +#define GPIO3_OEN_LBN (GPIO_DIRECTION_LBN + 3) +#define GPIO3_OEN_WIDTH 1 +#define GPIO2_OEN_LBN (GPIO_DIRECTION_LBN + 2) +#define GPIO2_OEN_WIDTH 1 +#define GPIO1_OEN_LBN (GPIO_DIRECTION_LBN + 1) +#define GPIO1_OEN_WIDTH 1 +#define GPIO0_OEN_LBN (GPIO_DIRECTION_LBN + 0) +#define GPIO0_OEN_WIDTH 1 + +#define GPIO3_OUT_LBN (GPIO_OUTPUTS_LBN + 3) +#define GPIO3_OUT_WIDTH 1 +#define GPIO2_OUT_LBN (GPIO_OUTPUTS_LBN + 2) +#define GPIO2_OUT_WIDTH 1 +#define GPIO1_OUT_LBN (GPIO_OUTPUTS_LBN + 1) +#define GPIO1_OUT_WIDTH 1 +#define GPIO0_OUT_LBN (GPIO_OUTPUTS_LBN + 0) +#define GPIO0_OUT_WIDTH 1 + +#define GPIO3_IN_LBN (GPIO_INPUTS_LBN + 3) +#define GPIO3_IN_WIDTH 1 +#define GPIO2_IN_WIDTH 1 +#define GPIO1_IN_WIDTH 1 +#define GPIO0_IN_LBN (GPIO_INPUTS_LBN + 0) +#define GPIO0_IN_WIDTH 1 + +/* Global control register */ +#define GLB_CTL_REG_KER 0x0220 +#define EXT_PHY_RST_CTL_LBN 63 +#define EXT_PHY_RST_CTL_WIDTH 1 +#define PCIE_SD_RST_CTL_LBN 61 +#define PCIE_SD_RST_CTL_WIDTH 1 + +#define PCIE_NSTCK_RST_CTL_LBN 58 +#define PCIE_NSTCK_RST_CTL_WIDTH 1 +#define PCIE_CORE_RST_CTL_LBN 57 +#define PCIE_CORE_RST_CTL_WIDTH 1 +#define EE_RST_CTL_LBN 49 +#define EE_RST_CTL_WIDTH 1 +#define RST_XGRX_LBN 24 +#define RST_XGRX_WIDTH 1 +#define RST_XGTX_LBN 23 +#define RST_XGTX_WIDTH 1 +#define RST_EM_LBN 22 +#define RST_EM_WIDTH 1 +#define EXT_PHY_RST_DUR_LBN 1 +#define EXT_PHY_RST_DUR_WIDTH 3 +#define SWRST_LBN 0 +#define SWRST_WIDTH 1 +#define INCLUDE_IN_RESET 0 +#define EXCLUDE_FROM_RESET 1 + +/* Fatal interrupt register */ +#define FATAL_INTR_REG_KER 0x0230 +#define RBUF_OWN_INT_KER_EN_LBN 39 +#define RBUF_OWN_INT_KER_EN_WIDTH 1 +#define TBUF_OWN_INT_KER_EN_LBN 38 +#define TBUF_OWN_INT_KER_EN_WIDTH 1 +#define ILL_ADR_INT_KER_EN_LBN 33 +#define ILL_ADR_INT_KER_EN_WIDTH 1 +#define MEM_PERR_INT_KER_LBN 8 +#define MEM_PERR_INT_KER_WIDTH 1 +#define INT_KER_ERROR_LBN 0 +#define INT_KER_ERROR_WIDTH 12 + +#define DP_CTRL_REG 0x250 +#define FLS_EVQ_ID_LBN 0 +#define FLS_EVQ_ID_WIDTH 11 + +#define MEM_STAT_REG_KER 0x260 + +/* Debug probe register */ +#define DEBUG_BLK_SEL_MISC 7 +#define DEBUG_BLK_SEL_SERDES 6 +#define DEBUG_BLK_SEL_EM 5 +#define DEBUG_BLK_SEL_SR 4 +#define DEBUG_BLK_SEL_EV 3 +#define DEBUG_BLK_SEL_RX 2 +#define DEBUG_BLK_SEL_TX 1 +#define DEBUG_BLK_SEL_BIU 0 + +/* FPGA build version */ +#define ALTERA_BUILD_REG_KER 0x0300 +#define VER_ALL_LBN 0 +#define VER_ALL_WIDTH 32 + +/* Spare EEPROM bits register (flash 0x390) */ +#define SPARE_REG_KER 0x310 +#define MEM_PERR_EN_TX_DATA_LBN 72 +#define MEM_PERR_EN_TX_DATA_WIDTH 2 + +/* Timer table for kernel access */ +#define TIMER_CMD_REG_KER 0x420 +#define TIMER_MODE_LBN 12 +#define TIMER_MODE_WIDTH 2 +#define TIMER_MODE_DIS 0 +#define TIMER_MODE_INT_HLDOFF 2 +#define TIMER_VAL_LBN 0 +#define TIMER_VAL_WIDTH 12 + +/* Driver generated event register */ +#define DRV_EV_REG_KER 0x440 +#define DRV_EV_QID_LBN 64 +#define DRV_EV_QID_WIDTH 12 +#define DRV_EV_DATA_LBN 0 +#define DRV_EV_DATA_WIDTH 64 + +/* Buffer table configuration register */ +#define BUF_TBL_CFG_REG_KER 0x600 +#define BUF_TBL_MODE_LBN 3 +#define BUF_TBL_MODE_WIDTH 1 +#define BUF_TBL_MODE_HALF 0 +#define BUF_TBL_MODE_FULL 1 + +/* SRAM receive descriptor cache configuration register */ +#define SRM_RX_DC_CFG_REG_KER 0x610 +#define SRM_RX_DC_BASE_ADR_LBN 0 +#define SRM_RX_DC_BASE_ADR_WIDTH 21 + +/* SRAM transmit descriptor cache configuration register */ +#define SRM_TX_DC_CFG_REG_KER 0x620 +#define SRM_TX_DC_BASE_ADR_LBN 0 +#define SRM_TX_DC_BASE_ADR_WIDTH 21 + +/* SRAM configuration register */ +#define SRM_CFG_REG_KER 0x630 +#define SRAM_OOB_BT_INIT_EN_LBN 3 +#define SRAM_OOB_BT_INIT_EN_WIDTH 1 +#define SRM_NUM_BANKS_AND_BANK_SIZE_LBN 0 +#define SRM_NUM_BANKS_AND_BANK_SIZE_WIDTH 3 +#define SRM_NB_BSZ_1BANKS_2M 0 +#define SRM_NB_BSZ_1BANKS_4M 1 +#define SRM_NB_BSZ_1BANKS_8M 2 +#define SRM_NB_BSZ_DEFAULT 3 /* char driver will set the default */ +#define SRM_NB_BSZ_2BANKS_4M 4 +#define SRM_NB_BSZ_2BANKS_8M 5 +#define SRM_NB_BSZ_2BANKS_16M 6 +#define SRM_NB_BSZ_RESERVED 7 + +/* Special buffer table update register */ +#define BUF_TBL_UPD_REG_KER 0x0650 +#define BUF_UPD_CMD_LBN 63 +#define BUF_UPD_CMD_WIDTH 1 +#define BUF_CLR_CMD_LBN 62 +#define BUF_CLR_CMD_WIDTH 1 +#define BUF_CLR_END_ID_LBN 32 +#define BUF_CLR_END_ID_WIDTH 20 +#define BUF_CLR_START_ID_LBN 0 +#define BUF_CLR_START_ID_WIDTH 20 + +/* Receive configuration register */ +#define RX_CFG_REG_KER 0x800 + +/* B0 */ +#define RX_INGR_EN_B0_LBN 47 +#define RX_INGR_EN_B0_WIDTH 1 +#define RX_DESC_PUSH_EN_B0_LBN 43 +#define RX_DESC_PUSH_EN_B0_WIDTH 1 +#define RX_XON_TX_TH_B0_LBN 33 +#define RX_XON_TX_TH_B0_WIDTH 5 +#define RX_XOFF_TX_TH_B0_LBN 28 +#define RX_XOFF_TX_TH_B0_WIDTH 5 +#define RX_USR_BUF_SIZE_B0_LBN 19 +#define RX_USR_BUF_SIZE_B0_WIDTH 9 +#define RX_XON_MAC_TH_B0_LBN 10 +#define RX_XON_MAC_TH_B0_WIDTH 9 +#define RX_XOFF_MAC_TH_B0_LBN 1 +#define RX_XOFF_MAC_TH_B0_WIDTH 9 +#define RX_XOFF_MAC_EN_B0_LBN 0 +#define RX_XOFF_MAC_EN_B0_WIDTH 1 + +/* A1 */ +#define RX_DESC_PUSH_EN_A1_LBN 35 +#define RX_DESC_PUSH_EN_A1_WIDTH 1 +#define RX_XON_TX_TH_A1_LBN 25 +#define RX_XON_TX_TH_A1_WIDTH 5 +#define RX_XOFF_TX_TH_A1_LBN 20 +#define RX_XOFF_TX_TH_A1_WIDTH 5 +#define RX_USR_BUF_SIZE_A1_LBN 11 +#define RX_USR_BUF_SIZE_A1_WIDTH 9 +#define RX_XON_MAC_TH_A1_LBN 6 +#define RX_XON_MAC_TH_A1_WIDTH 5 +#define RX_XOFF_MAC_TH_A1_LBN 1 +#define RX_XOFF_MAC_TH_A1_WIDTH 5 +#define RX_XOFF_MAC_EN_A1_LBN 0 +#define RX_XOFF_MAC_EN_A1_WIDTH 1 + +/* Receive filter control register */ +#define RX_FILTER_CTL_REG 0x810 +#define UDP_FULL_SRCH_LIMIT_LBN 32 +#define UDP_FULL_SRCH_LIMIT_WIDTH 8 +#define NUM_KER_LBN 24 +#define NUM_KER_WIDTH 2 +#define UDP_WILD_SRCH_LIMIT_LBN 16 +#define UDP_WILD_SRCH_LIMIT_WIDTH 8 +#define TCP_WILD_SRCH_LIMIT_LBN 8 +#define TCP_WILD_SRCH_LIMIT_WIDTH 8 +#define TCP_FULL_SRCH_LIMIT_LBN 0 +#define TCP_FULL_SRCH_LIMIT_WIDTH 8 + +/* RX queue flush register */ +#define RX_FLUSH_DESCQ_REG_KER 0x0820 +#define RX_FLUSH_DESCQ_CMD_LBN 24 +#define RX_FLUSH_DESCQ_CMD_WIDTH 1 +#define RX_FLUSH_DESCQ_LBN 0 +#define RX_FLUSH_DESCQ_WIDTH 12 + +/* Receive descriptor update register */ +#define RX_DESC_UPD_REG_KER_DWORD (0x830 + 12) +#define RX_DESC_WPTR_DWORD_LBN 0 +#define RX_DESC_WPTR_DWORD_WIDTH 12 + +/* Receive descriptor cache configuration register */ +#define RX_DC_CFG_REG_KER 0x840 +#define RX_DC_SIZE_LBN 0 +#define RX_DC_SIZE_WIDTH 2 + +#define RX_DC_PF_WM_REG_KER 0x850 +#define RX_DC_PF_LWM_LBN 0 +#define RX_DC_PF_LWM_WIDTH 6 + +/* RX no descriptor drop counter */ +#define RX_NODESC_DROP_REG_KER 0x880 +#define RX_NODESC_DROP_CNT_LBN 0 +#define RX_NODESC_DROP_CNT_WIDTH 16 + +/* RX black magic register */ +#define RX_SELF_RST_REG_KER 0x890 +#define RX_ISCSI_DIS_LBN 17 +#define RX_ISCSI_DIS_WIDTH 1 +#define RX_NODESC_WAIT_DIS_LBN 9 +#define RX_NODESC_WAIT_DIS_WIDTH 1 +#define RX_RECOVERY_EN_LBN 8 +#define RX_RECOVERY_EN_WIDTH 1 + +/* TX queue flush register */ +#define TX_FLUSH_DESCQ_REG_KER 0x0a00 +#define TX_FLUSH_DESCQ_CMD_LBN 12 +#define TX_FLUSH_DESCQ_CMD_WIDTH 1 +#define TX_FLUSH_DESCQ_LBN 0 +#define TX_FLUSH_DESCQ_WIDTH 12 + +/* Transmit descriptor update register */ +#define TX_DESC_UPD_REG_KER_DWORD (0xa10 + 12) +#define TX_DESC_WPTR_DWORD_LBN 0 +#define TX_DESC_WPTR_DWORD_WIDTH 12 + +/* Transmit descriptor cache configuration register */ +#define TX_DC_CFG_REG_KER 0xa20 +#define TX_DC_SIZE_LBN 0 +#define TX_DC_SIZE_WIDTH 2 + +/* Transmit checksum configuration register (A0/A1 only) */ +#define TX_CHKSM_CFG_REG_KER_A1 0xa30 + +/* Transmit configuration register */ +#define TX_CFG_REG_KER 0xa50 +#define TX_NO_EOP_DISC_EN_LBN 5 +#define TX_NO_EOP_DISC_EN_WIDTH 1 + +/* Transmit configuration register 2 */ +#define TX_CFG2_REG_KER 0xa80 +#define TX_CSR_PUSH_EN_LBN 89 +#define TX_CSR_PUSH_EN_WIDTH 1 +#define TX_RX_SPACER_LBN 64 +#define TX_RX_SPACER_WIDTH 8 +#define TX_SW_EV_EN_LBN 59 +#define TX_SW_EV_EN_WIDTH 1 +#define TX_RX_SPACER_EN_LBN 57 +#define TX_RX_SPACER_EN_WIDTH 1 +#define TX_PREF_THRESHOLD_LBN 19 +#define TX_PREF_THRESHOLD_WIDTH 2 +#define TX_ONE_PKT_PER_Q_LBN 18 +#define TX_ONE_PKT_PER_Q_WIDTH 1 +#define TX_DIS_NON_IP_EV_LBN 17 +#define TX_DIS_NON_IP_EV_WIDTH 1 +#define TX_FLUSH_MIN_LEN_EN_B0_LBN 7 +#define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1 + +/* PHY management transmit data register */ +#define MD_TXD_REG_KER 0xc00 +#define MD_TXD_LBN 0 +#define MD_TXD_WIDTH 16 + +/* PHY management receive data register */ +#define MD_RXD_REG_KER 0xc10 +#define MD_RXD_LBN 0 +#define MD_RXD_WIDTH 16 + +/* PHY management configuration & status register */ +#define MD_CS_REG_KER 0xc20 +#define MD_GC_LBN 4 +#define MD_GC_WIDTH 1 +#define MD_RIC_LBN 2 +#define MD_RIC_WIDTH 1 +#define MD_RDC_LBN 1 +#define MD_RDC_WIDTH 1 +#define MD_WRC_LBN 0 +#define MD_WRC_WIDTH 1 + +/* PHY management PHY address register */ +#define MD_PHY_ADR_REG_KER 0xc30 +#define MD_PHY_ADR_LBN 0 +#define MD_PHY_ADR_WIDTH 16 + +/* PHY management ID register */ +#define MD_ID_REG_KER 0xc40 +#define MD_PRT_ADR_LBN 11 +#define MD_PRT_ADR_WIDTH 5 +#define MD_DEV_ADR_LBN 6 +#define MD_DEV_ADR_WIDTH 5 +/* Used for writing both at once */ +#define MD_PRT_DEV_ADR_LBN 6 +#define MD_PRT_DEV_ADR_WIDTH 10 + +/* PHY management status & mask register (DWORD read only) */ +#define MD_STAT_REG_KER 0xc50 +#define MD_BSERR_LBN 2 +#define MD_BSERR_WIDTH 1 +#define MD_LNFL_LBN 1 +#define MD_LNFL_WIDTH 1 +#define MD_BSY_LBN 0 +#define MD_BSY_WIDTH 1 + +/* Port 0 and 1 MAC stats registers */ +#define MAC0_STAT_DMA_REG_KER 0xc60 +#define MAC_STAT_DMA_CMD_LBN 48 +#define MAC_STAT_DMA_CMD_WIDTH 1 +#define MAC_STAT_DMA_ADR_LBN 0 +#define MAC_STAT_DMA_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46) + +/* Port 0 and 1 MAC control registers */ +#define MAC0_CTRL_REG_KER 0xc80 +#define MAC_XOFF_VAL_LBN 16 +#define MAC_XOFF_VAL_WIDTH 16 +#define TXFIFO_DRAIN_EN_B0_LBN 7 +#define TXFIFO_DRAIN_EN_B0_WIDTH 1 +#define MAC_BCAD_ACPT_LBN 4 +#define MAC_BCAD_ACPT_WIDTH 1 +#define MAC_UC_PROM_LBN 3 +#define MAC_UC_PROM_WIDTH 1 +#define MAC_LINK_STATUS_LBN 2 +#define MAC_LINK_STATUS_WIDTH 1 +#define MAC_SPEED_LBN 0 +#define MAC_SPEED_WIDTH 2 + +/* 10G XAUI XGXS default values */ +#define XX_TXDRV_DEQ_DEFAULT 0xe /* deq=.6 */ +#define XX_TXDRV_DTX_DEFAULT 0x5 /* 1.25 */ +#define XX_SD_CTL_DRV_DEFAULT 0 /* 20mA */ + +/* Multicast address hash table */ +#define MAC_MCAST_HASH_REG0_KER 0xca0 +#define MAC_MCAST_HASH_REG1_KER 0xcb0 + +/* GMAC registers */ +#define FALCON_GMAC_REGBANK 0xe00 +#define FALCON_GMAC_REGBANK_SIZE 0x200 +#define FALCON_GMAC_REG_SIZE 0x10 + +/* XMAC registers */ +#define FALCON_XMAC_REGBANK 0x1200 +#define FALCON_XMAC_REGBANK_SIZE 0x200 +#define FALCON_XMAC_REG_SIZE 0x10 + +/* XGMAC address register low */ +#define XM_ADR_LO_REG_MAC 0x00 +#define XM_ADR_3_LBN 24 +#define XM_ADR_3_WIDTH 8 +#define XM_ADR_2_LBN 16 +#define XM_ADR_2_WIDTH 8 +#define XM_ADR_1_LBN 8 +#define XM_ADR_1_WIDTH 8 +#define XM_ADR_0_LBN 0 +#define XM_ADR_0_WIDTH 8 + +/* XGMAC address register high */ +#define XM_ADR_HI_REG_MAC 0x01 +#define XM_ADR_5_LBN 8 +#define XM_ADR_5_WIDTH 8 +#define XM_ADR_4_LBN 0 +#define XM_ADR_4_WIDTH 8 + +/* XGMAC global configuration */ +#define XM_GLB_CFG_REG_MAC 0x02 +#define XM_RX_STAT_EN_LBN 11 +#define XM_RX_STAT_EN_WIDTH 1 +#define XM_TX_STAT_EN_LBN 10 +#define XM_TX_STAT_EN_WIDTH 1 +#define XM_RX_JUMBO_MODE_LBN 6 +#define XM_RX_JUMBO_MODE_WIDTH 1 +#define XM_INTCLR_MODE_LBN 3 +#define XM_INTCLR_MODE_WIDTH 1 +#define XM_CORE_RST_LBN 0 +#define XM_CORE_RST_WIDTH 1 + +/* XGMAC transmit configuration */ +#define XM_TX_CFG_REG_MAC 0x03 +#define XM_IPG_LBN 16 +#define XM_IPG_WIDTH 4 +#define XM_FCNTL_LBN 10 +#define XM_FCNTL_WIDTH 1 +#define XM_TXCRC_LBN 8 +#define XM_TXCRC_WIDTH 1 +#define XM_AUTO_PAD_LBN 5 +#define XM_AUTO_PAD_WIDTH 1 +#define XM_TX_PRMBL_LBN 2 +#define XM_TX_PRMBL_WIDTH 1 +#define XM_TXEN_LBN 1 +#define XM_TXEN_WIDTH 1 + +/* XGMAC receive configuration */ +#define XM_RX_CFG_REG_MAC 0x04 +#define XM_PASS_CRC_ERR_LBN 25 +#define XM_PASS_CRC_ERR_WIDTH 1 +#define XM_ACPT_ALL_MCAST_LBN 11 +#define XM_ACPT_ALL_MCAST_WIDTH 1 +#define XM_ACPT_ALL_UCAST_LBN 9 +#define XM_ACPT_ALL_UCAST_WIDTH 1 +#define XM_AUTO_DEPAD_LBN 8 +#define XM_AUTO_DEPAD_WIDTH 1 +#define XM_RXEN_LBN 1 +#define XM_RXEN_WIDTH 1 + +/* XGMAC management interrupt mask register */ +#define XM_MGT_INT_MSK_REG_MAC_B0 0x5 +#define XM_MSK_PRMBLE_ERR_LBN 2 +#define XM_MSK_PRMBLE_ERR_WIDTH 1 +#define XM_MSK_RMTFLT_LBN 1 +#define XM_MSK_RMTFLT_WIDTH 1 +#define XM_MSK_LCLFLT_LBN 0 +#define XM_MSK_LCLFLT_WIDTH 1 + +/* XGMAC flow control register */ +#define XM_FC_REG_MAC 0x7 +#define XM_PAUSE_TIME_LBN 16 +#define XM_PAUSE_TIME_WIDTH 16 +#define XM_DIS_FCNTL_LBN 0 +#define XM_DIS_FCNTL_WIDTH 1 + +/* XGMAC pause time count register */ +#define XM_PAUSE_TIME_REG_MAC 0x9 + +/* XGMAC transmit parameter register */ +#define XM_TX_PARAM_REG_MAC 0x0d +#define XM_TX_JUMBO_MODE_LBN 31 +#define XM_TX_JUMBO_MODE_WIDTH 1 +#define XM_MAX_TX_FRM_SIZE_LBN 16 +#define XM_MAX_TX_FRM_SIZE_WIDTH 14 + +/* XGMAC receive parameter register */ +#define XM_RX_PARAM_REG_MAC 0x0e +#define XM_MAX_RX_FRM_SIZE_LBN 0 +#define XM_MAX_RX_FRM_SIZE_WIDTH 14 + +/* XGMAC management interrupt status register */ +#define XM_MGT_INT_REG_MAC_B0 0x0f +#define XM_PRMBLE_ERR 2 +#define XM_PRMBLE_WIDTH 1 +#define XM_RMTFLT_LBN 1 +#define XM_RMTFLT_WIDTH 1 +#define XM_LCLFLT_LBN 0 +#define XM_LCLFLT_WIDTH 1 + +/* XGXS/XAUI powerdown/reset register */ +#define XX_PWR_RST_REG_MAC 0x10 + +#define XX_PWRDND_EN_LBN 15 +#define XX_PWRDND_EN_WIDTH 1 +#define XX_PWRDNC_EN_LBN 14 +#define XX_PWRDNC_EN_WIDTH 1 +#define XX_PWRDNB_EN_LBN 13 +#define XX_PWRDNB_EN_WIDTH 1 +#define XX_PWRDNA_EN_LBN 12 +#define XX_PWRDNA_EN_WIDTH 1 +#define XX_RSTPLLCD_EN_LBN 9 +#define XX_RSTPLLCD_EN_WIDTH 1 +#define XX_RSTPLLAB_EN_LBN 8 +#define XX_RSTPLLAB_EN_WIDTH 1 +#define XX_RESETD_EN_LBN 7 +#define XX_RESETD_EN_WIDTH 1 +#define XX_RESETC_EN_LBN 6 +#define XX_RESETC_EN_WIDTH 1 +#define XX_RESETB_EN_LBN 5 +#define XX_RESETB_EN_WIDTH 1 +#define XX_RESETA_EN_LBN 4 +#define XX_RESETA_EN_WIDTH 1 +#define XX_RSTXGXSRX_EN_LBN 2 +#define XX_RSTXGXSRX_EN_WIDTH 1 +#define XX_RSTXGXSTX_EN_LBN 1 +#define XX_RSTXGXSTX_EN_WIDTH 1 +#define XX_RST_XX_EN_LBN 0 +#define XX_RST_XX_EN_WIDTH 1 + +/* XGXS/XAUI powerdown/reset control register */ +#define XX_SD_CTL_REG_MAC 0x11 +#define XX_HIDRVD_LBN 15 +#define XX_HIDRVD_WIDTH 1 +#define XX_LODRVD_LBN 14 +#define XX_LODRVD_WIDTH 1 +#define XX_HIDRVC_LBN 13 +#define XX_HIDRVC_WIDTH 1 +#define XX_LODRVC_LBN 12 +#define XX_LODRVC_WIDTH 1 +#define XX_HIDRVB_LBN 11 +#define XX_HIDRVB_WIDTH 1 +#define XX_LODRVB_LBN 10 +#define XX_LODRVB_WIDTH 1 +#define XX_HIDRVA_LBN 9 +#define XX_HIDRVA_WIDTH 1 +#define XX_LODRVA_LBN 8 +#define XX_LODRVA_WIDTH 1 + +#define XX_TXDRV_CTL_REG_MAC 0x12 +#define XX_DEQD_LBN 28 +#define XX_DEQD_WIDTH 4 +#define XX_DEQC_LBN 24 +#define XX_DEQC_WIDTH 4 +#define XX_DEQB_LBN 20 +#define XX_DEQB_WIDTH 4 +#define XX_DEQA_LBN 16 +#define XX_DEQA_WIDTH 4 +#define XX_DTXD_LBN 12 +#define XX_DTXD_WIDTH 4 +#define XX_DTXC_LBN 8 +#define XX_DTXC_WIDTH 4 +#define XX_DTXB_LBN 4 +#define XX_DTXB_WIDTH 4 +#define XX_DTXA_LBN 0 +#define XX_DTXA_WIDTH 4 + +/* XAUI XGXS core status register */ +#define XX_FORCE_SIG_DECODE_FORCED 0xff +#define XX_CORE_STAT_REG_MAC 0x16 +#define XX_ALIGN_DONE_LBN 20 +#define XX_ALIGN_DONE_WIDTH 1 +#define XX_SYNC_STAT_LBN 16 +#define XX_SYNC_STAT_WIDTH 4 +#define XX_SYNC_STAT_DECODE_SYNCED 0xf +#define XX_COMMA_DET_LBN 12 +#define XX_COMMA_DET_WIDTH 4 +#define XX_COMMA_DET_DECODE_DETECTED 0xf +#define XX_COMMA_DET_RESET 0xf +#define XX_CHARERR_LBN 4 +#define XX_CHARERR_WIDTH 4 +#define XX_CHARERR_RESET 0xf +#define XX_DISPERR_LBN 0 +#define XX_DISPERR_WIDTH 4 +#define XX_DISPERR_RESET 0xf + +/* Receive filter table */ +#define RX_FILTER_TBL0 0xF00000 + +/* Receive descriptor pointer table */ +#define RX_DESC_PTR_TBL_KER_A1 0x11800 +#define RX_DESC_PTR_TBL_KER_B0 0xF40000 +#define RX_DESC_PTR_TBL_KER_P0 0x900 +#define RX_ISCSI_DDIG_EN_LBN 88 +#define RX_ISCSI_DDIG_EN_WIDTH 1 +#define RX_ISCSI_HDIG_EN_LBN 87 +#define RX_ISCSI_HDIG_EN_WIDTH 1 +#define RX_DESCQ_BUF_BASE_ID_LBN 36 +#define RX_DESCQ_BUF_BASE_ID_WIDTH 20 +#define RX_DESCQ_EVQ_ID_LBN 24 +#define RX_DESCQ_EVQ_ID_WIDTH 12 +#define RX_DESCQ_OWNER_ID_LBN 10 +#define RX_DESCQ_OWNER_ID_WIDTH 14 +#define RX_DESCQ_LABEL_LBN 5 +#define RX_DESCQ_LABEL_WIDTH 5 +#define RX_DESCQ_SIZE_LBN 3 +#define RX_DESCQ_SIZE_WIDTH 2 +#define RX_DESCQ_SIZE_4K 3 +#define RX_DESCQ_SIZE_2K 2 +#define RX_DESCQ_SIZE_1K 1 +#define RX_DESCQ_SIZE_512 0 +#define RX_DESCQ_TYPE_LBN 2 +#define RX_DESCQ_TYPE_WIDTH 1 +#define RX_DESCQ_JUMBO_LBN 1 +#define RX_DESCQ_JUMBO_WIDTH 1 +#define RX_DESCQ_EN_LBN 0 +#define RX_DESCQ_EN_WIDTH 1 + +/* Transmit descriptor pointer table */ +#define TX_DESC_PTR_TBL_KER_A1 0x11900 +#define TX_DESC_PTR_TBL_KER_B0 0xF50000 +#define TX_DESC_PTR_TBL_KER_P0 0xa40 +#define TX_NON_IP_DROP_DIS_B0_LBN 91 +#define TX_NON_IP_DROP_DIS_B0_WIDTH 1 +#define TX_IP_CHKSM_DIS_B0_LBN 90 +#define TX_IP_CHKSM_DIS_B0_WIDTH 1 +#define TX_TCP_CHKSM_DIS_B0_LBN 89 +#define TX_TCP_CHKSM_DIS_B0_WIDTH 1 +#define TX_DESCQ_EN_LBN 88 +#define TX_DESCQ_EN_WIDTH 1 +#define TX_ISCSI_DDIG_EN_LBN 87 +#define TX_ISCSI_DDIG_EN_WIDTH 1 +#define TX_ISCSI_HDIG_EN_LBN 86 +#define TX_ISCSI_HDIG_EN_WIDTH 1 +#define TX_DESCQ_BUF_BASE_ID_LBN 36 +#define TX_DESCQ_BUF_BASE_ID_WIDTH 20 +#define TX_DESCQ_EVQ_ID_LBN 24 +#define TX_DESCQ_EVQ_ID_WIDTH 12 +#define TX_DESCQ_OWNER_ID_LBN 10 +#define TX_DESCQ_OWNER_ID_WIDTH 14 +#define TX_DESCQ_LABEL_LBN 5 +#define TX_DESCQ_LABEL_WIDTH 5 +#define TX_DESCQ_SIZE_LBN 3 +#define TX_DESCQ_SIZE_WIDTH 2 +#define TX_DESCQ_SIZE_4K 3 +#define TX_DESCQ_SIZE_2K 2 +#define TX_DESCQ_SIZE_1K 1 +#define TX_DESCQ_SIZE_512 0 +#define TX_DESCQ_TYPE_LBN 1 +#define TX_DESCQ_TYPE_WIDTH 2 + +/* Event queue pointer */ +#define EVQ_PTR_TBL_KER_A1 0x11a00 +#define EVQ_PTR_TBL_KER_B0 0xf60000 +#define EVQ_PTR_TBL_KER_P0 0x500 +#define EVQ_EN_LBN 23 +#define EVQ_EN_WIDTH 1 +#define EVQ_SIZE_LBN 20 +#define EVQ_SIZE_WIDTH 3 +#define EVQ_SIZE_32K 6 +#define EVQ_SIZE_16K 5 +#define EVQ_SIZE_8K 4 +#define EVQ_SIZE_4K 3 +#define EVQ_SIZE_2K 2 +#define EVQ_SIZE_1K 1 +#define EVQ_SIZE_512 0 +#define EVQ_BUF_BASE_ID_LBN 0 +#define EVQ_BUF_BASE_ID_WIDTH 20 + +/* Event queue read pointer */ +#define EVQ_RPTR_REG_KER_A1 0x11b00 +#define EVQ_RPTR_REG_KER_B0 0xfa0000 +#define EVQ_RPTR_REG_KER_DWORD (EVQ_RPTR_REG_KER + 0) +#define EVQ_RPTR_DWORD_LBN 0 +#define EVQ_RPTR_DWORD_WIDTH 14 + +/* RSS indirection table */ +#define RX_RSS_INDIR_TBL_B0 0xFB0000 +#define RX_RSS_INDIR_ENT_B0_LBN 0 +#define RX_RSS_INDIR_ENT_B0_WIDTH 6 + +/* Special buffer descriptors (full-mode) */ +#define BUF_FULL_TBL_KER_A1 0x8000 +#define BUF_FULL_TBL_KER_B0 0x800000 +#define IP_DAT_BUF_SIZE_LBN 50 +#define IP_DAT_BUF_SIZE_WIDTH 1 +#define IP_DAT_BUF_SIZE_8K 1 +#define IP_DAT_BUF_SIZE_4K 0 +#define BUF_ADR_REGION_LBN 48 +#define BUF_ADR_REGION_WIDTH 2 +#define BUF_ADR_FBUF_LBN 14 +#define BUF_ADR_FBUF_WIDTH 34 +#define BUF_OWNER_ID_FBUF_LBN 0 +#define BUF_OWNER_ID_FBUF_WIDTH 14 + +/* Transmit descriptor */ +#define TX_KER_PORT_LBN 63 +#define TX_KER_PORT_WIDTH 1 +#define TX_KER_CONT_LBN 62 +#define TX_KER_CONT_WIDTH 1 +#define TX_KER_BYTE_CNT_LBN 48 +#define TX_KER_BYTE_CNT_WIDTH 14 +#define TX_KER_BUF_REGION_LBN 46 +#define TX_KER_BUF_REGION_WIDTH 2 +#define TX_KER_BUF_REGION0_DECODE 0 +#define TX_KER_BUF_REGION1_DECODE 1 +#define TX_KER_BUF_REGION2_DECODE 2 +#define TX_KER_BUF_REGION3_DECODE 3 +#define TX_KER_BUF_ADR_LBN 0 +#define TX_KER_BUF_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46) + +/* Receive descriptor */ +#define RX_KER_BUF_SIZE_LBN 48 +#define RX_KER_BUF_SIZE_WIDTH 14 +#define RX_KER_BUF_REGION_LBN 46 +#define RX_KER_BUF_REGION_WIDTH 2 +#define RX_KER_BUF_REGION0_DECODE 0 +#define RX_KER_BUF_REGION1_DECODE 1 +#define RX_KER_BUF_REGION2_DECODE 2 +#define RX_KER_BUF_REGION3_DECODE 3 +#define RX_KER_BUF_ADR_LBN 0 +#define RX_KER_BUF_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46) + +/************************************************************************** + * + * Falcon events + * + ************************************************************************** + */ + +/* Event queue entries */ +#define EV_CODE_LBN 60 +#define EV_CODE_WIDTH 4 +#define RX_IP_EV_DECODE 0 +#define TX_IP_EV_DECODE 2 +#define DRIVER_EV_DECODE 5 +#define GLOBAL_EV_DECODE 6 +#define DRV_GEN_EV_DECODE 7 +#define WHOLE_EVENT_LBN 0 +#define WHOLE_EVENT_WIDTH 64 + +/* Receive events */ +#define RX_EV_PKT_OK_LBN 56 +#define RX_EV_PKT_OK_WIDTH 1 +#define RX_EV_PAUSE_FRM_ERR_LBN 55 +#define RX_EV_PAUSE_FRM_ERR_WIDTH 1 +#define RX_EV_BUF_OWNER_ID_ERR_LBN 54 +#define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1 +#define RX_EV_IF_FRAG_ERR_LBN 53 +#define RX_EV_IF_FRAG_ERR_WIDTH 1 +#define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52 +#define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1 +#define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51 +#define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1 +#define RX_EV_ETH_CRC_ERR_LBN 50 +#define RX_EV_ETH_CRC_ERR_WIDTH 1 +#define RX_EV_FRM_TRUNC_LBN 49 +#define RX_EV_FRM_TRUNC_WIDTH 1 +#define RX_EV_DRIB_NIB_LBN 48 +#define RX_EV_DRIB_NIB_WIDTH 1 +#define RX_EV_TOBE_DISC_LBN 47 +#define RX_EV_TOBE_DISC_WIDTH 1 +#define RX_EV_PKT_TYPE_LBN 44 +#define RX_EV_PKT_TYPE_WIDTH 3 +#define RX_EV_PKT_TYPE_ETH_DECODE 0 +#define RX_EV_PKT_TYPE_LLC_DECODE 1 +#define RX_EV_PKT_TYPE_JUMBO_DECODE 2 +#define RX_EV_PKT_TYPE_VLAN_DECODE 3 +#define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4 +#define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5 +#define RX_EV_HDR_TYPE_LBN 42 +#define RX_EV_HDR_TYPE_WIDTH 2 +#define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0 +#define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1 +#define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2 +#define RX_EV_HDR_TYPE_NON_IP_DECODE 3 +#define RX_EV_HDR_TYPE_HAS_CHECKSUMS(hdr_type) \ + ((hdr_type) <= RX_EV_HDR_TYPE_UDP_IPV4_DECODE) +#define RX_EV_MCAST_HASH_MATCH_LBN 40 +#define RX_EV_MCAST_HASH_MATCH_WIDTH 1 +#define RX_EV_MCAST_PKT_LBN 39 +#define RX_EV_MCAST_PKT_WIDTH 1 +#define RX_EV_Q_LABEL_LBN 32 +#define RX_EV_Q_LABEL_WIDTH 5 +#define RX_EV_JUMBO_CONT_LBN 31 +#define RX_EV_JUMBO_CONT_WIDTH 1 +#define RX_EV_BYTE_CNT_LBN 16 +#define RX_EV_BYTE_CNT_WIDTH 14 +#define RX_EV_SOP_LBN 15 +#define RX_EV_SOP_WIDTH 1 +#define RX_EV_DESC_PTR_LBN 0 +#define RX_EV_DESC_PTR_WIDTH 12 + +/* Transmit events */ +#define TX_EV_PKT_ERR_LBN 38 +#define TX_EV_PKT_ERR_WIDTH 1 +#define TX_EV_Q_LABEL_LBN 32 +#define TX_EV_Q_LABEL_WIDTH 5 +#define TX_EV_WQ_FF_FULL_LBN 15 +#define TX_EV_WQ_FF_FULL_WIDTH 1 +#define TX_EV_COMP_LBN 12 +#define TX_EV_COMP_WIDTH 1 +#define TX_EV_DESC_PTR_LBN 0 +#define TX_EV_DESC_PTR_WIDTH 12 + +/* Driver events */ +#define DRIVER_EV_SUB_CODE_LBN 56 +#define DRIVER_EV_SUB_CODE_WIDTH 4 +#define DRIVER_EV_SUB_DATA_LBN 0 +#define DRIVER_EV_SUB_DATA_WIDTH 14 +#define TX_DESCQ_FLS_DONE_EV_DECODE 0 +#define RX_DESCQ_FLS_DONE_EV_DECODE 1 +#define EVQ_INIT_DONE_EV_DECODE 2 +#define EVQ_NOT_EN_EV_DECODE 3 +#define RX_DESCQ_FLSFF_OVFL_EV_DECODE 4 +#define SRM_UPD_DONE_EV_DECODE 5 +#define WAKE_UP_EV_DECODE 6 +#define TX_PKT_NON_TCP_UDP_DECODE 9 +#define TIMER_EV_DECODE 10 +#define RX_RECOVERY_EV_DECODE 11 +#define RX_DSC_ERROR_EV_DECODE 14 +#define TX_DSC_ERROR_EV_DECODE 15 +#define DRIVER_EV_TX_DESCQ_ID_LBN 0 +#define DRIVER_EV_TX_DESCQ_ID_WIDTH 12 +#define DRIVER_EV_RX_FLUSH_FAIL_LBN 12 +#define DRIVER_EV_RX_FLUSH_FAIL_WIDTH 1 +#define DRIVER_EV_RX_DESCQ_ID_LBN 0 +#define DRIVER_EV_RX_DESCQ_ID_WIDTH 12 +#define SRM_CLR_EV_DECODE 0 +#define SRM_UPD_EV_DECODE 1 +#define SRM_ILLCLR_EV_DECODE 2 + +/* Global events */ +#define RX_RECOVERY_B0_LBN 12 +#define RX_RECOVERY_B0_WIDTH 1 +#define XG_MNT_INTR_B0_LBN 11 +#define XG_MNT_INTR_B0_WIDTH 1 +#define RX_RECOVERY_A1_LBN 11 +#define RX_RECOVERY_A1_WIDTH 1 +#define XG_PHY_INTR_LBN 9 +#define XG_PHY_INTR_WIDTH 1 +#define G_PHY1_INTR_LBN 8 +#define G_PHY1_INTR_WIDTH 1 +#define G_PHY0_INTR_LBN 7 +#define G_PHY0_INTR_WIDTH 1 + +/* Driver-generated test events */ +#define EVQ_MAGIC_LBN 0 +#define EVQ_MAGIC_WIDTH 32 + +/************************************************************************** + * + * Falcon MAC stats + * + ************************************************************************** + * + */ +#define GRxGoodOct_offset 0x0 +#define GRxBadOct_offset 0x8 +#define GRxMissPkt_offset 0x10 +#define GRxFalseCRS_offset 0x14 +#define GRxPausePkt_offset 0x18 +#define GRxBadPkt_offset 0x1C +#define GRxUcastPkt_offset 0x20 +#define GRxMcastPkt_offset 0x24 +#define GRxBcastPkt_offset 0x28 +#define GRxGoodLt64Pkt_offset 0x2C +#define GRxBadLt64Pkt_offset 0x30 +#define GRx64Pkt_offset 0x34 +#define GRx65to127Pkt_offset 0x38 +#define GRx128to255Pkt_offset 0x3C +#define GRx256to511Pkt_offset 0x40 +#define GRx512to1023Pkt_offset 0x44 +#define GRx1024to15xxPkt_offset 0x48 +#define GRx15xxtoJumboPkt_offset 0x4C +#define GRxGtJumboPkt_offset 0x50 +#define GRxFcsErr64to15xxPkt_offset 0x54 +#define GRxFcsErr15xxtoJumboPkt_offset 0x58 +#define GRxFcsErrGtJumboPkt_offset 0x5C +#define GTxGoodBadOct_offset 0x80 +#define GTxGoodOct_offset 0x88 +#define GTxSglColPkt_offset 0x90 +#define GTxMultColPkt_offset 0x94 +#define GTxExColPkt_offset 0x98 +#define GTxDefPkt_offset 0x9C +#define GTxLateCol_offset 0xA0 +#define GTxExDefPkt_offset 0xA4 +#define GTxPausePkt_offset 0xA8 +#define GTxBadPkt_offset 0xAC +#define GTxUcastPkt_offset 0xB0 +#define GTxMcastPkt_offset 0xB4 +#define GTxBcastPkt_offset 0xB8 +#define GTxLt64Pkt_offset 0xBC +#define GTx64Pkt_offset 0xC0 +#define GTx65to127Pkt_offset 0xC4 +#define GTx128to255Pkt_offset 0xC8 +#define GTx256to511Pkt_offset 0xCC +#define GTx512to1023Pkt_offset 0xD0 +#define GTx1024to15xxPkt_offset 0xD4 +#define GTx15xxtoJumboPkt_offset 0xD8 +#define GTxGtJumboPkt_offset 0xDC +#define GTxNonTcpUdpPkt_offset 0xE0 +#define GTxMacSrcErrPkt_offset 0xE4 +#define GTxIpSrcErrPkt_offset 0xE8 +#define GDmaDone_offset 0xEC + +#define XgRxOctets_offset 0x0 +#define XgRxOctets_WIDTH 48 +#define XgRxOctetsOK_offset 0x8 +#define XgRxOctetsOK_WIDTH 48 +#define XgRxPkts_offset 0x10 +#define XgRxPkts_WIDTH 32 +#define XgRxPktsOK_offset 0x14 +#define XgRxPktsOK_WIDTH 32 +#define XgRxBroadcastPkts_offset 0x18 +#define XgRxBroadcastPkts_WIDTH 32 +#define XgRxMulticastPkts_offset 0x1C +#define XgRxMulticastPkts_WIDTH 32 +#define XgRxUnicastPkts_offset 0x20 +#define XgRxUnicastPkts_WIDTH 32 +#define XgRxUndersizePkts_offset 0x24 +#define XgRxUndersizePkts_WIDTH 32 +#define XgRxOversizePkts_offset 0x28 +#define XgRxOversizePkts_WIDTH 32 +#define XgRxJabberPkts_offset 0x2C +#define XgRxJabberPkts_WIDTH 32 +#define XgRxUndersizeFCSerrorPkts_offset 0x30 +#define XgRxUndersizeFCSerrorPkts_WIDTH 32 +#define XgRxDropEvents_offset 0x34 +#define XgRxDropEvents_WIDTH 32 +#define XgRxFCSerrorPkts_offset 0x38 +#define XgRxFCSerrorPkts_WIDTH 32 +#define XgRxAlignError_offset 0x3C +#define XgRxAlignError_WIDTH 32 +#define XgRxSymbolError_offset 0x40 +#define XgRxSymbolError_WIDTH 32 +#define XgRxInternalMACError_offset 0x44 +#define XgRxInternalMACError_WIDTH 32 +#define XgRxControlPkts_offset 0x48 +#define XgRxControlPkts_WIDTH 32 +#define XgRxPausePkts_offset 0x4C +#define XgRxPausePkts_WIDTH 32 +#define XgRxPkts64Octets_offset 0x50 +#define XgRxPkts64Octets_WIDTH 32 +#define XgRxPkts65to127Octets_offset 0x54 +#define XgRxPkts65to127Octets_WIDTH 32 +#define XgRxPkts128to255Octets_offset 0x58 +#define XgRxPkts128to255Octets_WIDTH 32 +#define XgRxPkts256to511Octets_offset 0x5C +#define XgRxPkts256to511Octets_WIDTH 32 +#define XgRxPkts512to1023Octets_offset 0x60 +#define XgRxPkts512to1023Octets_WIDTH 32 +#define XgRxPkts1024to15xxOctets_offset 0x64 +#define XgRxPkts1024to15xxOctets_WIDTH 32 +#define XgRxPkts15xxtoMaxOctets_offset 0x68 +#define XgRxPkts15xxtoMaxOctets_WIDTH 32 +#define XgRxLengthError_offset 0x6C +#define XgRxLengthError_WIDTH 32 +#define XgTxPkts_offset 0x80 +#define XgTxPkts_WIDTH 32 +#define XgTxOctets_offset 0x88 +#define XgTxOctets_WIDTH 48 +#define XgTxMulticastPkts_offset 0x90 +#define XgTxMulticastPkts_WIDTH 32 +#define XgTxBroadcastPkts_offset 0x94 +#define XgTxBroadcastPkts_WIDTH 32 +#define XgTxUnicastPkts_offset 0x98 +#define XgTxUnicastPkts_WIDTH 32 +#define XgTxControlPkts_offset 0x9C +#define XgTxControlPkts_WIDTH 32 +#define XgTxPausePkts_offset 0xA0 +#define XgTxPausePkts_WIDTH 32 +#define XgTxPkts64Octets_offset 0xA4 +#define XgTxPkts64Octets_WIDTH 32 +#define XgTxPkts65to127Octets_offset 0xA8 +#define XgTxPkts65to127Octets_WIDTH 32 +#define XgTxPkts128to255Octets_offset 0xAC +#define XgTxPkts128to255Octets_WIDTH 32 +#define XgTxPkts256to511Octets_offset 0xB0 +#define XgTxPkts256to511Octets_WIDTH 32 +#define XgTxPkts512to1023Octets_offset 0xB4 +#define XgTxPkts512to1023Octets_WIDTH 32 +#define XgTxPkts1024to15xxOctets_offset 0xB8 +#define XgTxPkts1024to15xxOctets_WIDTH 32 +#define XgTxPkts1519toMaxOctets_offset 0xBC +#define XgTxPkts1519toMaxOctets_WIDTH 32 +#define XgTxUndersizePkts_offset 0xC0 +#define XgTxUndersizePkts_WIDTH 32 +#define XgTxOversizePkts_offset 0xC4 +#define XgTxOversizePkts_WIDTH 32 +#define XgTxNonTcpUdpPkt_offset 0xC8 +#define XgTxNonTcpUdpPkt_WIDTH 16 +#define XgTxMacSrcErrPkt_offset 0xCC +#define XgTxMacSrcErrPkt_WIDTH 16 +#define XgTxIpSrcErrPkt_offset 0xD0 +#define XgTxIpSrcErrPkt_WIDTH 16 +#define XgDmaDone_offset 0xD4 + +#define FALCON_STATS_NOT_DONE 0x00000000 +#define FALCON_STATS_DONE 0xffffffff + +/* Interrupt status register bits */ +#define FATAL_INT_LBN 64 +#define FATAL_INT_WIDTH 1 +#define INT_EVQS_LBN 40 +#define INT_EVQS_WIDTH 4 + +/************************************************************************** + * + * Falcon non-volatile configuration + * + ************************************************************************** + */ + +/* Board configuration v2 (v1 is obsolete; later versions are compatible) */ +struct falcon_nvconfig_board_v2 { + __le16 nports; + u8 port0_phy_addr; + u8 port0_phy_type; + u8 port1_phy_addr; + u8 port1_phy_type; + __le16 asic_sub_revision; + __le16 board_revision; +} __attribute__ ((packed)); + +#define NVCONFIG_BASE 0x300 +#define NVCONFIG_BOARD_MAGIC_NUM 0xFA1C +struct falcon_nvconfig { + efx_oword_t ee_vpd_cfg_reg; /* 0x300 */ + u8 mac_address[2][8]; /* 0x310 */ + efx_oword_t pcie_sd_ctl0123_reg; /* 0x320 */ + efx_oword_t pcie_sd_ctl45_reg; /* 0x330 */ + efx_oword_t pcie_pcs_ctl_stat_reg; /* 0x340 */ + efx_oword_t hw_init_reg; /* 0x350 */ + efx_oword_t nic_stat_reg; /* 0x360 */ + efx_oword_t glb_ctl_reg; /* 0x370 */ + efx_oword_t srm_cfg_reg; /* 0x380 */ + efx_oword_t spare_reg; /* 0x390 */ + __le16 board_magic_num; /* 0x3A0 */ + __le16 board_struct_ver; + __le16 board_checksum; + struct falcon_nvconfig_board_v2 board_v2; +} __attribute__ ((packed)); + +#endif /* EFX_FALCON_HWDEFS_H */ diff --git a/drivers/net/sfc/falcon_io.h b/drivers/net/sfc/falcon_io.h new file mode 100644 index 00000000000..ea08184ddfa --- /dev/null +++ b/drivers/net/sfc/falcon_io.h @@ -0,0 +1,243 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_FALCON_IO_H +#define EFX_FALCON_IO_H + +#include +#include +#include "net_driver.h" + +/************************************************************************** + * + * Falcon hardware access + * + ************************************************************************** + * + * Notes on locking strategy: + * + * Most Falcon registers require 16-byte (or 8-byte, for SRAM + * registers) atomic writes which necessitates locking. + * Under normal operation few writes to the Falcon BAR are made and these + * registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and TX_DESC_UPD_REG) are special + * cased to allow 4-byte (hence lockless) accesses. + * + * It *is* safe to write to these 4-byte registers in the middle of an + * access to an 8-byte or 16-byte register. We therefore use a + * spinlock to protect accesses to the larger registers, but no locks + * for the 4-byte registers. + * + * A write barrier is needed to ensure that DW3 is written after DW0/1/2 + * due to the way the 16byte registers are "collected" in the Falcon BIU + * + * We also lock when carrying out reads, to ensure consistency of the + * data (made possible since the BIU reads all 128 bits into a cache). + * Reads are very rare, so this isn't a significant performance + * impact. (Most data transferred from NIC to host is DMAed directly + * into host memory). + * + * I/O BAR access uses locks for both reads and writes (but is only provided + * for testing purposes). + */ + +/* Special buffer descriptors (Falcon SRAM) */ +#define BUF_TBL_KER_A1 0x18000 +#define BUF_TBL_KER_B0 0x800000 + + +#if BITS_PER_LONG == 64 +#define FALCON_USE_QWORD_IO 1 +#endif + +#define _falcon_writeq(efx, value, reg) \ + __raw_writeq((__force u64) (value), (efx)->membase + (reg)) +#define _falcon_writel(efx, value, reg) \ + __raw_writel((__force u32) (value), (efx)->membase + (reg)) +#define _falcon_readq(efx, reg) \ + ((__force __le64) __raw_readq((efx)->membase + (reg))) +#define _falcon_readl(efx, reg) \ + ((__force __le32) __raw_readl((efx)->membase + (reg))) + +/* Writes to a normal 16-byte Falcon register, locking as appropriate. */ +static inline void falcon_write(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg) +{ + unsigned long flags; + + EFX_REGDUMP(efx, "writing register %x with " EFX_OWORD_FMT "\n", reg, + EFX_OWORD_VAL(*value)); + + spin_lock_irqsave(&efx->biu_lock, flags); +#ifdef FALCON_USE_QWORD_IO + _falcon_writeq(efx, value->u64[0], reg + 0); + wmb(); + _falcon_writeq(efx, value->u64[1], reg + 8); +#else + _falcon_writel(efx, value->u32[0], reg + 0); + _falcon_writel(efx, value->u32[1], reg + 4); + _falcon_writel(efx, value->u32[2], reg + 8); + wmb(); + _falcon_writel(efx, value->u32[3], reg + 12); +#endif + mmiowb(); + spin_unlock_irqrestore(&efx->biu_lock, flags); +} + +/* Writes to an 8-byte Falcon SRAM register, locking as appropriate. */ +static inline void falcon_write_sram(struct efx_nic *efx, efx_qword_t *value, + unsigned int index) +{ + unsigned int reg = efx->type->buf_tbl_base + (index * sizeof(*value)); + unsigned long flags; + + EFX_REGDUMP(efx, "writing SRAM register %x with " EFX_QWORD_FMT "\n", + reg, EFX_QWORD_VAL(*value)); + + spin_lock_irqsave(&efx->biu_lock, flags); +#ifdef FALCON_USE_QWORD_IO + _falcon_writeq(efx, value->u64[0], reg + 0); +#else + _falcon_writel(efx, value->u32[0], reg + 0); + wmb(); + _falcon_writel(efx, value->u32[1], reg + 4); +#endif + mmiowb(); + spin_unlock_irqrestore(&efx->biu_lock, flags); +} + +/* Write dword to Falcon register that allows partial writes + * + * Some Falcon registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and + * TX_DESC_UPD_REG) can be written to as a single dword. This allows + * for lockless writes. + */ +static inline void falcon_writel(struct efx_nic *efx, efx_dword_t *value, + unsigned int reg) +{ + EFX_REGDUMP(efx, "writing partial register %x with "EFX_DWORD_FMT"\n", + reg, EFX_DWORD_VAL(*value)); + + /* No lock required */ + _falcon_writel(efx, value->u32[0], reg); +} + +/* Read from a Falcon register + * + * This reads an entire 16-byte Falcon register in one go, locking as + * appropriate. It is essential to read the first dword first, as this + * prompts Falcon to load the current value into the shadow register. + */ +static inline void falcon_read(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg) +{ + unsigned long flags; + + spin_lock_irqsave(&efx->biu_lock, flags); + value->u32[0] = _falcon_readl(efx, reg + 0); + rmb(); + value->u32[1] = _falcon_readl(efx, reg + 4); + value->u32[2] = _falcon_readl(efx, reg + 8); + value->u32[3] = _falcon_readl(efx, reg + 12); + spin_unlock_irqrestore(&efx->biu_lock, flags); + + EFX_REGDUMP(efx, "read from register %x, got " EFX_OWORD_FMT "\n", reg, + EFX_OWORD_VAL(*value)); +} + +/* This reads an 8-byte Falcon SRAM entry in one go. */ +static inline void falcon_read_sram(struct efx_nic *efx, efx_qword_t *value, + unsigned int index) +{ + unsigned int reg = efx->type->buf_tbl_base + (index * sizeof(*value)); + unsigned long flags; + + spin_lock_irqsave(&efx->biu_lock, flags); +#ifdef FALCON_USE_QWORD_IO + value->u64[0] = _falcon_readq(efx, reg + 0); +#else + value->u32[0] = _falcon_readl(efx, reg + 0); + rmb(); + value->u32[1] = _falcon_readl(efx, reg + 4); +#endif + spin_unlock_irqrestore(&efx->biu_lock, flags); + + EFX_REGDUMP(efx, "read from SRAM register %x, got "EFX_QWORD_FMT"\n", + reg, EFX_QWORD_VAL(*value)); +} + +/* Read dword from Falcon register that allows partial writes (sic) */ +static inline void falcon_readl(struct efx_nic *efx, efx_dword_t *value, + unsigned int reg) +{ + value->u32[0] = _falcon_readl(efx, reg); + EFX_REGDUMP(efx, "read from register %x, got "EFX_DWORD_FMT"\n", + reg, EFX_DWORD_VAL(*value)); +} + +/* Write to a register forming part of a table */ +static inline void falcon_write_table(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg, unsigned int index) +{ + falcon_write(efx, value, reg + index * sizeof(efx_oword_t)); +} + +/* Read to a register forming part of a table */ +static inline void falcon_read_table(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg, unsigned int index) +{ + falcon_read(efx, value, reg + index * sizeof(efx_oword_t)); +} + +/* Write to a dword register forming part of a table */ +static inline void falcon_writel_table(struct efx_nic *efx, efx_dword_t *value, + unsigned int reg, unsigned int index) +{ + falcon_writel(efx, value, reg + index * sizeof(efx_oword_t)); +} + +/* Page-mapped register block size */ +#define FALCON_PAGE_BLOCK_SIZE 0x2000 + +/* Calculate offset to page-mapped register block */ +#define FALCON_PAGED_REG(page, reg) \ + ((page) * FALCON_PAGE_BLOCK_SIZE + (reg)) + +/* As for falcon_write(), but for a page-mapped register. */ +static inline void falcon_write_page(struct efx_nic *efx, efx_oword_t *value, + unsigned int reg, unsigned int page) +{ + falcon_write(efx, value, FALCON_PAGED_REG(page, reg)); +} + +/* As for falcon_writel(), but for a page-mapped register. */ +static inline void falcon_writel_page(struct efx_nic *efx, efx_dword_t *value, + unsigned int reg, unsigned int page) +{ + falcon_writel(efx, value, FALCON_PAGED_REG(page, reg)); +} + +/* Write dword to Falcon page-mapped register with an extra lock. + * + * As for falcon_writel_page(), but for a register that suffers from + * SFC bug 3181. Take out a lock so the BIU collector cannot be + * confused. */ +static inline void falcon_writel_page_locked(struct efx_nic *efx, + efx_dword_t *value, + unsigned int reg, + unsigned int page) +{ + unsigned long flags; + + spin_lock_irqsave(&efx->biu_lock, flags); + falcon_writel(efx, value, FALCON_PAGED_REG(page, reg)); + spin_unlock_irqrestore(&efx->biu_lock, flags); +} + +#endif /* EFX_FALCON_IO_H */ diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c new file mode 100644 index 00000000000..aa7521b24a5 --- /dev/null +++ b/drivers/net/sfc/falcon_xmac.c @@ -0,0 +1,585 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include "net_driver.h" +#include "efx.h" +#include "falcon.h" +#include "falcon_hwdefs.h" +#include "falcon_io.h" +#include "mac.h" +#include "gmii.h" +#include "mdio_10g.h" +#include "phy.h" +#include "boards.h" +#include "workarounds.h" + +/************************************************************************** + * + * MAC register access + * + **************************************************************************/ + +/* Offset of an XMAC register within Falcon */ +#define FALCON_XMAC_REG(mac_reg) \ + (FALCON_XMAC_REGBANK + ((mac_reg) * FALCON_XMAC_REG_SIZE)) + +void falcon_xmac_writel(struct efx_nic *efx, + efx_dword_t *value, unsigned int mac_reg) +{ + efx_oword_t temp; + + EFX_POPULATE_OWORD_1(temp, MAC_DATA, EFX_DWORD_FIELD(*value, MAC_DATA)); + falcon_write(efx, &temp, FALCON_XMAC_REG(mac_reg)); +} + +void falcon_xmac_readl(struct efx_nic *efx, + efx_dword_t *value, unsigned int mac_reg) +{ + efx_oword_t temp; + + falcon_read(efx, &temp, FALCON_XMAC_REG(mac_reg)); + EFX_POPULATE_DWORD_1(*value, MAC_DATA, EFX_OWORD_FIELD(temp, MAC_DATA)); +} + +/************************************************************************** + * + * MAC operations + * + *************************************************************************/ +static int falcon_reset_xmac(struct efx_nic *efx) +{ + efx_dword_t reg; + int count; + + EFX_POPULATE_DWORD_1(reg, XM_CORE_RST, 1); + falcon_xmac_writel(efx, ®, XM_GLB_CFG_REG_MAC); + + for (count = 0; count < 10000; count++) { /* wait upto 100ms */ + falcon_xmac_readl(efx, ®, XM_GLB_CFG_REG_MAC); + if (EFX_DWORD_FIELD(reg, XM_CORE_RST) == 0) + return 0; + udelay(10); + } + + EFX_ERR(efx, "timed out waiting for XMAC core reset\n"); + return -ETIMEDOUT; +} + +/* Configure the XAUI driver that is an output from Falcon */ +static void falcon_setup_xaui(struct efx_nic *efx) +{ + efx_dword_t sdctl, txdrv; + + /* Move the XAUI into low power, unless there is no PHY, in + * which case the XAUI will have to drive a cable. */ + if (efx->phy_type == PHY_TYPE_NONE) + return; + + falcon_xmac_readl(efx, &sdctl, XX_SD_CTL_REG_MAC); + EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVD, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_LODRVD, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVC, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_LODRVC, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVB, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_LODRVB, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVA, XX_SD_CTL_DRV_DEFAULT); + EFX_SET_DWORD_FIELD(sdctl, XX_LODRVA, XX_SD_CTL_DRV_DEFAULT); + falcon_xmac_writel(efx, &sdctl, XX_SD_CTL_REG_MAC); + + EFX_POPULATE_DWORD_8(txdrv, + XX_DEQD, XX_TXDRV_DEQ_DEFAULT, + XX_DEQC, XX_TXDRV_DEQ_DEFAULT, + XX_DEQB, XX_TXDRV_DEQ_DEFAULT, + XX_DEQA, XX_TXDRV_DEQ_DEFAULT, + XX_DTXD, XX_TXDRV_DTX_DEFAULT, + XX_DTXC, XX_TXDRV_DTX_DEFAULT, + XX_DTXB, XX_TXDRV_DTX_DEFAULT, + XX_DTXA, XX_TXDRV_DTX_DEFAULT); + falcon_xmac_writel(efx, &txdrv, XX_TXDRV_CTL_REG_MAC); +} + +static void falcon_hold_xaui_in_rst(struct efx_nic *efx) +{ + efx_dword_t reg; + + EFX_ZERO_DWORD(reg); + EFX_SET_DWORD_FIELD(reg, XX_PWRDNA_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_PWRDNB_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_PWRDNC_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_PWRDND_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RSTPLLAB_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RSTPLLCD_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RESETA_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RESETB_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RESETC_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RESETD_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSRX_EN, 1); + EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSTX_EN, 1); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); +} + +static int _falcon_reset_xaui_a(struct efx_nic *efx) +{ + efx_dword_t reg; + + falcon_hold_xaui_in_rst(efx); + falcon_xmac_readl(efx, ®, XX_PWR_RST_REG_MAC); + + /* Follow the RAMBUS XAUI data reset sequencing + * Channels A and B first: power down, reset PLL, reset, clear + */ + EFX_SET_DWORD_FIELD(reg, XX_PWRDNA_EN, 0); + EFX_SET_DWORD_FIELD(reg, XX_PWRDNB_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + EFX_SET_DWORD_FIELD(reg, XX_RSTPLLAB_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + EFX_SET_DWORD_FIELD(reg, XX_RESETA_EN, 0); + EFX_SET_DWORD_FIELD(reg, XX_RESETB_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + /* Channels C and D: power down, reset PLL, reset, clear */ + EFX_SET_DWORD_FIELD(reg, XX_PWRDNC_EN, 0); + EFX_SET_DWORD_FIELD(reg, XX_PWRDND_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + EFX_SET_DWORD_FIELD(reg, XX_RSTPLLCD_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + EFX_SET_DWORD_FIELD(reg, XX_RESETC_EN, 0); + EFX_SET_DWORD_FIELD(reg, XX_RESETD_EN, 0); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + /* Setup XAUI */ + falcon_setup_xaui(efx); + udelay(10); + + /* Take XGXS out of reset */ + EFX_ZERO_DWORD(reg); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + return 0; +} + +static int _falcon_reset_xaui_b(struct efx_nic *efx) +{ + efx_dword_t reg; + int count; + + EFX_POPULATE_DWORD_1(reg, XX_RST_XX_EN, 1); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + + /* Give some time for the link to establish */ + for (count = 0; count < 1000; count++) { /* wait upto 10ms */ + falcon_xmac_readl(efx, ®, XX_PWR_RST_REG_MAC); + if (EFX_DWORD_FIELD(reg, XX_RST_XX_EN) == 0) { + falcon_setup_xaui(efx); + return 0; + } + udelay(10); + } + EFX_ERR(efx, "timed out waiting for XAUI/XGXS reset\n"); + return -ETIMEDOUT; +} + +int falcon_reset_xaui(struct efx_nic *efx) +{ + int rc; + + if (EFX_WORKAROUND_9388(efx)) { + falcon_hold_xaui_in_rst(efx); + efx->phy_op->reset_xaui(efx); + rc = _falcon_reset_xaui_a(efx); + } else { + rc = _falcon_reset_xaui_b(efx); + } + return rc; +} + +static int falcon_xgmii_status(struct efx_nic *efx) +{ + efx_dword_t reg; + + if (FALCON_REV(efx) < FALCON_REV_B0) + return 1; + + /* The ISR latches, so clear it and re-read */ + falcon_xmac_readl(efx, ®, XM_MGT_INT_REG_MAC_B0); + falcon_xmac_readl(efx, ®, XM_MGT_INT_REG_MAC_B0); + + if (EFX_DWORD_FIELD(reg, XM_LCLFLT) || + EFX_DWORD_FIELD(reg, XM_RMTFLT)) { + EFX_INFO(efx, "MGT_INT: "EFX_DWORD_FMT"\n", EFX_DWORD_VAL(reg)); + return 0; + } + + return 1; +} + +static void falcon_mask_status_intr(struct efx_nic *efx, int enable) +{ + efx_dword_t reg; + + if (FALCON_REV(efx) < FALCON_REV_B0) + return; + + /* Flush the ISR */ + if (enable) + falcon_xmac_readl(efx, ®, XM_MGT_INT_REG_MAC_B0); + + EFX_POPULATE_DWORD_2(reg, + XM_MSK_RMTFLT, !enable, + XM_MSK_LCLFLT, !enable); + falcon_xmac_writel(efx, ®, XM_MGT_INT_MSK_REG_MAC_B0); +} + +int falcon_init_xmac(struct efx_nic *efx) +{ + int rc; + + /* Initialize the PHY first so the clock is around */ + rc = efx->phy_op->init(efx); + if (rc) + goto fail1; + + rc = falcon_reset_xaui(efx); + if (rc) + goto fail2; + + /* Wait again. Give the PHY and MAC time to come back */ + schedule_timeout_uninterruptible(HZ / 10); + + rc = falcon_reset_xmac(efx); + if (rc) + goto fail2; + + falcon_mask_status_intr(efx, 1); + return 0; + + fail2: + efx->phy_op->fini(efx); + fail1: + return rc; +} + +int falcon_xaui_link_ok(struct efx_nic *efx) +{ + efx_dword_t reg; + int align_done, sync_status, link_ok = 0; + + /* Read link status */ + falcon_xmac_readl(efx, ®, XX_CORE_STAT_REG_MAC); + + align_done = EFX_DWORD_FIELD(reg, XX_ALIGN_DONE); + sync_status = EFX_DWORD_FIELD(reg, XX_SYNC_STAT); + if (align_done && (sync_status == XX_SYNC_STAT_DECODE_SYNCED)) + link_ok = 1; + + /* Clear link status ready for next read */ + EFX_SET_DWORD_FIELD(reg, XX_COMMA_DET, XX_COMMA_DET_RESET); + EFX_SET_DWORD_FIELD(reg, XX_CHARERR, XX_CHARERR_RESET); + EFX_SET_DWORD_FIELD(reg, XX_DISPERR, XX_DISPERR_RESET); + falcon_xmac_writel(efx, ®, XX_CORE_STAT_REG_MAC); + + /* If the link is up, then check the phy side of the xaui link + * (error conditions from the wire side propoagate back through + * the phy to the xaui side). */ + if (efx->link_up && link_ok) { + int has_phyxs = efx->phy_op->mmds & (1 << MDIO_MMD_PHYXS); + if (has_phyxs) + link_ok = mdio_clause45_phyxgxs_lane_sync(efx); + } + + /* If the PHY and XAUI links are up, then check the mac's xgmii + * fault state */ + if (efx->link_up && link_ok) + link_ok = falcon_xgmii_status(efx); + + return link_ok; +} + +static void falcon_reconfigure_xmac_core(struct efx_nic *efx) +{ + unsigned int max_frame_len; + efx_dword_t reg; + int rx_fc = (efx->flow_control & EFX_FC_RX) ? 1 : 0; + + /* Configure MAC - cut-thru mode is hard wired on */ + EFX_POPULATE_DWORD_3(reg, + XM_RX_JUMBO_MODE, 1, + XM_TX_STAT_EN, 1, + XM_RX_STAT_EN, 1); + falcon_xmac_writel(efx, ®, XM_GLB_CFG_REG_MAC); + + /* Configure TX */ + EFX_POPULATE_DWORD_6(reg, + XM_TXEN, 1, + XM_TX_PRMBL, 1, + XM_AUTO_PAD, 1, + XM_TXCRC, 1, + XM_FCNTL, 1, + XM_IPG, 0x3); + falcon_xmac_writel(efx, ®, XM_TX_CFG_REG_MAC); + + /* Configure RX */ + EFX_POPULATE_DWORD_5(reg, + XM_RXEN, 1, + XM_AUTO_DEPAD, 0, + XM_ACPT_ALL_MCAST, 1, + XM_ACPT_ALL_UCAST, efx->promiscuous, + XM_PASS_CRC_ERR, 1); + falcon_xmac_writel(efx, ®, XM_RX_CFG_REG_MAC); + + /* Set frame length */ + max_frame_len = EFX_MAX_FRAME_LEN(efx->net_dev->mtu); + EFX_POPULATE_DWORD_1(reg, XM_MAX_RX_FRM_SIZE, max_frame_len); + falcon_xmac_writel(efx, ®, XM_RX_PARAM_REG_MAC); + EFX_POPULATE_DWORD_2(reg, + XM_MAX_TX_FRM_SIZE, max_frame_len, + XM_TX_JUMBO_MODE, 1); + falcon_xmac_writel(efx, ®, XM_TX_PARAM_REG_MAC); + + EFX_POPULATE_DWORD_2(reg, + XM_PAUSE_TIME, 0xfffe, /* MAX PAUSE TIME */ + XM_DIS_FCNTL, rx_fc ? 0 : 1); + falcon_xmac_writel(efx, ®, XM_FC_REG_MAC); + + /* Set MAC address */ + EFX_POPULATE_DWORD_4(reg, + XM_ADR_0, efx->net_dev->dev_addr[0], + XM_ADR_1, efx->net_dev->dev_addr[1], + XM_ADR_2, efx->net_dev->dev_addr[2], + XM_ADR_3, efx->net_dev->dev_addr[3]); + falcon_xmac_writel(efx, ®, XM_ADR_LO_REG_MAC); + EFX_POPULATE_DWORD_2(reg, + XM_ADR_4, efx->net_dev->dev_addr[4], + XM_ADR_5, efx->net_dev->dev_addr[5]); + falcon_xmac_writel(efx, ®, XM_ADR_HI_REG_MAC); +} + +/* Try and bring the Falcon side of the Falcon-Phy XAUI link fails + * to come back up. Bash it until it comes back up */ +static int falcon_check_xaui_link_up(struct efx_nic *efx) +{ + int max_tries, tries; + tries = EFX_WORKAROUND_5147(efx) ? 5 : 1; + max_tries = tries; + + if (efx->phy_type == PHY_TYPE_NONE) + return 0; + + while (tries) { + if (falcon_xaui_link_ok(efx)) + return 1; + + EFX_LOG(efx, "%s Clobbering XAUI (%d tries left).\n", + __func__, tries); + (void) falcon_reset_xaui(efx); + udelay(200); + tries--; + } + + EFX_ERR(efx, "Failed to bring XAUI link back up in %d tries!\n", + max_tries); + return 0; +} + +void falcon_reconfigure_xmac(struct efx_nic *efx) +{ + int xaui_link_ok; + + falcon_mask_status_intr(efx, 0); + + falcon_deconfigure_mac_wrapper(efx); + efx->phy_op->reconfigure(efx); + falcon_reconfigure_xmac_core(efx); + falcon_reconfigure_mac_wrapper(efx); + + /* Ensure XAUI link is up */ + xaui_link_ok = falcon_check_xaui_link_up(efx); + + if (xaui_link_ok && efx->link_up) + falcon_mask_status_intr(efx, 1); +} + +void falcon_fini_xmac(struct efx_nic *efx) +{ + /* Isolate the MAC - PHY */ + falcon_deconfigure_mac_wrapper(efx); + + /* Potentially power down the PHY */ + efx->phy_op->fini(efx); +} + +void falcon_update_stats_xmac(struct efx_nic *efx) +{ + struct efx_mac_stats *mac_stats = &efx->mac_stats; + int rc; + + rc = falcon_dma_stats(efx, XgDmaDone_offset); + if (rc) + return; + + /* Update MAC stats from DMAed values */ + FALCON_STAT(efx, XgRxOctets, rx_bytes); + FALCON_STAT(efx, XgRxOctetsOK, rx_good_bytes); + FALCON_STAT(efx, XgRxPkts, rx_packets); + FALCON_STAT(efx, XgRxPktsOK, rx_good); + FALCON_STAT(efx, XgRxBroadcastPkts, rx_broadcast); + FALCON_STAT(efx, XgRxMulticastPkts, rx_multicast); + FALCON_STAT(efx, XgRxUnicastPkts, rx_unicast); + FALCON_STAT(efx, XgRxUndersizePkts, rx_lt64); + FALCON_STAT(efx, XgRxOversizePkts, rx_gtjumbo); + FALCON_STAT(efx, XgRxJabberPkts, rx_bad_gtjumbo); + FALCON_STAT(efx, XgRxUndersizeFCSerrorPkts, rx_bad_lt64); + FALCON_STAT(efx, XgRxDropEvents, rx_overflow); + FALCON_STAT(efx, XgRxFCSerrorPkts, rx_bad); + FALCON_STAT(efx, XgRxAlignError, rx_align_error); + FALCON_STAT(efx, XgRxSymbolError, rx_symbol_error); + FALCON_STAT(efx, XgRxInternalMACError, rx_internal_error); + FALCON_STAT(efx, XgRxControlPkts, rx_control); + FALCON_STAT(efx, XgRxPausePkts, rx_pause); + FALCON_STAT(efx, XgRxPkts64Octets, rx_64); + FALCON_STAT(efx, XgRxPkts65to127Octets, rx_65_to_127); + FALCON_STAT(efx, XgRxPkts128to255Octets, rx_128_to_255); + FALCON_STAT(efx, XgRxPkts256to511Octets, rx_256_to_511); + FALCON_STAT(efx, XgRxPkts512to1023Octets, rx_512_to_1023); + FALCON_STAT(efx, XgRxPkts1024to15xxOctets, rx_1024_to_15xx); + FALCON_STAT(efx, XgRxPkts15xxtoMaxOctets, rx_15xx_to_jumbo); + FALCON_STAT(efx, XgRxLengthError, rx_length_error); + FALCON_STAT(efx, XgTxPkts, tx_packets); + FALCON_STAT(efx, XgTxOctets, tx_bytes); + FALCON_STAT(efx, XgTxMulticastPkts, tx_multicast); + FALCON_STAT(efx, XgTxBroadcastPkts, tx_broadcast); + FALCON_STAT(efx, XgTxUnicastPkts, tx_unicast); + FALCON_STAT(efx, XgTxControlPkts, tx_control); + FALCON_STAT(efx, XgTxPausePkts, tx_pause); + FALCON_STAT(efx, XgTxPkts64Octets, tx_64); + FALCON_STAT(efx, XgTxPkts65to127Octets, tx_65_to_127); + FALCON_STAT(efx, XgTxPkts128to255Octets, tx_128_to_255); + FALCON_STAT(efx, XgTxPkts256to511Octets, tx_256_to_511); + FALCON_STAT(efx, XgTxPkts512to1023Octets, tx_512_to_1023); + FALCON_STAT(efx, XgTxPkts1024to15xxOctets, tx_1024_to_15xx); + FALCON_STAT(efx, XgTxPkts1519toMaxOctets, tx_15xx_to_jumbo); + FALCON_STAT(efx, XgTxUndersizePkts, tx_lt64); + FALCON_STAT(efx, XgTxOversizePkts, tx_gtjumbo); + FALCON_STAT(efx, XgTxNonTcpUdpPkt, tx_non_tcpudp); + FALCON_STAT(efx, XgTxMacSrcErrPkt, tx_mac_src_error); + FALCON_STAT(efx, XgTxIpSrcErrPkt, tx_ip_src_error); + + /* Update derived statistics */ + mac_stats->tx_good_bytes = + (mac_stats->tx_bytes - mac_stats->tx_bad_bytes); + mac_stats->rx_bad_bytes = + (mac_stats->rx_bytes - mac_stats->rx_good_bytes); +} + +#define EFX_XAUI_RETRAIN_MAX 8 + +int falcon_check_xmac(struct efx_nic *efx) +{ + unsigned xaui_link_ok; + int rc; + + falcon_mask_status_intr(efx, 0); + xaui_link_ok = falcon_xaui_link_ok(efx); + + if (EFX_WORKAROUND_5147(efx) && !xaui_link_ok) + (void) falcon_reset_xaui(efx); + + /* Call the PHY check_hw routine */ + rc = efx->phy_op->check_hw(efx); + + /* Unmask interrupt if everything was (and still is) ok */ + if (xaui_link_ok && efx->link_up) + falcon_mask_status_intr(efx, 1); + + return rc; +} + +/* Simulate a PHY event */ +void falcon_xmac_sim_phy_event(struct efx_nic *efx) +{ + efx_qword_t phy_event; + + EFX_POPULATE_QWORD_2(phy_event, + EV_CODE, GLOBAL_EV_DECODE, + XG_PHY_INTR, 1); + falcon_generate_event(&efx->channel[0], &phy_event); +} + +int falcon_xmac_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd) +{ + mdio_clause45_get_settings(efx, ecmd); + ecmd->transceiver = XCVR_INTERNAL; + ecmd->phy_address = efx->mii.phy_id; + ecmd->autoneg = AUTONEG_DISABLE; + ecmd->duplex = DUPLEX_FULL; + return 0; +} + +int falcon_xmac_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd) +{ + if (ecmd->transceiver != XCVR_INTERNAL) + return -EINVAL; + if (ecmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + if (ecmd->duplex != DUPLEX_FULL) + return -EINVAL; + + return mdio_clause45_set_settings(efx, ecmd); +} + + +int falcon_xmac_set_pause(struct efx_nic *efx, enum efx_fc_type flow_control) +{ + int reset; + + if (flow_control & EFX_FC_AUTO) { + EFX_LOG(efx, "10G does not support flow control " + "autonegotiation\n"); + return -EINVAL; + } + + if ((flow_control & EFX_FC_TX) && !(flow_control & EFX_FC_RX)) + return -EINVAL; + + /* TX flow control may automatically turn itself off if the + * link partner (intermittently) stops responding to pause + * frames. There isn't any indication that this has happened, + * so the best we do is leave it up to the user to spot this + * and fix it be cycling transmit flow control on this end. */ + reset = ((flow_control & EFX_FC_TX) && + !(efx->flow_control & EFX_FC_TX)); + if (EFX_WORKAROUND_11482(efx) && reset) { + if (FALCON_REV(efx) >= FALCON_REV_B0) { + /* Recover by resetting the EM block */ + if (efx->link_up) + falcon_drain_tx_fifo(efx); + } else { + /* Schedule a reset to recover */ + efx_schedule_reset(efx, RESET_TYPE_INVISIBLE); + } + } + + efx->flow_control = flow_control; + + return 0; +} diff --git a/drivers/net/sfc/gmii.h b/drivers/net/sfc/gmii.h new file mode 100644 index 00000000000..d25bbd1297f --- /dev/null +++ b/drivers/net/sfc/gmii.h @@ -0,0 +1,195 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_GMII_H +#define EFX_GMII_H + +/* + * GMII interface + */ + +#include + +/* GMII registers, excluding registers already defined as MII + * registers in mii.h + */ +#define GMII_IER 0x12 /* Interrupt enable register */ +#define GMII_ISR 0x13 /* Interrupt status register */ + +/* Interrupt enable register */ +#define IER_ANEG_ERR 0x8000 /* Bit 15 - autonegotiation error */ +#define IER_SPEED_CHG 0x4000 /* Bit 14 - speed changed */ +#define IER_DUPLEX_CHG 0x2000 /* Bit 13 - duplex changed */ +#define IER_PAGE_RCVD 0x1000 /* Bit 12 - page received */ +#define IER_ANEG_DONE 0x0800 /* Bit 11 - autonegotiation complete */ +#define IER_LINK_CHG 0x0400 /* Bit 10 - link status changed */ +#define IER_SYM_ERR 0x0200 /* Bit 9 - symbol error */ +#define IER_FALSE_CARRIER 0x0100 /* Bit 8 - false carrier */ +#define IER_FIFO_ERR 0x0080 /* Bit 7 - FIFO over/underflow */ +#define IER_MDIX_CHG 0x0040 /* Bit 6 - MDI crossover changed */ +#define IER_DOWNSHIFT 0x0020 /* Bit 5 - downshift */ +#define IER_ENERGY 0x0010 /* Bit 4 - energy detect */ +#define IER_DTE_POWER 0x0004 /* Bit 2 - DTE power detect */ +#define IER_POLARITY_CHG 0x0002 /* Bit 1 - polarity changed */ +#define IER_JABBER 0x0001 /* Bit 0 - jabber */ + +/* Interrupt status register */ +#define ISR_ANEG_ERR 0x8000 /* Bit 15 - autonegotiation error */ +#define ISR_SPEED_CHG 0x4000 /* Bit 14 - speed changed */ +#define ISR_DUPLEX_CHG 0x2000 /* Bit 13 - duplex changed */ +#define ISR_PAGE_RCVD 0x1000 /* Bit 12 - page received */ +#define ISR_ANEG_DONE 0x0800 /* Bit 11 - autonegotiation complete */ +#define ISR_LINK_CHG 0x0400 /* Bit 10 - link status changed */ +#define ISR_SYM_ERR 0x0200 /* Bit 9 - symbol error */ +#define ISR_FALSE_CARRIER 0x0100 /* Bit 8 - false carrier */ +#define ISR_FIFO_ERR 0x0080 /* Bit 7 - FIFO over/underflow */ +#define ISR_MDIX_CHG 0x0040 /* Bit 6 - MDI crossover changed */ +#define ISR_DOWNSHIFT 0x0020 /* Bit 5 - downshift */ +#define ISR_ENERGY 0x0010 /* Bit 4 - energy detect */ +#define ISR_DTE_POWER 0x0004 /* Bit 2 - DTE power detect */ +#define ISR_POLARITY_CHG 0x0002 /* Bit 1 - polarity changed */ +#define ISR_JABBER 0x0001 /* Bit 0 - jabber */ + +/* Logically extended advertisement register */ +#define GM_ADVERTISE_SLCT ADVERTISE_SLCT +#define GM_ADVERTISE_CSMA ADVERTISE_CSMA +#define GM_ADVERTISE_10HALF ADVERTISE_10HALF +#define GM_ADVERTISE_1000XFULL ADVERTISE_1000XFULL +#define GM_ADVERTISE_10FULL ADVERTISE_10FULL +#define GM_ADVERTISE_1000XHALF ADVERTISE_1000XHALF +#define GM_ADVERTISE_100HALF ADVERTISE_100HALF +#define GM_ADVERTISE_1000XPAUSE ADVERTISE_1000XPAUSE +#define GM_ADVERTISE_100FULL ADVERTISE_100FULL +#define GM_ADVERTISE_1000XPSE_ASYM ADVERTISE_1000XPSE_ASYM +#define GM_ADVERTISE_100BASE4 ADVERTISE_100BASE4 +#define GM_ADVERTISE_PAUSE_CAP ADVERTISE_PAUSE_CAP +#define GM_ADVERTISE_PAUSE_ASYM ADVERTISE_PAUSE_ASYM +#define GM_ADVERTISE_RESV ADVERTISE_RESV +#define GM_ADVERTISE_RFAULT ADVERTISE_RFAULT +#define GM_ADVERTISE_LPACK ADVERTISE_LPACK +#define GM_ADVERTISE_NPAGE ADVERTISE_NPAGE +#define GM_ADVERTISE_1000FULL (ADVERTISE_1000FULL << 8) +#define GM_ADVERTISE_1000HALF (ADVERTISE_1000HALF << 8) +#define GM_ADVERTISE_1000 (GM_ADVERTISE_1000FULL | \ + GM_ADVERTISE_1000HALF) +#define GM_ADVERTISE_FULL (GM_ADVERTISE_1000FULL | \ + ADVERTISE_FULL) +#define GM_ADVERTISE_ALL (GM_ADVERTISE_1000FULL | \ + GM_ADVERTISE_1000HALF | \ + ADVERTISE_ALL) + +/* Logically extended link partner ability register */ +#define GM_LPA_SLCT LPA_SLCT +#define GM_LPA_10HALF LPA_10HALF +#define GM_LPA_1000XFULL LPA_1000XFULL +#define GM_LPA_10FULL LPA_10FULL +#define GM_LPA_1000XHALF LPA_1000XHALF +#define GM_LPA_100HALF LPA_100HALF +#define GM_LPA_1000XPAUSE LPA_1000XPAUSE +#define GM_LPA_100FULL LPA_100FULL +#define GM_LPA_1000XPAUSE_ASYM LPA_1000XPAUSE_ASYM +#define GM_LPA_100BASE4 LPA_100BASE4 +#define GM_LPA_PAUSE_CAP LPA_PAUSE_CAP +#define GM_LPA_PAUSE_ASYM LPA_PAUSE_ASYM +#define GM_LPA_RESV LPA_RESV +#define GM_LPA_RFAULT LPA_RFAULT +#define GM_LPA_LPACK LPA_LPACK +#define GM_LPA_NPAGE LPA_NPAGE +#define GM_LPA_1000FULL (LPA_1000FULL << 6) +#define GM_LPA_1000HALF (LPA_1000HALF << 6) +#define GM_LPA_10000FULL 0x00040000 +#define GM_LPA_10000HALF 0x00080000 +#define GM_LPA_DUPLEX (GM_LPA_1000FULL | GM_LPA_10000FULL \ + | LPA_DUPLEX) +#define GM_LPA_10 (LPA_10FULL | LPA_10HALF) +#define GM_LPA_100 LPA_100 +#define GM_LPA_1000 (GM_LPA_1000FULL | GM_LPA_1000HALF) +#define GM_LPA_10000 (GM_LPA_10000FULL | GM_LPA_10000HALF) + +/* Retrieve GMII autonegotiation advertised abilities + * + * The MII advertisment register (MII_ADVERTISE) is logically extended + * to include advertisement bits ADVERTISE_1000FULL and + * ADVERTISE_1000HALF from MII_CTRL1000. The result can be tested + * against the GM_ADVERTISE_xxx constants. + */ +static inline unsigned int gmii_advertised(struct mii_if_info *gmii) +{ + unsigned int advertise; + unsigned int ctrl1000; + + advertise = gmii->mdio_read(gmii->dev, gmii->phy_id, MII_ADVERTISE); + ctrl1000 = gmii->mdio_read(gmii->dev, gmii->phy_id, MII_CTRL1000); + return (((ctrl1000 << 8) & GM_ADVERTISE_1000) | advertise); +} + +/* Retrieve GMII autonegotiation link partner abilities + * + * The MII link partner ability register (MII_LPA) is logically + * extended by adding bits LPA_1000HALF and LPA_1000FULL from + * MII_STAT1000. The result can be tested against the GM_LPA_xxx + * constants. + */ +static inline unsigned int gmii_lpa(struct mii_if_info *gmii) +{ + unsigned int lpa; + unsigned int stat1000; + + lpa = gmii->mdio_read(gmii->dev, gmii->phy_id, MII_LPA); + stat1000 = gmii->mdio_read(gmii->dev, gmii->phy_id, MII_STAT1000); + return (((stat1000 << 6) & GM_LPA_1000) | lpa); +} + +/* Calculate GMII autonegotiated link technology + * + * "negotiated" should be the result of gmii_advertised() logically + * ANDed with the result of gmii_lpa(). + * + * "tech" will be negotiated with the unused bits masked out. For + * example, if both ends of the link are capable of both + * GM_LPA_1000FULL and GM_LPA_100FULL, GM_LPA_100FULL will be masked + * out. + */ +static inline unsigned int gmii_nway_result(unsigned int negotiated) +{ + unsigned int other_bits; + + /* Mask out the speed and duplexity bits */ + other_bits = negotiated & ~(GM_LPA_10 | GM_LPA_100 | GM_LPA_1000); + + if (negotiated & GM_LPA_1000FULL) + return (other_bits | GM_LPA_1000FULL); + else if (negotiated & GM_LPA_1000HALF) + return (other_bits | GM_LPA_1000HALF); + else + return (other_bits | mii_nway_result(negotiated)); +} + +/* Calculate GMII non-autonegotiated link technology + * + * This provides an equivalent to gmii_nway_result for the case when + * autonegotiation is disabled. + */ +static inline unsigned int gmii_forced_result(unsigned int bmcr) +{ + unsigned int result; + int full_duplex; + + full_duplex = bmcr & BMCR_FULLDPLX; + if (bmcr & BMCR_SPEED1000) + result = full_duplex ? GM_LPA_1000FULL : GM_LPA_1000HALF; + else if (bmcr & BMCR_SPEED100) + result = full_duplex ? GM_LPA_100FULL : GM_LPA_100HALF; + else + result = full_duplex ? GM_LPA_10FULL : GM_LPA_10HALF; + return result; +} + +#endif /* EFX_GMII_H */ diff --git a/drivers/net/sfc/i2c-direct.c b/drivers/net/sfc/i2c-direct.c new file mode 100644 index 00000000000..b6c62d0ed9c --- /dev/null +++ b/drivers/net/sfc/i2c-direct.c @@ -0,0 +1,381 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include "net_driver.h" +#include "i2c-direct.h" + +/* + * I2C data (SDA) and clock (SCL) line read/writes with appropriate + * delays. + */ + +static inline void setsda(struct efx_i2c_interface *i2c, int state) +{ + udelay(i2c->op->udelay); + i2c->sda = state; + i2c->op->setsda(i2c); + udelay(i2c->op->udelay); +} + +static inline void setscl(struct efx_i2c_interface *i2c, int state) +{ + udelay(i2c->op->udelay); + i2c->scl = state; + i2c->op->setscl(i2c); + udelay(i2c->op->udelay); +} + +static inline int getsda(struct efx_i2c_interface *i2c) +{ + int sda; + + udelay(i2c->op->udelay); + sda = i2c->op->getsda(i2c); + udelay(i2c->op->udelay); + return sda; +} + +static inline int getscl(struct efx_i2c_interface *i2c) +{ + int scl; + + udelay(i2c->op->udelay); + scl = i2c->op->getscl(i2c); + udelay(i2c->op->udelay); + return scl; +} + +/* + * I2C low-level protocol operations + * + */ + +static inline void i2c_release(struct efx_i2c_interface *i2c) +{ + EFX_WARN_ON_PARANOID(!i2c->scl); + EFX_WARN_ON_PARANOID(!i2c->sda); + /* Devices may time out if operations do not end */ + setscl(i2c, 1); + setsda(i2c, 1); + EFX_BUG_ON_PARANOID(getsda(i2c) != 1); + EFX_BUG_ON_PARANOID(getscl(i2c) != 1); +} + +static inline void i2c_start(struct efx_i2c_interface *i2c) +{ + /* We may be restarting immediately after a {send,recv}_bit, + * so SCL will not necessarily already be high. + */ + EFX_WARN_ON_PARANOID(!i2c->sda); + setscl(i2c, 1); + setsda(i2c, 0); + setscl(i2c, 0); + setsda(i2c, 1); +} + +static inline void i2c_send_bit(struct efx_i2c_interface *i2c, int bit) +{ + EFX_WARN_ON_PARANOID(i2c->scl != 0); + setsda(i2c, bit); + setscl(i2c, 1); + setscl(i2c, 0); + setsda(i2c, 1); +} + +static inline int i2c_recv_bit(struct efx_i2c_interface *i2c) +{ + int bit; + + EFX_WARN_ON_PARANOID(i2c->scl != 0); + EFX_WARN_ON_PARANOID(!i2c->sda); + setscl(i2c, 1); + bit = getsda(i2c); + setscl(i2c, 0); + return bit; +} + +static inline void i2c_stop(struct efx_i2c_interface *i2c) +{ + EFX_WARN_ON_PARANOID(i2c->scl != 0); + setsda(i2c, 0); + setscl(i2c, 1); + setsda(i2c, 1); +} + +/* + * I2C mid-level protocol operations + * + */ + +/* Sends a byte via the I2C bus and checks for an acknowledgement from + * the slave device. + */ +static int i2c_send_byte(struct efx_i2c_interface *i2c, u8 byte) +{ + int i; + + /* Send byte */ + for (i = 0; i < 8; i++) { + i2c_send_bit(i2c, !!(byte & 0x80)); + byte <<= 1; + } + + /* Check for acknowledgement from slave */ + return (i2c_recv_bit(i2c) == 0 ? 0 : -EIO); +} + +/* Receives a byte via the I2C bus and sends ACK/NACK to the slave device. */ +static u8 i2c_recv_byte(struct efx_i2c_interface *i2c, int ack) +{ + u8 value = 0; + int i; + + /* Receive byte */ + for (i = 0; i < 8; i++) + value = (value << 1) | i2c_recv_bit(i2c); + + /* Send ACK/NACK */ + i2c_send_bit(i2c, (ack ? 0 : 1)); + + return value; +} + +/* Calculate command byte for a read operation */ +static inline u8 i2c_read_cmd(u8 device_id) +{ + return ((device_id << 1) | 1); +} + +/* Calculate command byte for a write operation */ +static inline u8 i2c_write_cmd(u8 device_id) +{ + return ((device_id << 1) | 0); +} + +int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id) +{ + int rc; + + /* If someone is driving the bus low we just give up. */ + if (getsda(i2c) == 0 || getscl(i2c) == 0) { + EFX_ERR(i2c->efx, "%s someone is holding the I2C bus low." + " Giving up.\n", __func__); + return -EFAULT; + } + + /* Pretend to initiate a device write */ + i2c_start(i2c); + rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); + if (rc) + goto out; + + out: + i2c_stop(i2c); + i2c_release(i2c); + + return rc; +} + +/* This performs a fast read of one or more consecutive bytes from an + * I2C device. Not all devices support consecutive reads of more than + * one byte; for these devices use efx_i2c_read() instead. + */ +int efx_i2c_fast_read(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, u8 *data, unsigned int len) +{ + int i; + int rc; + + EFX_WARN_ON_PARANOID(getsda(i2c) != 1); + EFX_WARN_ON_PARANOID(getscl(i2c) != 1); + EFX_WARN_ON_PARANOID(data == NULL); + EFX_WARN_ON_PARANOID(len < 1); + + /* Select device and starting offset */ + i2c_start(i2c); + rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); + if (rc) + goto out; + rc = i2c_send_byte(i2c, offset); + if (rc) + goto out; + + /* Read data from device */ + i2c_start(i2c); + rc = i2c_send_byte(i2c, i2c_read_cmd(device_id)); + if (rc) + goto out; + for (i = 0; i < (len - 1); i++) + /* Read and acknowledge all but the last byte */ + data[i] = i2c_recv_byte(i2c, 1); + /* Read last byte with no acknowledgement */ + data[i] = i2c_recv_byte(i2c, 0); + + out: + i2c_stop(i2c); + i2c_release(i2c); + + return rc; +} + +/* This performs a fast write of one or more consecutive bytes to an + * I2C device. Not all devices support consecutive writes of more + * than one byte; for these devices use efx_i2c_write() instead. + */ +int efx_i2c_fast_write(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, + const u8 *data, unsigned int len) +{ + int i; + int rc; + + EFX_WARN_ON_PARANOID(getsda(i2c) != 1); + EFX_WARN_ON_PARANOID(getscl(i2c) != 1); + EFX_WARN_ON_PARANOID(len < 1); + + /* Select device and starting offset */ + i2c_start(i2c); + rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); + if (rc) + goto out; + rc = i2c_send_byte(i2c, offset); + if (rc) + goto out; + + /* Write data to device */ + for (i = 0; i < len; i++) { + rc = i2c_send_byte(i2c, data[i]); + if (rc) + goto out; + } + + out: + i2c_stop(i2c); + i2c_release(i2c); + + return rc; +} + +/* I2C byte-by-byte read */ +int efx_i2c_read(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, u8 *data, unsigned int len) +{ + int rc; + + /* i2c_fast_read with length 1 is a single byte read */ + for (; len > 0; offset++, data++, len--) { + rc = efx_i2c_fast_read(i2c, device_id, offset, data, 1); + if (rc) + return rc; + } + + return 0; +} + +/* I2C byte-by-byte write */ +int efx_i2c_write(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, const u8 *data, unsigned int len) +{ + int rc; + + /* i2c_fast_write with length 1 is a single byte write */ + for (; len > 0; offset++, data++, len--) { + rc = efx_i2c_fast_write(i2c, device_id, offset, data, 1); + if (rc) + return rc; + mdelay(i2c->op->mdelay); + } + + return 0; +} + + +/* This is just a slightly neater wrapper round efx_i2c_fast_write + * in the case where the target doesn't take an offset + */ +int efx_i2c_send_bytes(struct efx_i2c_interface *i2c, + u8 device_id, const u8 *data, unsigned int len) +{ + return efx_i2c_fast_write(i2c, device_id, data[0], data + 1, len - 1); +} + +/* I2C receiving of bytes - does not send an offset byte */ +int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id, + u8 *bytes, unsigned int len) +{ + int i; + int rc; + + EFX_WARN_ON_PARANOID(getsda(i2c) != 1); + EFX_WARN_ON_PARANOID(getscl(i2c) != 1); + EFX_WARN_ON_PARANOID(len < 1); + + /* Select device */ + i2c_start(i2c); + + /* Read data from device */ + rc = i2c_send_byte(i2c, i2c_read_cmd(device_id)); + if (rc) + goto out; + + for (i = 0; i < (len - 1); i++) + /* Read and acknowledge all but the last byte */ + bytes[i] = i2c_recv_byte(i2c, 1); + /* Read last byte with no acknowledgement */ + bytes[i] = i2c_recv_byte(i2c, 0); + + out: + i2c_stop(i2c); + i2c_release(i2c); + + return rc; +} + +/* SMBus and some I2C devices will time out if the I2C clock is + * held low for too long. This is most likely to happen in virtualised + * systems (when the entire domain is descheduled) but could in + * principle happen due to preemption on any busy system (and given the + * potential length of an I2C operation turning preemption off is not + * a sensible option). The following functions deal with the failure by + * retrying up to a fixed number of times. + */ + +#define I2C_MAX_RETRIES (10) + +/* The timeout problem will result in -EIO. If the wrapped function + * returns any other error, pass this up and do not retry. */ +#define RETRY_WRAPPER(_f) \ + int retries = I2C_MAX_RETRIES; \ + int rc; \ + while (retries) { \ + rc = _f; \ + if (rc != -EIO) \ + return rc; \ + retries--; \ + } \ + return rc; \ + +int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c, u8 device_id) +{ + RETRY_WRAPPER(efx_i2c_check_presence(i2c, device_id)) +} + +int efx_i2c_read_retry(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, u8 *data, unsigned int len) +{ + RETRY_WRAPPER(efx_i2c_read(i2c, device_id, offset, data, len)) +} + +int efx_i2c_write_retry(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, const u8 *data, unsigned int len) +{ + RETRY_WRAPPER(efx_i2c_write(i2c, device_id, offset, data, len)) +} diff --git a/drivers/net/sfc/i2c-direct.h b/drivers/net/sfc/i2c-direct.h new file mode 100644 index 00000000000..291e561071f --- /dev/null +++ b/drivers/net/sfc/i2c-direct.h @@ -0,0 +1,91 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_I2C_DIRECT_H +#define EFX_I2C_DIRECT_H + +#include "net_driver.h" + +/* + * Direct control of an I2C bus + */ + +struct efx_i2c_interface; + +/** + * struct efx_i2c_bit_operations - I2C bus direct control methods + * + * I2C bus direct control methods. + * + * @setsda: Set state of SDA line + * @setscl: Set state of SCL line + * @getsda: Get state of SDA line + * @getscl: Get state of SCL line + * @udelay: Delay between each bit operation + * @mdelay: Delay between each byte write + */ +struct efx_i2c_bit_operations { + void (*setsda) (struct efx_i2c_interface *i2c); + void (*setscl) (struct efx_i2c_interface *i2c); + int (*getsda) (struct efx_i2c_interface *i2c); + int (*getscl) (struct efx_i2c_interface *i2c); + unsigned int udelay; + unsigned int mdelay; +}; + +/** + * struct efx_i2c_interface - an I2C interface + * + * An I2C interface. + * + * @efx: Attached Efx NIC + * @op: I2C bus control methods + * @sda: Current output state of SDA line + * @scl: Current output state of SCL line + */ +struct efx_i2c_interface { + struct efx_nic *efx; + struct efx_i2c_bit_operations *op; + unsigned int sda:1; + unsigned int scl:1; +}; + +extern int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id); +extern int efx_i2c_fast_read(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, + u8 *data, unsigned int len); +extern int efx_i2c_fast_write(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, + const u8 *data, unsigned int len); +extern int efx_i2c_read(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, u8 *data, unsigned int len); +extern int efx_i2c_write(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, + const u8 *data, unsigned int len); + +extern int efx_i2c_send_bytes(struct efx_i2c_interface *i2c, u8 device_id, + const u8 *bytes, unsigned int len); + +extern int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id, + u8 *bytes, unsigned int len); + + +/* Versions of the API that retry on failure. */ +extern int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c, + u8 device_id); + +extern int efx_i2c_read_retry(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, u8 *data, unsigned int len); + +extern int efx_i2c_write_retry(struct efx_i2c_interface *i2c, + u8 device_id, u8 offset, + const u8 *data, unsigned int len); + +#endif /* EFX_I2C_DIRECT_H */ diff --git a/drivers/net/sfc/mac.h b/drivers/net/sfc/mac.h new file mode 100644 index 00000000000..edd07d4dee1 --- /dev/null +++ b/drivers/net/sfc/mac.h @@ -0,0 +1,33 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_MAC_H +#define EFX_MAC_H + +#include "net_driver.h" + +extern void falcon_xmac_writel(struct efx_nic *efx, + efx_dword_t *value, unsigned int mac_reg); +extern void falcon_xmac_readl(struct efx_nic *efx, + efx_dword_t *value, unsigned int mac_reg); +extern int falcon_init_xmac(struct efx_nic *efx); +extern void falcon_reconfigure_xmac(struct efx_nic *efx); +extern void falcon_update_stats_xmac(struct efx_nic *efx); +extern void falcon_fini_xmac(struct efx_nic *efx); +extern int falcon_check_xmac(struct efx_nic *efx); +extern void falcon_xmac_sim_phy_event(struct efx_nic *efx); +extern int falcon_xmac_get_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd); +extern int falcon_xmac_set_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd); +extern int falcon_xmac_set_pause(struct efx_nic *efx, + enum efx_fc_type pause_params); + +#endif diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c new file mode 100644 index 00000000000..dc06bb0aa57 --- /dev/null +++ b/drivers/net/sfc/mdio_10g.c @@ -0,0 +1,282 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +/* + * Useful functions for working with MDIO clause 45 PHYs + */ +#include +#include +#include +#include "net_driver.h" +#include "mdio_10g.h" +#include "boards.h" + +int mdio_clause45_reset_mmd(struct efx_nic *port, int mmd, + int spins, int spintime) +{ + u32 ctrl; + int phy_id = port->mii.phy_id; + + /* Catch callers passing values in the wrong units (or just silly) */ + EFX_BUG_ON_PARANOID(spins * spintime >= 5000); + + mdio_clause45_write(port, phy_id, mmd, MDIO_MMDREG_CTRL1, + (1 << MDIO_MMDREG_CTRL1_RESET_LBN)); + /* Wait for the reset bit to clear. */ + do { + msleep(spintime); + ctrl = mdio_clause45_read(port, phy_id, mmd, MDIO_MMDREG_CTRL1); + spins--; + + } while (spins && (ctrl & (1 << MDIO_MMDREG_CTRL1_RESET_LBN))); + + return spins ? spins : -ETIMEDOUT; +} + +static int mdio_clause45_check_mmd(struct efx_nic *efx, int mmd, + int fault_fatal) +{ + int status; + int phy_id = efx->mii.phy_id; + + /* Read MMD STATUS2 to check it is responding. */ + status = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_STAT2); + if (((status >> MDIO_MMDREG_STAT2_PRESENT_LBN) & + ((1 << MDIO_MMDREG_STAT2_PRESENT_WIDTH) - 1)) != + MDIO_MMDREG_STAT2_PRESENT_VAL) { + EFX_ERR(efx, "PHY MMD %d not responding.\n", mmd); + return -EIO; + } + + /* Read MMD STATUS 1 to check for fault. */ + status = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_STAT1); + if ((status & (1 << MDIO_MMDREG_STAT1_FAULT_LBN)) != 0) { + if (fault_fatal) { + EFX_ERR(efx, "PHY MMD %d reporting fatal" + " fault: status %x\n", mmd, status); + return -EIO; + } else { + EFX_LOG(efx, "PHY MMD %d reporting status" + " %x (expected)\n", mmd, status); + } + } + return 0; +} + +/* This ought to be ridiculous overkill. We expect it to fail rarely */ +#define MDIO45_RESET_TIME 1000 /* ms */ +#define MDIO45_RESET_ITERS 100 + +int mdio_clause45_wait_reset_mmds(struct efx_nic *efx, + unsigned int mmd_mask) +{ + const int spintime = MDIO45_RESET_TIME / MDIO45_RESET_ITERS; + int tries = MDIO45_RESET_ITERS; + int rc = 0; + int in_reset; + + while (tries) { + int mask = mmd_mask; + int mmd = 0; + int stat; + in_reset = 0; + while (mask) { + if (mask & 1) { + stat = mdio_clause45_read(efx, + efx->mii.phy_id, + mmd, + MDIO_MMDREG_CTRL1); + if (stat < 0) { + EFX_ERR(efx, "failed to read status of" + " MMD %d\n", mmd); + return -EIO; + } + if (stat & (1 << MDIO_MMDREG_CTRL1_RESET_LBN)) + in_reset |= (1 << mmd); + } + mask = mask >> 1; + mmd++; + } + if (!in_reset) + break; + tries--; + msleep(spintime); + } + if (in_reset != 0) { + EFX_ERR(efx, "not all MMDs came out of reset in time." + " MMDs still in reset: %x\n", in_reset); + rc = -ETIMEDOUT; + } + return rc; +} + +int mdio_clause45_check_mmds(struct efx_nic *efx, + unsigned int mmd_mask, unsigned int fatal_mask) +{ + int devices, mmd = 0; + int probe_mmd; + + /* Historically we have probed the PHYXS to find out what devices are + * present,but that doesn't work so well if the PHYXS isn't expected + * to exist, if so just find the first item in the list supplied. */ + probe_mmd = (mmd_mask & MDIO_MMDREG_DEVS0_PHYXS) ? MDIO_MMD_PHYXS : + __ffs(mmd_mask); + devices = mdio_clause45_read(efx, efx->mii.phy_id, + probe_mmd, MDIO_MMDREG_DEVS0); + + /* Check all the expected MMDs are present */ + if (devices < 0) { + EFX_ERR(efx, "failed to read devices present\n"); + return -EIO; + } + if ((devices & mmd_mask) != mmd_mask) { + EFX_ERR(efx, "required MMDs not present: got %x, " + "wanted %x\n", devices, mmd_mask); + return -ENODEV; + } + EFX_TRACE(efx, "Devices present: %x\n", devices); + + /* Check all required MMDs are responding and happy. */ + while (mmd_mask) { + if (mmd_mask & 1) { + int fault_fatal = fatal_mask & 1; + if (mdio_clause45_check_mmd(efx, mmd, fault_fatal)) + return -EIO; + } + mmd_mask = mmd_mask >> 1; + fatal_mask = fatal_mask >> 1; + mmd++; + } + + return 0; +} + +int mdio_clause45_links_ok(struct efx_nic *efx, unsigned int mmd_mask) +{ + int phy_id = efx->mii.phy_id; + int status; + int ok = 1; + int mmd = 0; + int good; + + while (mmd_mask) { + if (mmd_mask & 1) { + /* Double reads because link state is latched, and a + * read moves the current state into the register */ + status = mdio_clause45_read(efx, phy_id, + mmd, MDIO_MMDREG_STAT1); + status = mdio_clause45_read(efx, phy_id, + mmd, MDIO_MMDREG_STAT1); + + good = status & (1 << MDIO_MMDREG_STAT1_LINK_LBN); + ok = ok && good; + } + mmd_mask = (mmd_mask >> 1); + mmd++; + } + return ok; +} + +/** + * mdio_clause45_get_settings - Read (some of) the PHY settings over MDIO. + * @efx: Efx NIC + * @ecmd: Buffer for settings + * + * On return the 'port', 'speed', 'supported' and 'advertising' fields of + * ecmd have been filled out based on the PMA type. + */ +void mdio_clause45_get_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd) +{ + int pma_type; + + /* If no PMA is present we are presumably talking something XAUI-ish + * like CX4. Which we report as FIBRE (see below) */ + if ((efx->phy_op->mmds & DEV_PRESENT_BIT(MDIO_MMD_PMAPMD)) == 0) { + ecmd->speed = SPEED_10000; + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = ADVERTISED_FIBRE; + return; + } + + pma_type = mdio_clause45_read(efx, efx->mii.phy_id, + MDIO_MMD_PMAPMD, MDIO_MMDREG_CTRL2); + pma_type &= MDIO_PMAPMD_CTRL2_TYPE_MASK; + + switch (pma_type) { + /* We represent CX4 as fibre in the absence of anything + better. */ + case MDIO_PMAPMD_CTRL2_10G_CX4: + ecmd->speed = SPEED_10000; + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = ADVERTISED_FIBRE; + break; + /* 10G Base-T */ + case MDIO_PMAPMD_CTRL2_10G_BT: + ecmd->speed = SPEED_10000; + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP | SUPPORTED_10000baseT_Full; + ecmd->advertising = (ADVERTISED_FIBRE + | ADVERTISED_10000baseT_Full); + break; + case MDIO_PMAPMD_CTRL2_1G_BT: + ecmd->speed = SPEED_1000; + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP | SUPPORTED_1000baseT_Full; + ecmd->advertising = (ADVERTISED_FIBRE + | ADVERTISED_1000baseT_Full); + break; + case MDIO_PMAPMD_CTRL2_100_BT: + ecmd->speed = SPEED_100; + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP | SUPPORTED_100baseT_Full; + ecmd->advertising = (ADVERTISED_FIBRE + | ADVERTISED_100baseT_Full); + break; + case MDIO_PMAPMD_CTRL2_10_BT: + ecmd->speed = SPEED_10; + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP | SUPPORTED_10baseT_Full; + ecmd->advertising = ADVERTISED_FIBRE | ADVERTISED_10baseT_Full; + break; + /* All the other defined modes are flavours of + * 10G optical */ + default: + ecmd->speed = SPEED_10000; + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = ADVERTISED_FIBRE; + break; + } +} + +/** + * mdio_clause45_set_settings - Set (some of) the PHY settings over MDIO. + * @efx: Efx NIC + * @ecmd: New settings + * + * Currently this just enforces that we are _not_ changing the + * 'port', 'speed', 'supported' or 'advertising' settings as these + * cannot be changed on any currently supported PHY. + */ +int mdio_clause45_set_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd) +{ + struct ethtool_cmd tmpcmd; + mdio_clause45_get_settings(efx, &tmpcmd); + /* None of the current PHYs support more than one mode + * of operation (and only 10GBT ever will), so keep things + * simple for now */ + if ((ecmd->speed == tmpcmd.speed) && (ecmd->port == tmpcmd.port) && + (ecmd->supported == tmpcmd.supported) && + (ecmd->advertising == tmpcmd.advertising)) + return 0; + return -EOPNOTSUPP; +} diff --git a/drivers/net/sfc/mdio_10g.h b/drivers/net/sfc/mdio_10g.h new file mode 100644 index 00000000000..2214b6d820a --- /dev/null +++ b/drivers/net/sfc/mdio_10g.h @@ -0,0 +1,232 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_MDIO_10G_H +#define EFX_MDIO_10G_H + +/* + * Definitions needed for doing 10G MDIO as specified in clause 45 + * MDIO, which do not appear in Linux yet. Also some helper functions. + */ + +#include "efx.h" +#include "boards.h" + +/* Numbering of the MDIO Manageable Devices (MMDs) */ +/* Physical Medium Attachment/ Physical Medium Dependent sublayer */ +#define MDIO_MMD_PMAPMD (1) +/* WAN Interface Sublayer */ +#define MDIO_MMD_WIS (2) +/* Physical Coding Sublayer */ +#define MDIO_MMD_PCS (3) +/* PHY Extender Sublayer */ +#define MDIO_MMD_PHYXS (4) +/* Extender Sublayer */ +#define MDIO_MMD_DTEXS (5) +/* Transmission convergence */ +#define MDIO_MMD_TC (6) +/* Auto negotiation */ +#define MDIO_MMD_AN (7) + +/* Generic register locations */ +#define MDIO_MMDREG_CTRL1 (0) +#define MDIO_MMDREG_STAT1 (1) +#define MDIO_MMDREG_IDHI (2) +#define MDIO_MMDREG_IDLOW (3) +#define MDIO_MMDREG_SPEED (4) +#define MDIO_MMDREG_DEVS0 (5) +#define MDIO_MMDREG_DEVS1 (6) +#define MDIO_MMDREG_CTRL2 (7) +#define MDIO_MMDREG_STAT2 (8) + +/* Bits in MMDREG_CTRL1 */ +/* Reset */ +#define MDIO_MMDREG_CTRL1_RESET_LBN (15) +#define MDIO_MMDREG_CTRL1_RESET_WIDTH (1) + +/* Bits in MMDREG_STAT1 */ +#define MDIO_MMDREG_STAT1_FAULT_LBN (7) +#define MDIO_MMDREG_STAT1_FAULT_WIDTH (1) +/* Link state */ +#define MDIO_MMDREG_STAT1_LINK_LBN (2) +#define MDIO_MMDREG_STAT1_LINK_WIDTH (1) + +/* Bits in ID reg */ +#define MDIO_ID_REV(_id32) (_id32 & 0xf) +#define MDIO_ID_MODEL(_id32) ((_id32 >> 4) & 0x3f) +#define MDIO_ID_OUI(_id32) (_id32 >> 10) + +/* Bits in MMDREG_DEVS0. Someone thoughtfully layed things out + * so the 'bit present' bit number of an MMD is the number of + * that MMD */ +#define DEV_PRESENT_BIT(_b) (1 << _b) + +#define MDIO_MMDREG_DEVS0_PHYXS DEV_PRESENT_BIT(MDIO_MMD_PHYXS) +#define MDIO_MMDREG_DEVS0_PCS DEV_PRESENT_BIT(MDIO_MMD_PCS) +#define MDIO_MMDREG_DEVS0_PMAPMD DEV_PRESENT_BIT(MDIO_MMD_PMAPMD) + +/* Bits in MMDREG_STAT2 */ +#define MDIO_MMDREG_STAT2_PRESENT_VAL (2) +#define MDIO_MMDREG_STAT2_PRESENT_LBN (14) +#define MDIO_MMDREG_STAT2_PRESENT_WIDTH (2) + +/* PMA type (4 bits) */ +#define MDIO_PMAPMD_CTRL2_10G_CX4 (0x0) +#define MDIO_PMAPMD_CTRL2_10G_EW (0x1) +#define MDIO_PMAPMD_CTRL2_10G_LW (0x2) +#define MDIO_PMAPMD_CTRL2_10G_SW (0x3) +#define MDIO_PMAPMD_CTRL2_10G_LX4 (0x4) +#define MDIO_PMAPMD_CTRL2_10G_ER (0x5) +#define MDIO_PMAPMD_CTRL2_10G_LR (0x6) +#define MDIO_PMAPMD_CTRL2_10G_SR (0x7) +/* Reserved */ +#define MDIO_PMAPMD_CTRL2_10G_BT (0x9) +/* Reserved */ +/* Reserved */ +#define MDIO_PMAPMD_CTRL2_1G_BT (0xc) +/* Reserved */ +#define MDIO_PMAPMD_CTRL2_100_BT (0xe) +#define MDIO_PMAPMD_CTRL2_10_BT (0xf) +#define MDIO_PMAPMD_CTRL2_TYPE_MASK (0xf) + +/* /\* PHY XGXS lane state *\/ */ +#define MDIO_PHYXS_LANE_STATE (0x18) +#define MDIO_PHYXS_LANE_ALIGNED_LBN (12) + +/* AN registers */ +#define MDIO_AN_STATUS (1) +#define MDIO_AN_STATUS_XNP_LBN (7) +#define MDIO_AN_STATUS_PAGE_LBN (6) +#define MDIO_AN_STATUS_AN_DONE_LBN (5) +#define MDIO_AN_STATUS_LP_AN_CAP_LBN (0) + +#define MDIO_AN_10GBT_STATUS (33) +#define MDIO_AN_10GBT_STATUS_MS_FLT_LBN (15) /* MASTER/SLAVE config fault */ +#define MDIO_AN_10GBT_STATUS_MS_LBN (14) /* MASTER/SLAVE config */ +#define MDIO_AN_10GBT_STATUS_LOC_OK_LBN (13) /* Local OK */ +#define MDIO_AN_10GBT_STATUS_REM_OK_LBN (12) /* Remote OK */ +#define MDIO_AN_10GBT_STATUS_LP_10G_LBN (11) /* Link partner is 10GBT capable */ +#define MDIO_AN_10GBT_STATUS_LP_LTA_LBN (10) /* LP loop timing ability */ +#define MDIO_AN_10GBT_STATUS_LP_TRR_LBN (9) /* LP Training Reset Request */ + + +/* Packing of the prt and dev arguments of clause 45 style MDIO into a + * single int so they can be passed into the mdio_read/write functions + * that currently exist. Note that as Falcon is the only current user, + * the packed form is chosen to match what Falcon needs to write into + * a register. This is checked at compile-time so do not change it. If + * your target chip needs things layed out differently you will need + * to unpack the arguments in your chip-specific mdio functions. + */ + /* These are defined by the standard. */ +#define MDIO45_PRT_ID_WIDTH (5) +#define MDIO45_DEV_ID_WIDTH (5) + +/* The prt ID is just packed in immediately to the left of the dev ID */ +#define MDIO45_PRT_DEV_WIDTH (MDIO45_PRT_ID_WIDTH + MDIO45_DEV_ID_WIDTH) + +#define MDIO45_PRT_ID_MASK ((1 << MDIO45_PRT_DEV_WIDTH) - 1) +/* This is the prt + dev extended by 1 bit to hold the 'is clause 45' flag. */ +#define MDIO45_XPRT_ID_WIDTH (MDIO45_PRT_DEV_WIDTH + 1) +#define MDIO45_XPRT_ID_MASK ((1 << MDIO45_XPRT_ID_WIDTH) - 1) +#define MDIO45_XPRT_ID_IS10G (1 << (MDIO45_XPRT_ID_WIDTH - 1)) + + +#define MDIO45_PRT_ID_COMP_LBN MDIO45_DEV_ID_WIDTH +#define MDIO45_PRT_ID_COMP_WIDTH MDIO45_PRT_ID_WIDTH +#define MDIO45_DEV_ID_COMP_LBN 0 +#define MDIO45_DEV_ID_COMP_WIDTH MDIO45_DEV_ID_WIDTH + +/* Compose port and device into a phy_id */ +static inline int mdio_clause45_pack(u8 prt, u8 dev) +{ + efx_dword_t phy_id; + EFX_POPULATE_DWORD_2(phy_id, MDIO45_PRT_ID_COMP, prt, + MDIO45_DEV_ID_COMP, dev); + return MDIO45_XPRT_ID_IS10G | EFX_DWORD_VAL(phy_id); +} + +static inline void mdio_clause45_unpack(u32 val, u8 *prt, u8 *dev) +{ + efx_dword_t phy_id; + EFX_POPULATE_DWORD_1(phy_id, EFX_DWORD_0, val); + *prt = EFX_DWORD_FIELD(phy_id, MDIO45_PRT_ID_COMP); + *dev = EFX_DWORD_FIELD(phy_id, MDIO45_DEV_ID_COMP); +} + +static inline int mdio_clause45_read(struct efx_nic *efx, + u8 prt, u8 dev, u16 addr) +{ + return efx->mii.mdio_read(efx->net_dev, + mdio_clause45_pack(prt, dev), addr); +} + +static inline void mdio_clause45_write(struct efx_nic *efx, + u8 prt, u8 dev, u16 addr, int value) +{ + efx->mii.mdio_write(efx->net_dev, + mdio_clause45_pack(prt, dev), addr, value); +} + + +static inline u32 mdio_clause45_read_id(struct efx_nic *efx, int mmd) +{ + int phy_id = efx->mii.phy_id; + u16 id_low = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_IDLOW); + u16 id_hi = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_IDHI); + return (id_hi << 16) | (id_low); +} + +static inline int mdio_clause45_phyxgxs_lane_sync(struct efx_nic *efx) +{ + int i, sync, lane_status; + + for (i = 0; i < 2; ++i) + lane_status = mdio_clause45_read(efx, efx->mii.phy_id, + MDIO_MMD_PHYXS, + MDIO_PHYXS_LANE_STATE); + + sync = (lane_status & (1 << MDIO_PHYXS_LANE_ALIGNED_LBN)) != 0; + if (!sync) + EFX_INFO(efx, "XGXS lane status: %x\n", lane_status); + return sync; +} + +extern const char *mdio_clause45_mmd_name(int mmd); + +/* + * Reset a specific MMD and wait for reset to clear. + * Return number of spins left (>0) on success, -%ETIMEDOUT on failure. + * + * This function will sleep + */ +extern int mdio_clause45_reset_mmd(struct efx_nic *efx, int mmd, + int spins, int spintime); + +/* As mdio_clause45_check_mmd but for multiple MMDs */ +int mdio_clause45_check_mmds(struct efx_nic *efx, + unsigned int mmd_mask, unsigned int fatal_mask); + +/* Check the link status of specified mmds in bit mask */ +extern int mdio_clause45_links_ok(struct efx_nic *efx, + unsigned int mmd_mask); + +/* Read (some of) the PHY settings over MDIO */ +extern void mdio_clause45_get_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd); + +/* Set (some of) the PHY settings over MDIO */ +extern int mdio_clause45_set_settings(struct efx_nic *efx, + struct ethtool_cmd *ecmd); + +/* Wait for specified MMDs to exit reset within a timeout */ +extern int mdio_clause45_wait_reset_mmds(struct efx_nic *efx, + unsigned int mmd_mask); + +#endif /* EFX_MDIO_10G_H */ diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h new file mode 100644 index 00000000000..c505482c252 --- /dev/null +++ b/drivers/net/sfc/net_driver.h @@ -0,0 +1,883 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/* Common definitions for all Efx net driver code */ + +#ifndef EFX_NET_DRIVER_H +#define EFX_NET_DRIVER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "enum.h" +#include "bitfield.h" +#include "i2c-direct.h" + +#define EFX_MAX_LRO_DESCRIPTORS 8 +#define EFX_MAX_LRO_AGGR MAX_SKB_FRAGS + +/************************************************************************** + * + * Build definitions + * + **************************************************************************/ +#ifndef EFX_DRIVER_NAME +#define EFX_DRIVER_NAME "sfc" +#endif +#define EFX_DRIVER_VERSION "2.2.0136" + +#ifdef EFX_ENABLE_DEBUG +#define EFX_BUG_ON_PARANOID(x) BUG_ON(x) +#define EFX_WARN_ON_PARANOID(x) WARN_ON(x) +#else +#define EFX_BUG_ON_PARANOID(x) do {} while (0) +#define EFX_WARN_ON_PARANOID(x) do {} while (0) +#endif + +#define NET_DEV_REGISTERED(efx) \ + ((efx)->net_dev->reg_state == NETREG_REGISTERED) + +/* Include net device name in log messages if it has been registered. + * Use efx->name not efx->net_dev->name so that races with (un)registration + * are harmless. + */ +#define NET_DEV_NAME(efx) (NET_DEV_REGISTERED(efx) ? (efx)->name : "") + +/* Un-rate-limited logging */ +#define EFX_ERR(efx, fmt, args...) \ +dev_err(&((efx)->pci_dev->dev), "ERR: %s " fmt, NET_DEV_NAME(efx), ##args) + +#define EFX_INFO(efx, fmt, args...) \ +dev_info(&((efx)->pci_dev->dev), "INFO: %s " fmt, NET_DEV_NAME(efx), ##args) + +#ifdef EFX_ENABLE_DEBUG +#define EFX_LOG(efx, fmt, args...) \ +dev_info(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args) +#else +#define EFX_LOG(efx, fmt, args...) \ +dev_dbg(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args) +#endif + +#define EFX_TRACE(efx, fmt, args...) do {} while (0) + +#define EFX_REGDUMP(efx, fmt, args...) do {} while (0) + +/* Rate-limited logging */ +#define EFX_ERR_RL(efx, fmt, args...) \ +do {if (net_ratelimit()) EFX_ERR(efx, fmt, ##args); } while (0) + +#define EFX_INFO_RL(efx, fmt, args...) \ +do {if (net_ratelimit()) EFX_INFO(efx, fmt, ##args); } while (0) + +#define EFX_LOG_RL(efx, fmt, args...) \ +do {if (net_ratelimit()) EFX_LOG(efx, fmt, ##args); } while (0) + +/* Kernel headers may redefine inline anyway */ +#ifndef inline +#define inline inline __attribute__ ((always_inline)) +#endif + +/************************************************************************** + * + * Efx data structures + * + **************************************************************************/ + +#define EFX_MAX_CHANNELS 32 +#define EFX_MAX_TX_QUEUES 1 +#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS + +/** + * struct efx_special_buffer - An Efx special buffer + * @addr: CPU base address of the buffer + * @dma_addr: DMA base address of the buffer + * @len: Buffer length, in bytes + * @index: Buffer index within controller;s buffer table + * @entries: Number of buffer table entries + * + * Special buffers are used for the event queues and the TX and RX + * descriptor queues for each channel. They are *not* used for the + * actual transmit and receive buffers. + * + * Note that for Falcon, TX and RX descriptor queues live in host memory. + * Allocation and freeing procedures must take this into account. + */ +struct efx_special_buffer { + void *addr; + dma_addr_t dma_addr; + unsigned int len; + int index; + int entries; +}; + +/** + * struct efx_tx_buffer - An Efx TX buffer + * @skb: The associated socket buffer. + * Set only on the final fragment of a packet; %NULL for all other + * fragments. When this fragment completes, then we can free this + * skb. + * @dma_addr: DMA address of the fragment. + * @len: Length of this fragment. + * This field is zero when the queue slot is empty. + * @continuation: True if this fragment is not the end of a packet. + * @unmap_single: True if pci_unmap_single should be used. + * @unmap_addr: DMA address to unmap + * @unmap_len: Length of this fragment to unmap + */ +struct efx_tx_buffer { + const struct sk_buff *skb; + dma_addr_t dma_addr; + unsigned short len; + unsigned char continuation; + unsigned char unmap_single; + dma_addr_t unmap_addr; + unsigned short unmap_len; +}; + +/** + * struct efx_tx_queue - An Efx TX queue + * + * This is a ring buffer of TX fragments. + * Since the TX completion path always executes on the same + * CPU and the xmit path can operate on different CPUs, + * performance is increased by ensuring that the completion + * path and the xmit path operate on different cache lines. + * This is particularly important if the xmit path is always + * executing on one CPU which is different from the completion + * path. There is also a cache line for members which are + * read but not written on the fast path. + * + * @efx: The associated Efx NIC + * @queue: DMA queue number + * @used: Queue is used by net driver + * @channel: The associated channel + * @buffer: The software buffer ring + * @txd: The hardware descriptor ring + * @read_count: Current read pointer. + * This is the number of buffers that have been removed from both rings. + * @stopped: Stopped flag. + * Set if this TX queue is currently stopping its port. + * @insert_count: Current insert pointer + * This is the number of buffers that have been added to the + * software ring. + * @write_count: Current write pointer + * This is the number of buffers that have been added to the + * hardware ring. + * @old_read_count: The value of read_count when last checked. + * This is here for performance reasons. The xmit path will + * only get the up-to-date value of read_count if this + * variable indicates that the queue is full. This is to + * avoid cache-line ping-pong between the xmit path and the + * completion path. + */ +struct efx_tx_queue { + /* Members which don't change on the fast path */ + struct efx_nic *efx ____cacheline_aligned_in_smp; + int queue; + int used; + struct efx_channel *channel; + struct efx_nic *nic; + struct efx_tx_buffer *buffer; + struct efx_special_buffer txd; + + /* Members used mainly on the completion path */ + unsigned int read_count ____cacheline_aligned_in_smp; + int stopped; + + /* Members used only on the xmit path */ + unsigned int insert_count ____cacheline_aligned_in_smp; + unsigned int write_count; + unsigned int old_read_count; +}; + +/** + * struct efx_rx_buffer - An Efx RX data buffer + * @dma_addr: DMA base address of the buffer + * @skb: The associated socket buffer, if any. + * If both this and page are %NULL, the buffer slot is currently free. + * @page: The associated page buffer, if any. + * If both this and skb are %NULL, the buffer slot is currently free. + * @data: Pointer to ethernet header + * @len: Buffer length, in bytes. + * @unmap_addr: DMA address to unmap + */ +struct efx_rx_buffer { + dma_addr_t dma_addr; + struct sk_buff *skb; + struct page *page; + char *data; + unsigned int len; + dma_addr_t unmap_addr; +}; + +/** + * struct efx_rx_queue - An Efx RX queue + * @efx: The associated Efx NIC + * @queue: DMA queue number + * @used: Queue is used by net driver + * @channel: The associated channel + * @buffer: The software buffer ring + * @rxd: The hardware descriptor ring + * @added_count: Number of buffers added to the receive queue. + * @notified_count: Number of buffers given to NIC (<= @added_count). + * @removed_count: Number of buffers removed from the receive queue. + * @add_lock: Receive queue descriptor add spin lock. + * This lock must be held in order to add buffers to the RX + * descriptor ring (rxd and buffer) and to update added_count (but + * not removed_count). + * @max_fill: RX descriptor maximum fill level (<= ring size) + * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill + * (<= @max_fill) + * @fast_fill_limit: The level to which a fast fill will fill + * (@fast_fill_trigger <= @fast_fill_limit <= @max_fill) + * @min_fill: RX descriptor minimum non-zero fill level. + * This records the minimum fill level observed when a ring + * refill was triggered. + * @min_overfill: RX descriptor minimum overflow fill level. + * This records the minimum fill level at which RX queue + * overflow was observed. It should never be set. + * @alloc_page_count: RX allocation strategy counter. + * @alloc_skb_count: RX allocation strategy counter. + * @work: Descriptor push work thread + * @buf_page: Page for next RX buffer. + * We can use a single page for multiple RX buffers. This tracks + * the remaining space in the allocation. + * @buf_dma_addr: Page's DMA address. + * @buf_data: Page's host address. + */ +struct efx_rx_queue { + struct efx_nic *efx; + int queue; + int used; + struct efx_channel *channel; + struct efx_rx_buffer *buffer; + struct efx_special_buffer rxd; + + int added_count; + int notified_count; + int removed_count; + spinlock_t add_lock; + unsigned int max_fill; + unsigned int fast_fill_trigger; + unsigned int fast_fill_limit; + unsigned int min_fill; + unsigned int min_overfill; + unsigned int alloc_page_count; + unsigned int alloc_skb_count; + struct delayed_work work; + unsigned int slow_fill_count; + + struct page *buf_page; + dma_addr_t buf_dma_addr; + char *buf_data; +}; + +/** + * struct efx_buffer - An Efx general-purpose buffer + * @addr: host base address of the buffer + * @dma_addr: DMA base address of the buffer + * @len: Buffer length, in bytes + * + * Falcon uses these buffers for its interrupt status registers and + * MAC stats dumps. + */ +struct efx_buffer { + void *addr; + dma_addr_t dma_addr; + unsigned int len; +}; + + +/* Flags for channel->used_flags */ +#define EFX_USED_BY_RX 1 +#define EFX_USED_BY_TX 2 +#define EFX_USED_BY_RX_TX (EFX_USED_BY_RX | EFX_USED_BY_TX) + +enum efx_rx_alloc_method { + RX_ALLOC_METHOD_AUTO = 0, + RX_ALLOC_METHOD_SKB = 1, + RX_ALLOC_METHOD_PAGE = 2, +}; + +/** + * struct efx_channel - An Efx channel + * + * A channel comprises an event queue, at least one TX queue, at least + * one RX queue, and an associated tasklet for processing the event + * queue. + * + * @efx: Associated Efx NIC + * @evqnum: Event queue number + * @channel: Channel instance number + * @used_flags: Channel is used by net driver + * @enabled: Channel enabled indicator + * @irq: IRQ number (MSI and MSI-X only) + * @has_interrupt: Channel has an interrupt + * @irq_moderation: IRQ moderation value (in us) + * @napi_dev: Net device used with NAPI + * @napi_str: NAPI control structure + * @reset_work: Scheduled reset work thread + * @work_pending: Is work pending via NAPI? + * @eventq: Event queue buffer + * @eventq_read_ptr: Event queue read pointer + * @last_eventq_read_ptr: Last event queue read pointer value. + * @eventq_magic: Event queue magic value for driver-generated test events + * @lro_mgr: LRO state + * @rx_alloc_level: Watermark based heuristic counter for pushing descriptors + * and diagnostic counters + * @rx_alloc_push_pages: RX allocation method currently in use for pushing + * descriptors + * @rx_alloc_pop_pages: RX allocation method currently in use for popping + * descriptors + * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors + * @n_rx_ip_frag_err: Count of RX IP fragment errors + * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors + * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors + * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors + * @n_rx_overlength: Count of RX_OVERLENGTH errors + * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun + */ +struct efx_channel { + struct efx_nic *efx; + int evqnum; + int channel; + int used_flags; + int enabled; + int irq; + unsigned int has_interrupt; + unsigned int irq_moderation; + struct net_device *napi_dev; + struct napi_struct napi_str; + struct work_struct reset_work; + int work_pending; + struct efx_special_buffer eventq; + unsigned int eventq_read_ptr; + unsigned int last_eventq_read_ptr; + unsigned int eventq_magic; + + struct net_lro_mgr lro_mgr; + int rx_alloc_level; + int rx_alloc_push_pages; + int rx_alloc_pop_pages; + + unsigned n_rx_tobe_disc; + unsigned n_rx_ip_frag_err; + unsigned n_rx_ip_hdr_chksum_err; + unsigned n_rx_tcp_udp_chksum_err; + unsigned n_rx_frm_trunc; + unsigned n_rx_overlength; + unsigned n_skbuff_leaks; + + /* Used to pipeline received packets in order to optimise memory + * access with prefetches. + */ + struct efx_rx_buffer *rx_pkt; + int rx_pkt_csummed; + +}; + +/** + * struct efx_blinker - S/W LED blinking context + * @led_num: LED ID (board-specific meaning) + * @state: Current state - on or off + * @resubmit: Timer resubmission flag + * @timer: Control timer for blinking + */ +struct efx_blinker { + int led_num; + int state; + int resubmit; + struct timer_list timer; +}; + + +/** + * struct efx_board - board information + * @type: Board model type + * @major: Major rev. ('A', 'B' ...) + * @minor: Minor rev. (0, 1, ...) + * @init: Initialisation function + * @init_leds: Sets up board LEDs + * @set_fault_led: Turns the fault LED on or off + * @blink: Starts/stops blinking + * @blinker: used to blink LEDs in software + */ +struct efx_board { + int type; + int major; + int minor; + int (*init) (struct efx_nic *nic); + /* As the LEDs are typically attached to the PHY, LEDs + * have a separate init callback that happens later than + * board init. */ + int (*init_leds)(struct efx_nic *efx); + void (*set_fault_led) (struct efx_nic *efx, int state); + void (*blink) (struct efx_nic *efx, int start); + struct efx_blinker blinker; +}; + +enum efx_int_mode { + /* Be careful if altering to correct macro below */ + EFX_INT_MODE_MSIX = 0, + EFX_INT_MODE_MSI = 1, + EFX_INT_MODE_LEGACY = 2, + EFX_INT_MODE_MAX /* Insert any new items before this */ +}; +#define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) + +enum phy_type { + PHY_TYPE_NONE = 0, + PHY_TYPE_CX4_RTMR = 1, + PHY_TYPE_1G_ALASKA = 2, + PHY_TYPE_10XPRESS = 3, + PHY_TYPE_XFP = 4, + PHY_TYPE_PM8358 = 6, + PHY_TYPE_MAX /* Insert any new items before this */ +}; + +#define PHY_ADDR_INVALID 0xff + +enum nic_state { + STATE_INIT = 0, + STATE_RUNNING = 1, + STATE_FINI = 2, + STATE_RESETTING = 3, /* rtnl_lock always held */ + STATE_DISABLED = 4, + STATE_MAX, +}; + +/* + * Alignment of page-allocated RX buffers + * + * Controls the number of bytes inserted at the start of an RX buffer. + * This is the equivalent of NET_IP_ALIGN [which controls the alignment + * of the skb->head for hardware DMA]. + */ +#if defined(__i386__) || defined(__x86_64__) +#define EFX_PAGE_IP_ALIGN 0 +#else +#define EFX_PAGE_IP_ALIGN NET_IP_ALIGN +#endif + +/* + * Alignment of the skb->head which wraps a page-allocated RX buffer + * + * The skb allocated to wrap an rx_buffer can have this alignment. Since + * the data is memcpy'd from the rx_buf, it does not need to be equal to + * EFX_PAGE_IP_ALIGN. + */ +#define EFX_PAGE_SKB_ALIGN 2 + +/* Forward declaration */ +struct efx_nic; + +/* Pseudo bit-mask flow control field */ +enum efx_fc_type { + EFX_FC_RX = 1, + EFX_FC_TX = 2, + EFX_FC_AUTO = 4, +}; + +/** + * struct efx_phy_operations - Efx PHY operations table + * @init: Initialise PHY + * @fini: Shut down PHY + * @reconfigure: Reconfigure PHY (e.g. for new link parameters) + * @clear_interrupt: Clear down interrupt + * @blink: Blink LEDs + * @check_hw: Check hardware + * @reset_xaui: Reset XAUI side of PHY for (software sequenced reset) + * @mmds: MMD presence mask + */ +struct efx_phy_operations { + int (*init) (struct efx_nic *efx); + void (*fini) (struct efx_nic *efx); + void (*reconfigure) (struct efx_nic *efx); + void (*clear_interrupt) (struct efx_nic *efx); + int (*check_hw) (struct efx_nic *efx); + void (*reset_xaui) (struct efx_nic *efx); + int mmds; +}; + +/* + * Efx extended statistics + * + * Not all statistics are provided by all supported MACs. The purpose + * is this structure is to contain the raw statistics provided by each + * MAC. + */ +struct efx_mac_stats { + u64 tx_bytes; + u64 tx_good_bytes; + u64 tx_bad_bytes; + unsigned long tx_packets; + unsigned long tx_bad; + unsigned long tx_pause; + unsigned long tx_control; + unsigned long tx_unicast; + unsigned long tx_multicast; + unsigned long tx_broadcast; + unsigned long tx_lt64; + unsigned long tx_64; + unsigned long tx_65_to_127; + unsigned long tx_128_to_255; + unsigned long tx_256_to_511; + unsigned long tx_512_to_1023; + unsigned long tx_1024_to_15xx; + unsigned long tx_15xx_to_jumbo; + unsigned long tx_gtjumbo; + unsigned long tx_collision; + unsigned long tx_single_collision; + unsigned long tx_multiple_collision; + unsigned long tx_excessive_collision; + unsigned long tx_deferred; + unsigned long tx_late_collision; + unsigned long tx_excessive_deferred; + unsigned long tx_non_tcpudp; + unsigned long tx_mac_src_error; + unsigned long tx_ip_src_error; + u64 rx_bytes; + u64 rx_good_bytes; + u64 rx_bad_bytes; + unsigned long rx_packets; + unsigned long rx_good; + unsigned long rx_bad; + unsigned long rx_pause; + unsigned long rx_control; + unsigned long rx_unicast; + unsigned long rx_multicast; + unsigned long rx_broadcast; + unsigned long rx_lt64; + unsigned long rx_64; + unsigned long rx_65_to_127; + unsigned long rx_128_to_255; + unsigned long rx_256_to_511; + unsigned long rx_512_to_1023; + unsigned long rx_1024_to_15xx; + unsigned long rx_15xx_to_jumbo; + unsigned long rx_gtjumbo; + unsigned long rx_bad_lt64; + unsigned long rx_bad_64_to_15xx; + unsigned long rx_bad_15xx_to_jumbo; + unsigned long rx_bad_gtjumbo; + unsigned long rx_overflow; + unsigned long rx_missed; + unsigned long rx_false_carrier; + unsigned long rx_symbol_error; + unsigned long rx_align_error; + unsigned long rx_length_error; + unsigned long rx_internal_error; + unsigned long rx_good_lt64; +}; + +/* Number of bits used in a multicast filter hash address */ +#define EFX_MCAST_HASH_BITS 8 + +/* Number of (single-bit) entries in a multicast filter hash */ +#define EFX_MCAST_HASH_ENTRIES (1 << EFX_MCAST_HASH_BITS) + +/* An Efx multicast filter hash */ +union efx_multicast_hash { + u8 byte[EFX_MCAST_HASH_ENTRIES / 8]; + efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8]; +}; + +/** + * struct efx_nic - an Efx NIC + * @name: Device name (net device name or bus id before net device registered) + * @pci_dev: The PCI device + * @type: Controller type attributes + * @legacy_irq: IRQ number + * @workqueue: Workqueue for resets, port reconfigures and the HW monitor + * @reset_work: Scheduled reset workitem + * @monitor_work: Hardware monitor workitem + * @membase_phys: Memory BAR value as physical address + * @membase: Memory BAR value + * @biu_lock: BIU (bus interface unit) lock + * @interrupt_mode: Interrupt mode + * @i2c: I2C interface + * @board_info: Board-level information + * @state: Device state flag. Serialised by the rtnl_lock. + * @reset_pending: Pending reset method (normally RESET_TYPE_NONE) + * @tx_queue: TX DMA queues + * @rx_queue: RX DMA queues + * @channel: Channels + * @rss_queues: Number of RSS queues + * @rx_buffer_len: RX buffer length + * @rx_buffer_order: Order (log2) of number of pages for each RX buffer + * @irq_status: Interrupt status buffer + * @last_irq_cpu: Last CPU to handle interrupt. + * This register is written with the SMP processor ID whenever an + * interrupt is handled. It is used by falcon_test_interrupt() + * to verify that an interrupt has occurred. + * @n_rx_nodesc_drop_cnt: RX no descriptor drop count + * @nic_data: Hardware dependant state + * @mac_lock: MAC access lock. Protects @port_enabled, efx_monitor() and + * efx_reconfigure_port() + * @port_enabled: Port enabled indicator. + * Serialises efx_stop_all(), efx_start_all() and efx_monitor() and + * efx_reconfigure_work with kernel interfaces. Safe to read under any + * one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must + * be held to modify it. + * @port_initialized: Port initialized? + * @net_dev: Operating system network device. Consider holding the rtnl lock + * @rx_checksum_enabled: RX checksumming enabled + * @netif_stop_count: Port stop count + * @netif_stop_lock: Port stop lock + * @mac_stats: MAC statistics. These include all statistics the MACs + * can provide. Generic code converts these into a standard + * &struct net_device_stats. + * @stats_buffer: DMA buffer for statistics + * @stats_lock: Statistics update lock + * @mac_address: Permanent MAC address + * @phy_type: PHY type + * @phy_lock: PHY access lock + * @phy_op: PHY interface + * @phy_data: PHY private data (including PHY-specific stats) + * @mii: PHY interface + * @phy_powered: PHY power state + * @tx_disabled: PHY transmitter turned off + * @link_up: Link status + * @link_options: Link options (MII/GMII format) + * @n_link_state_changes: Number of times the link has changed state + * @promiscuous: Promiscuous flag. Protected by netif_tx_lock. + * @multicast_hash: Multicast hash table + * @flow_control: Flow control flags - separate RX/TX so can't use link_options + * @reconfigure_work: work item for dealing with PHY events + * + * The @priv field of the corresponding &struct net_device points to + * this. + */ +struct efx_nic { + char name[IFNAMSIZ]; + struct pci_dev *pci_dev; + const struct efx_nic_type *type; + int legacy_irq; + struct workqueue_struct *workqueue; + struct work_struct reset_work; + struct delayed_work monitor_work; + unsigned long membase_phys; + void __iomem *membase; + spinlock_t biu_lock; + enum efx_int_mode interrupt_mode; + + struct efx_i2c_interface i2c; + struct efx_board board_info; + + enum nic_state state; + enum reset_type reset_pending; + + struct efx_tx_queue tx_queue[EFX_MAX_TX_QUEUES]; + struct efx_rx_queue rx_queue[EFX_MAX_RX_QUEUES]; + struct efx_channel channel[EFX_MAX_CHANNELS]; + + int rss_queues; + unsigned int rx_buffer_len; + unsigned int rx_buffer_order; + + struct efx_buffer irq_status; + volatile signed int last_irq_cpu; + + unsigned n_rx_nodesc_drop_cnt; + + void *nic_data; + + struct mutex mac_lock; + int port_enabled; + + int port_initialized; + struct net_device *net_dev; + int rx_checksum_enabled; + + atomic_t netif_stop_count; + spinlock_t netif_stop_lock; + + struct efx_mac_stats mac_stats; + struct efx_buffer stats_buffer; + spinlock_t stats_lock; + + unsigned char mac_address[ETH_ALEN]; + + enum phy_type phy_type; + spinlock_t phy_lock; + struct efx_phy_operations *phy_op; + void *phy_data; + struct mii_if_info mii; + + int link_up; + unsigned int link_options; + unsigned int n_link_state_changes; + + int promiscuous; + union efx_multicast_hash multicast_hash; + enum efx_fc_type flow_control; + struct work_struct reconfigure_work; + + atomic_t rx_reset; +}; + +/** + * struct efx_nic_type - Efx device type definition + * @mem_bar: Memory BAR number + * @mem_map_size: Memory BAR mapped size + * @txd_ptr_tbl_base: TX descriptor ring base address + * @rxd_ptr_tbl_base: RX descriptor ring base address + * @buf_tbl_base: Buffer table base address + * @evq_ptr_tbl_base: Event queue pointer table base address + * @evq_rptr_tbl_base: Event queue read-pointer table base address + * @txd_ring_mask: TX descriptor ring size - 1 (must be a power of two - 1) + * @rxd_ring_mask: RX descriptor ring size - 1 (must be a power of two - 1) + * @evq_size: Event queue size (must be a power of two) + * @max_dma_mask: Maximum possible DMA mask + * @tx_dma_mask: TX DMA mask + * @bug5391_mask: Address mask for bug 5391 workaround + * @rx_xoff_thresh: RX FIFO XOFF watermark (bytes) + * @rx_xon_thresh: RX FIFO XON watermark (bytes) + * @rx_buffer_padding: Padding added to each RX buffer + * @max_interrupt_mode: Highest capability interrupt mode supported + * from &enum efx_init_mode. + * @phys_addr_channels: Number of channels with physically addressed + * descriptors + */ +struct efx_nic_type { + unsigned int mem_bar; + unsigned int mem_map_size; + unsigned int txd_ptr_tbl_base; + unsigned int rxd_ptr_tbl_base; + unsigned int buf_tbl_base; + unsigned int evq_ptr_tbl_base; + unsigned int evq_rptr_tbl_base; + + unsigned int txd_ring_mask; + unsigned int rxd_ring_mask; + unsigned int evq_size; + dma_addr_t max_dma_mask; + unsigned int tx_dma_mask; + unsigned bug5391_mask; + + int rx_xoff_thresh; + int rx_xon_thresh; + unsigned int rx_buffer_padding; + unsigned int max_interrupt_mode; + unsigned int phys_addr_channels; +}; + +/************************************************************************** + * + * Prototypes and inline functions + * + *************************************************************************/ + +/* Iterate over all used channels */ +#define efx_for_each_channel(_channel, _efx) \ + for (_channel = &_efx->channel[0]; \ + _channel < &_efx->channel[EFX_MAX_CHANNELS]; \ + _channel++) \ + if (!_channel->used_flags) \ + continue; \ + else + +/* Iterate over all used channels with interrupts */ +#define efx_for_each_channel_with_interrupt(_channel, _efx) \ + for (_channel = &_efx->channel[0]; \ + _channel < &_efx->channel[EFX_MAX_CHANNELS]; \ + _channel++) \ + if (!(_channel->used_flags && _channel->has_interrupt)) \ + continue; \ + else + +/* Iterate over all used TX queues */ +#define efx_for_each_tx_queue(_tx_queue, _efx) \ + for (_tx_queue = &_efx->tx_queue[0]; \ + _tx_queue < &_efx->tx_queue[EFX_MAX_TX_QUEUES]; \ + _tx_queue++) \ + if (!_tx_queue->used) \ + continue; \ + else + +/* Iterate over all TX queues belonging to a channel */ +#define efx_for_each_channel_tx_queue(_tx_queue, _channel) \ + for (_tx_queue = &_channel->efx->tx_queue[0]; \ + _tx_queue < &_channel->efx->tx_queue[EFX_MAX_TX_QUEUES]; \ + _tx_queue++) \ + if ((!_tx_queue->used) || \ + (_tx_queue->channel != _channel)) \ + continue; \ + else + +/* Iterate over all used RX queues */ +#define efx_for_each_rx_queue(_rx_queue, _efx) \ + for (_rx_queue = &_efx->rx_queue[0]; \ + _rx_queue < &_efx->rx_queue[EFX_MAX_RX_QUEUES]; \ + _rx_queue++) \ + if (!_rx_queue->used) \ + continue; \ + else + +/* Iterate over all RX queues belonging to a channel */ +#define efx_for_each_channel_rx_queue(_rx_queue, _channel) \ + for (_rx_queue = &_channel->efx->rx_queue[0]; \ + _rx_queue < &_channel->efx->rx_queue[EFX_MAX_RX_QUEUES]; \ + _rx_queue++) \ + if ((!_rx_queue->used) || \ + (_rx_queue->channel != _channel)) \ + continue; \ + else + +/* Returns a pointer to the specified receive buffer in the RX + * descriptor queue. + */ +static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, + unsigned int index) +{ + return (&rx_queue->buffer[index]); +} + +/* Set bit in a little-endian bitfield */ +static inline void set_bit_le(int nr, unsigned char *addr) +{ + addr[nr / 8] |= (1 << (nr % 8)); +} + +/* Clear bit in a little-endian bitfield */ +static inline void clear_bit_le(int nr, unsigned char *addr) +{ + addr[nr / 8] &= ~(1 << (nr % 8)); +} + + +/** + * EFX_MAX_FRAME_LEN - calculate maximum frame length + * + * This calculates the maximum frame length that will be used for a + * given MTU. The frame length will be equal to the MTU plus a + * constant amount of header space and padding. This is the quantity + * that the net driver will program into the MAC as the maximum frame + * length. + * + * The 10G MAC used in Falcon requires 8-byte alignment on the frame + * length, so we round up to the nearest 8. + */ +#define EFX_MAX_FRAME_LEN(mtu) \ + ((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */) + 7) & ~7) + + +#endif /* EFX_NET_DRIVER_H */ diff --git a/drivers/net/sfc/phy.h b/drivers/net/sfc/phy.h new file mode 100644 index 00000000000..9d02c84e6b2 --- /dev/null +++ b/drivers/net/sfc/phy.h @@ -0,0 +1,48 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_PHY_H +#define EFX_PHY_H + +/**************************************************************************** + * 10Xpress (SFX7101) PHY + */ +extern struct efx_phy_operations falcon_tenxpress_phy_ops; + +enum tenxpress_state { + TENXPRESS_STATUS_OFF = 0, + TENXPRESS_STATUS_OTEMP = 1, + TENXPRESS_STATUS_NORMAL = 2, +}; + +extern void tenxpress_set_state(struct efx_nic *efx, + enum tenxpress_state state); +extern void tenxpress_phy_blink(struct efx_nic *efx, int blink); +extern void tenxpress_crc_err(struct efx_nic *efx); + +/**************************************************************************** + * Exported functions from the driver for XFP optical PHYs + */ +extern struct efx_phy_operations falcon_xfp_phy_ops; + +/* The QUAKE XFP PHY provides various H/W control states for LEDs */ +#define QUAKE_LED_LINK_INVAL (0) +#define QUAKE_LED_LINK_STAT (1) +#define QUAKE_LED_LINK_ACT (2) +#define QUAKE_LED_LINK_ACTSTAT (3) +#define QUAKE_LED_OFF (4) +#define QUAKE_LED_ON (5) +#define QUAKE_LED_LINK_INPUT (6) /* Pin is an input. */ +/* What link the LED tracks */ +#define QUAKE_LED_TXLINK (0) +#define QUAKE_LED_RXLINK (8) + +extern void xfp_set_led(struct efx_nic *p, int led, int state); + +#endif diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c new file mode 100644 index 00000000000..551299b462a --- /dev/null +++ b/drivers/net/sfc/rx.c @@ -0,0 +1,875 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "net_driver.h" +#include "rx.h" +#include "efx.h" +#include "falcon.h" +#include "workarounds.h" + +/* Number of RX descriptors pushed at once. */ +#define EFX_RX_BATCH 8 + +/* Size of buffer allocated for skb header area. */ +#define EFX_SKB_HEADERS 64u + +/* + * rx_alloc_method - RX buffer allocation method + * + * This driver supports two methods for allocating and using RX buffers: + * each RX buffer may be backed by an skb or by an order-n page. + * + * When LRO is in use then the second method has a lower overhead, + * since we don't have to allocate then free skbs on reassembled frames. + * + * Values: + * - RX_ALLOC_METHOD_AUTO = 0 + * - RX_ALLOC_METHOD_SKB = 1 + * - RX_ALLOC_METHOD_PAGE = 2 + * + * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count + * controlled by the parameters below. + * + * - Since pushing and popping descriptors are separated by the rx_queue + * size, so the watermarks should be ~rxd_size. + * - The performance win by using page-based allocation for LRO is less + * than the performance hit of using page-based allocation of non-LRO, + * so the watermarks should reflect this. + * + * Per channel we maintain a single variable, updated by each channel: + * + * rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO : + * RX_ALLOC_FACTOR_SKB) + * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which + * limits the hysteresis), and update the allocation strategy: + * + * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ? + * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB) + */ +static int rx_alloc_method = RX_ALLOC_METHOD_PAGE; + +#define RX_ALLOC_LEVEL_LRO 0x2000 +#define RX_ALLOC_LEVEL_MAX 0x3000 +#define RX_ALLOC_FACTOR_LRO 1 +#define RX_ALLOC_FACTOR_SKB (-2) + +/* This is the percentage fill level below which new RX descriptors + * will be added to the RX descriptor ring. + */ +static unsigned int rx_refill_threshold = 90; + +/* This is the percentage fill level to which an RX queue will be refilled + * when the "RX refill threshold" is reached. + */ +static unsigned int rx_refill_limit = 95; + +/* + * RX maximum head room required. + * + * This must be at least 1 to prevent overflow and at least 2 to allow + * pipelined receives. + */ +#define EFX_RXD_HEAD_ROOM 2 + +/* Macros for zero-order pages (potentially) containing multiple RX buffers */ +#define RX_DATA_OFFSET(_data) \ + (((unsigned long) (_data)) & (PAGE_SIZE-1)) +#define RX_BUF_OFFSET(_rx_buf) \ + RX_DATA_OFFSET((_rx_buf)->data) + +#define RX_PAGE_SIZE(_efx) \ + (PAGE_SIZE * (1u << (_efx)->rx_buffer_order)) + + +/************************************************************************** + * + * Linux generic LRO handling + * + ************************************************************************** + */ + +static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr, + void **tcpudp_hdr, u64 *hdr_flags, void *priv) +{ + struct efx_channel *channel = (struct efx_channel *)priv; + struct iphdr *iph; + struct tcphdr *th; + + iph = (struct iphdr *)skb->data; + if (skb->protocol != htons(ETH_P_IP) || iph->protocol != IPPROTO_TCP) + goto fail; + + th = (struct tcphdr *)(skb->data + iph->ihl * 4); + + *tcpudp_hdr = th; + *ip_hdr = iph; + *hdr_flags = LRO_IPV4 | LRO_TCP; + + channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; + return 0; +fail: + channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; + return -1; +} + +static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr, + void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, + void *priv) +{ + struct efx_channel *channel = (struct efx_channel *)priv; + struct ethhdr *eh; + struct iphdr *iph; + + /* We support EtherII and VLAN encapsulated IPv4 */ + eh = (struct ethhdr *)(page_address(frag->page) + frag->page_offset); + *mac_hdr = eh; + + if (eh->h_proto == htons(ETH_P_IP)) { + iph = (struct iphdr *)(eh + 1); + } else { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)eh; + if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) + goto fail; + + iph = (struct iphdr *)(veh + 1); + } + *ip_hdr = iph; + + /* We can only do LRO over TCP */ + if (iph->protocol != IPPROTO_TCP) + goto fail; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *tcpudp_hdr = (struct tcphdr *)((u8 *) iph + iph->ihl * 4); + + channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO; + return 0; + fail: + channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; + return -1; +} + +int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx) +{ + size_t s = sizeof(struct net_lro_desc) * EFX_MAX_LRO_DESCRIPTORS; + struct net_lro_desc *lro_arr; + + /* Allocate the LRO descriptors structure */ + lro_arr = kzalloc(s, GFP_KERNEL); + if (lro_arr == NULL) + return -ENOMEM; + + lro_mgr->lro_arr = lro_arr; + lro_mgr->max_desc = EFX_MAX_LRO_DESCRIPTORS; + lro_mgr->max_aggr = EFX_MAX_LRO_AGGR; + lro_mgr->frag_align_pad = EFX_PAGE_SKB_ALIGN; + + lro_mgr->get_skb_header = efx_lro_get_skb_hdr; + lro_mgr->get_frag_header = efx_get_frag_hdr; + lro_mgr->dev = efx->net_dev; + + lro_mgr->features = LRO_F_NAPI; + + /* We can pass packets up with the checksum intact */ + lro_mgr->ip_summed = CHECKSUM_UNNECESSARY; + + lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; + + return 0; +} + +void efx_lro_fini(struct net_lro_mgr *lro_mgr) +{ + kfree(lro_mgr->lro_arr); + lro_mgr->lro_arr = NULL; +} + +/** + * efx_init_rx_buffer_skb - create new RX buffer using skb-based allocation + * + * @rx_queue: Efx RX queue + * @rx_buf: RX buffer structure to populate + * + * This allocates memory for a new receive buffer, maps it for DMA, + * and populates a struct efx_rx_buffer with the relevant + * information. Return a negative error code or 0 on success. + */ +static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + struct efx_nic *efx = rx_queue->efx; + struct net_device *net_dev = efx->net_dev; + int skb_len = efx->rx_buffer_len; + + rx_buf->skb = netdev_alloc_skb(net_dev, skb_len); + if (unlikely(!rx_buf->skb)) + return -ENOMEM; + + /* Adjust the SKB for padding and checksum */ + skb_reserve(rx_buf->skb, NET_IP_ALIGN); + rx_buf->len = skb_len - NET_IP_ALIGN; + rx_buf->data = (char *)rx_buf->skb->data; + rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY; + + rx_buf->dma_addr = pci_map_single(efx->pci_dev, + rx_buf->data, rx_buf->len, + PCI_DMA_FROMDEVICE); + + if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) { + dev_kfree_skb_any(rx_buf->skb); + rx_buf->skb = NULL; + return -EIO; + } + + return 0; +} + +/** + * efx_init_rx_buffer_page - create new RX buffer using page-based allocation + * + * @rx_queue: Efx RX queue + * @rx_buf: RX buffer structure to populate + * + * This allocates memory for a new receive buffer, maps it for DMA, + * and populates a struct efx_rx_buffer with the relevant + * information. Return a negative error code or 0 on success. + */ +static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + struct efx_nic *efx = rx_queue->efx; + int bytes, space, offset; + + bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN; + + /* If there is space left in the previously allocated page, + * then use it. Otherwise allocate a new one */ + rx_buf->page = rx_queue->buf_page; + if (rx_buf->page == NULL) { + dma_addr_t dma_addr; + + rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC, + efx->rx_buffer_order); + if (unlikely(rx_buf->page == NULL)) + return -ENOMEM; + + dma_addr = pci_map_page(efx->pci_dev, rx_buf->page, + 0, RX_PAGE_SIZE(efx), + PCI_DMA_FROMDEVICE); + + if (unlikely(pci_dma_mapping_error(dma_addr))) { + __free_pages(rx_buf->page, efx->rx_buffer_order); + rx_buf->page = NULL; + return -EIO; + } + + rx_queue->buf_page = rx_buf->page; + rx_queue->buf_dma_addr = dma_addr; + rx_queue->buf_data = ((char *) page_address(rx_buf->page) + + EFX_PAGE_IP_ALIGN); + } + + offset = RX_DATA_OFFSET(rx_queue->buf_data); + rx_buf->len = bytes; + rx_buf->dma_addr = rx_queue->buf_dma_addr + offset; + rx_buf->data = rx_queue->buf_data; + + /* Try to pack multiple buffers per page */ + if (efx->rx_buffer_order == 0) { + /* The next buffer starts on the next 512 byte boundary */ + rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff); + offset += ((bytes + 0x1ff) & ~0x1ff); + + space = RX_PAGE_SIZE(efx) - offset; + if (space >= bytes) { + /* Refs dropped on kernel releasing each skb */ + get_page(rx_queue->buf_page); + goto out; + } + } + + /* This is the final RX buffer for this page, so mark it for + * unmapping */ + rx_queue->buf_page = NULL; + rx_buf->unmap_addr = rx_queue->buf_dma_addr; + + out: + return 0; +} + +/* This allocates memory for a new receive buffer, maps it for DMA, + * and populates a struct efx_rx_buffer with the relevant + * information. + */ +static inline int efx_init_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *new_rx_buf) +{ + int rc = 0; + + if (rx_queue->channel->rx_alloc_push_pages) { + new_rx_buf->skb = NULL; + rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf); + rx_queue->alloc_page_count++; + } else { + new_rx_buf->page = NULL; + rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf); + rx_queue->alloc_skb_count++; + } + + if (unlikely(rc < 0)) + EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__, + rx_queue->queue, rc); + return rc; +} + +static inline void efx_unmap_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + if (rx_buf->page) { + EFX_BUG_ON_PARANOID(rx_buf->skb); + if (rx_buf->unmap_addr) { + pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr, + RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE); + rx_buf->unmap_addr = 0; + } + } else if (likely(rx_buf->skb)) { + pci_unmap_single(efx->pci_dev, rx_buf->dma_addr, + rx_buf->len, PCI_DMA_FROMDEVICE); + } +} + +static inline void efx_free_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + if (rx_buf->page) { + __free_pages(rx_buf->page, efx->rx_buffer_order); + rx_buf->page = NULL; + } else if (likely(rx_buf->skb)) { + dev_kfree_skb_any(rx_buf->skb); + rx_buf->skb = NULL; + } +} + +static inline void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffer(rx_queue->efx, rx_buf); +} + +/** + * efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * @retry: Recheck the fill level + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@fast_fill_limit. If there is insufficient atomic + * memory to do so, the caller should retry. + */ +static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, + int retry) +{ + struct efx_rx_buffer *rx_buf; + unsigned fill_level, index; + int i, space, rc = 0; + + /* Calculate current fill level. Do this outside the lock, + * because most of the time we'll end up not wanting to do the + * fill anyway. + */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_BUG_ON_PARANOID(fill_level > + rx_queue->efx->type->rxd_ring_mask + 1); + + /* Don't fill if we don't need to */ + if (fill_level >= rx_queue->fast_fill_trigger) + return 0; + + /* Record minimum fill level */ + if (unlikely(fill_level < rx_queue->min_fill)) + if (fill_level) + rx_queue->min_fill = fill_level; + + /* Acquire RX add lock. If this lock is contended, then a fast + * fill must already be in progress (e.g. in the refill + * tasklet), so we don't need to do anything + */ + if (!spin_trylock_bh(&rx_queue->add_lock)) + return -1; + + retry: + /* Recalculate current fill level now that we have the lock */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_BUG_ON_PARANOID(fill_level > + rx_queue->efx->type->rxd_ring_mask + 1); + space = rx_queue->fast_fill_limit - fill_level; + if (space < EFX_RX_BATCH) + goto out_unlock; + + EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from" + " level %d to level %d using %s allocation\n", + rx_queue->queue, fill_level, rx_queue->fast_fill_limit, + rx_queue->channel->rx_alloc_push_pages ? "page" : "skb"); + + do { + for (i = 0; i < EFX_RX_BATCH; ++i) { + index = (rx_queue->added_count & + rx_queue->efx->type->rxd_ring_mask); + rx_buf = efx_rx_buffer(rx_queue, index); + rc = efx_init_rx_buffer(rx_queue, rx_buf); + if (unlikely(rc)) + goto out; + ++rx_queue->added_count; + } + } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH); + + EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring " + "to level %d\n", rx_queue->queue, + rx_queue->added_count - rx_queue->removed_count); + + out: + /* Send write pointer to card. */ + falcon_notify_rx_desc(rx_queue); + + /* If the fast fill is running inside from the refill tasklet, then + * for SMP systems it may be running on a different CPU to + * RX event processing, which means that the fill level may now be + * out of date. */ + if (unlikely(retry && (rc == 0))) + goto retry; + + out_unlock: + spin_unlock_bh(&rx_queue->add_lock); + + return rc; +} + +/** + * efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@fast_fill_limit. If there is insufficient memory to do so, + * it will schedule a work item to immediately continue the fast fill + */ +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue) +{ + int rc; + + rc = __efx_fast_push_rx_descriptors(rx_queue, 0); + if (unlikely(rc)) { + /* Schedule the work item to run immediately. The hope is + * that work is immediately pending to free some memory + * (e.g. an RX event or TX completion) + */ + efx_schedule_slow_fill(rx_queue, 0); + } +} + +void efx_rx_work(struct work_struct *data) +{ + struct efx_rx_queue *rx_queue; + int rc; + + rx_queue = container_of(data, struct efx_rx_queue, work.work); + + if (unlikely(!rx_queue->channel->enabled)) + return; + + EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU " + "%d\n", rx_queue->queue, raw_smp_processor_id()); + + ++rx_queue->slow_fill_count; + /* Push new RX descriptors, allowing at least 1 jiffy for + * the kernel to free some more memory. */ + rc = __efx_fast_push_rx_descriptors(rx_queue, 1); + if (rc) + efx_schedule_slow_fill(rx_queue, 1); +} + +static inline void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + int len, int *discard, + int *leak_packet) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; + + if (likely(len <= max_len)) + return; + + /* The packet must be discarded, but this is only a fatal error + * if the caller indicated it was + */ + *discard = 1; + + if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) { + EFX_ERR_RL(efx, " RX queue %d seriously overlength " + "RX event (0x%x > 0x%x+0x%x). Leaking\n", + rx_queue->queue, len, max_len, + efx->type->rx_buffer_padding); + /* If this buffer was skb-allocated, then the meta + * data at the end of the skb will be trashed. So + * we have no choice but to leak the fragment. + */ + *leak_packet = (rx_buf->skb != NULL); + efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY); + } else { + EFX_ERR_RL(efx, " RX queue %d overlength RX event " + "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len); + } + + rx_queue->channel->n_rx_overlength++; +} + +/* Pass a received packet up through the generic LRO stack + * + * Handles driverlink veto, and passes the fragment up via + * the appropriate LRO method + */ +static inline void efx_rx_packet_lro(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) +{ + struct net_lro_mgr *lro_mgr = &channel->lro_mgr; + void *priv = channel; + + /* Pass the skb/page into the LRO engine */ + if (rx_buf->page) { + struct skb_frag_struct frags; + + frags.page = rx_buf->page; + frags.page_offset = RX_BUF_OFFSET(rx_buf); + frags.size = rx_buf->len; + + lro_receive_frags(lro_mgr, &frags, rx_buf->len, + rx_buf->len, priv, 0); + + EFX_BUG_ON_PARANOID(rx_buf->skb); + rx_buf->page = NULL; + } else { + EFX_BUG_ON_PARANOID(!rx_buf->skb); + + lro_receive_skb(lro_mgr, rx_buf->skb, priv); + rx_buf->skb = NULL; + } +} + +/* Allocate and construct an SKB around a struct page.*/ +static inline struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf, + struct efx_nic *efx, + int hdr_len) +{ + struct sk_buff *skb; + + /* Allocate an SKB to store the headers */ + skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN); + if (unlikely(skb == NULL)) { + EFX_ERR_RL(efx, "RX out of memory for skb\n"); + return NULL; + } + + EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags); + EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_reserve(skb, EFX_PAGE_SKB_ALIGN); + + skb->len = rx_buf->len; + skb->truesize = rx_buf->len + sizeof(struct sk_buff); + memcpy(skb->data, rx_buf->data, hdr_len); + skb->tail += hdr_len; + + /* Append the remaining page onto the frag list */ + if (unlikely(rx_buf->len > hdr_len)) { + struct skb_frag_struct *frag = skb_shinfo(skb)->frags; + frag->page = rx_buf->page; + frag->page_offset = RX_BUF_OFFSET(rx_buf) + hdr_len; + frag->size = skb->len - hdr_len; + skb_shinfo(skb)->nr_frags = 1; + skb->data_len = frag->size; + } else { + __free_pages(rx_buf->page, efx->rx_buffer_order); + skb->data_len = 0; + } + + /* Ownership has transferred from the rx_buf to skb */ + rx_buf->page = NULL; + + /* Move past the ethernet header */ + skb->protocol = eth_type_trans(skb, efx->net_dev); + + return skb; +} + +void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, + unsigned int len, int checksummed, int discard) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_buffer *rx_buf; + int leak_packet = 0; + + rx_buf = efx_rx_buffer(rx_queue, index); + EFX_BUG_ON_PARANOID(!rx_buf->data); + EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page); + EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page)); + + /* This allows the refill path to post another buffer. + * EFX_RXD_HEAD_ROOM ensures that the slot we are using + * isn't overwritten yet. + */ + rx_queue->removed_count++; + + /* Validate the length encoded in the event vs the descriptor pushed */ + efx_rx_packet__check_len(rx_queue, rx_buf, len, + &discard, &leak_packet); + + EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n", + rx_queue->queue, index, + (unsigned long long)rx_buf->dma_addr, len, + (checksummed ? " [SUMMED]" : ""), + (discard ? " [DISCARD]" : "")); + + /* Discard packet, if instructed to do so */ + if (unlikely(discard)) { + if (unlikely(leak_packet)) + rx_queue->channel->n_skbuff_leaks++; + else + /* We haven't called efx_unmap_rx_buffer yet, + * so fini the entire rx_buffer here */ + efx_fini_rx_buffer(rx_queue, rx_buf); + return; + } + + /* Release card resources - assumes all RX buffers consumed in-order + * per RX queue + */ + efx_unmap_rx_buffer(efx, rx_buf); + + /* Prefetch nice and early so data will (hopefully) be in cache by + * the time we look at it. + */ + prefetch(rx_buf->data); + + /* Pipeline receives so that we give time for packet headers to be + * prefetched into cache. + */ + rx_buf->len = len; + if (rx_queue->channel->rx_pkt) + __efx_rx_packet(rx_queue->channel, + rx_queue->channel->rx_pkt, + rx_queue->channel->rx_pkt_csummed); + rx_queue->channel->rx_pkt = rx_buf; + rx_queue->channel->rx_pkt_csummed = checksummed; +} + +/* Handle a received packet. Second half: Touches packet payload. */ +void __efx_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, int checksummed) +{ + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + int lro = efx->net_dev->features & NETIF_F_LRO; + + if (rx_buf->skb) { + prefetch(skb_shinfo(rx_buf->skb)); + + skb_put(rx_buf->skb, rx_buf->len); + + /* Move past the ethernet header. rx_buf->data still points + * at the ethernet header */ + rx_buf->skb->protocol = eth_type_trans(rx_buf->skb, + efx->net_dev); + } + + /* Both our generic-LRO and SFC-SSR support skb and page based + * allocation, but neither support switching from one to the + * other on the fly. If we spot that the allocation mode has + * changed, then flush the LRO state. + */ + if (unlikely(channel->rx_alloc_pop_pages != (rx_buf->page != NULL))) { + efx_flush_lro(channel); + channel->rx_alloc_pop_pages = (rx_buf->page != NULL); + } + if (likely(checksummed && lro)) { + efx_rx_packet_lro(channel, rx_buf); + goto done; + } + + /* Form an skb if required */ + if (rx_buf->page) { + int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS); + skb = efx_rx_mk_skb(rx_buf, efx, hdr_len); + if (unlikely(skb == NULL)) { + efx_free_rx_buffer(efx, rx_buf); + goto done; + } + } else { + /* We now own the SKB */ + skb = rx_buf->skb; + rx_buf->skb = NULL; + } + + EFX_BUG_ON_PARANOID(rx_buf->page); + EFX_BUG_ON_PARANOID(rx_buf->skb); + EFX_BUG_ON_PARANOID(!skb); + + /* Set the SKB flags */ + if (unlikely(!checksummed || !efx->rx_checksum_enabled)) + skb->ip_summed = CHECKSUM_NONE; + + /* Pass the packet up */ + netif_receive_skb(skb); + + /* Update allocation strategy method */ + channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; + + /* fall-thru */ +done: + efx->net_dev->last_rx = jiffies; +} + +void efx_rx_strategy(struct efx_channel *channel) +{ + enum efx_rx_alloc_method method = rx_alloc_method; + + /* Only makes sense to use page based allocation if LRO is enabled */ + if (!(channel->efx->net_dev->features & NETIF_F_LRO)) { + method = RX_ALLOC_METHOD_SKB; + } else if (method == RX_ALLOC_METHOD_AUTO) { + /* Constrain the rx_alloc_level */ + if (channel->rx_alloc_level < 0) + channel->rx_alloc_level = 0; + else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX) + channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX; + + /* Decide on the allocation method */ + method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ? + RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB); + } + + /* Push the option */ + channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE); +} + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int rxq_size; + int rc; + + EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue); + + /* Allocate RX buffers */ + rxq_size = (efx->type->rxd_ring_mask + 1) * sizeof(*rx_queue->buffer); + rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL); + if (!rx_queue->buffer) { + rc = -ENOMEM; + goto fail1; + } + + rc = falcon_probe_rx(rx_queue); + if (rc) + goto fail2; + + return 0; + + fail2: + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + fail1: + rx_queue->used = 0; + + return rc; +} + +int efx_init_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int max_fill, trigger, limit; + + EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue); + + /* Initialise ptr fields */ + rx_queue->added_count = 0; + rx_queue->notified_count = 0; + rx_queue->removed_count = 0; + rx_queue->min_fill = -1U; + rx_queue->min_overfill = -1U; + + /* Initialise limit fields */ + max_fill = efx->type->rxd_ring_mask + 1 - EFX_RXD_HEAD_ROOM; + trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; + limit = max_fill * min(rx_refill_limit, 100U) / 100U; + + rx_queue->max_fill = max_fill; + rx_queue->fast_fill_trigger = trigger; + rx_queue->fast_fill_limit = limit; + + /* Set up RX descriptor ring */ + return falcon_init_rx(rx_queue); +} + +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) +{ + int i; + struct efx_rx_buffer *rx_buf; + + EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue); + + falcon_fini_rx(rx_queue); + + /* Release RX buffers NB start at index 0 not current HW ptr */ + if (rx_queue->buffer) { + for (i = 0; i <= rx_queue->efx->type->rxd_ring_mask; i++) { + rx_buf = efx_rx_buffer(rx_queue, i); + efx_fini_rx_buffer(rx_queue, rx_buf); + } + } + + /* For a page that is part-way through splitting into RX buffers */ + if (rx_queue->buf_page != NULL) { + pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr, + RX_PAGE_SIZE(rx_queue->efx), PCI_DMA_FROMDEVICE); + __free_pages(rx_queue->buf_page, + rx_queue->efx->rx_buffer_order); + rx_queue->buf_page = NULL; + } +} + +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) +{ + EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue); + + falcon_remove_rx(rx_queue); + + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + rx_queue->used = 0; +} + +void efx_flush_lro(struct efx_channel *channel) +{ + lro_flush_all(&channel->lro_mgr); +} + + +module_param(rx_alloc_method, int, 0644); +MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers"); + +module_param(rx_refill_threshold, uint, 0444); +MODULE_PARM_DESC(rx_refill_threshold, + "RX descriptor ring fast/slow fill threshold (%)"); + diff --git a/drivers/net/sfc/rx.h b/drivers/net/sfc/rx.h new file mode 100644 index 00000000000..f35e377bfc5 --- /dev/null +++ b/drivers/net/sfc/rx.h @@ -0,0 +1,29 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_RX_H +#define EFX_RX_H + +#include "net_driver.h" + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); +int efx_init_rx_queue(struct efx_rx_queue *rx_queue); +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); + +int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx); +void efx_lro_fini(struct net_lro_mgr *lro_mgr); +void efx_flush_lro(struct efx_channel *channel); +void efx_rx_strategy(struct efx_channel *channel); +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue); +void efx_rx_work(struct work_struct *data); +void __efx_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, int checksummed); + +#endif /* EFX_RX_H */ diff --git a/drivers/net/sfc/sfe4001.c b/drivers/net/sfc/sfe4001.c new file mode 100644 index 00000000000..11fa9fb8f48 --- /dev/null +++ b/drivers/net/sfc/sfe4001.c @@ -0,0 +1,252 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/***************************************************************************** + * Support for the SFE4001 NIC: driver code for the PCA9539 I/O expander that + * controls the PHY power rails, and for the MAX6647 temp. sensor used to check + * the PHY + */ +#include +#include "efx.h" +#include "phy.h" +#include "boards.h" +#include "falcon.h" +#include "falcon_hwdefs.h" +#include "mac.h" + +/************************************************************************** + * + * I2C IO Expander device + * + **************************************************************************/ +#define PCA9539 0x74 + +#define P0_IN 0x00 +#define P0_OUT 0x02 +#define P0_INVERT 0x04 +#define P0_CONFIG 0x06 + +#define P0_EN_1V0X_LBN 0 +#define P0_EN_1V0X_WIDTH 1 +#define P0_EN_1V2_LBN 1 +#define P0_EN_1V2_WIDTH 1 +#define P0_EN_2V5_LBN 2 +#define P0_EN_2V5_WIDTH 1 +#define P0_EN_3V3X_LBN 3 +#define P0_EN_3V3X_WIDTH 1 +#define P0_EN_5V_LBN 4 +#define P0_EN_5V_WIDTH 1 +#define P0_SHORTEN_JTAG_LBN 5 +#define P0_SHORTEN_JTAG_WIDTH 1 +#define P0_X_TRST_LBN 6 +#define P0_X_TRST_WIDTH 1 +#define P0_DSP_RESET_LBN 7 +#define P0_DSP_RESET_WIDTH 1 + +#define P1_IN 0x01 +#define P1_OUT 0x03 +#define P1_INVERT 0x05 +#define P1_CONFIG 0x07 + +#define P1_AFE_PWD_LBN 0 +#define P1_AFE_PWD_WIDTH 1 +#define P1_DSP_PWD25_LBN 1 +#define P1_DSP_PWD25_WIDTH 1 +#define P1_RESERVED_LBN 2 +#define P1_RESERVED_WIDTH 2 +#define P1_SPARE_LBN 4 +#define P1_SPARE_WIDTH 4 + + +/************************************************************************** + * + * Temperature Sensor + * + **************************************************************************/ +#define MAX6647 0x4e + +#define RLTS 0x00 +#define RLTE 0x01 +#define RSL 0x02 +#define RCL 0x03 +#define RCRA 0x04 +#define RLHN 0x05 +#define RLLI 0x06 +#define RRHI 0x07 +#define RRLS 0x08 +#define WCRW 0x0a +#define WLHO 0x0b +#define WRHA 0x0c +#define WRLN 0x0e +#define OSHT 0x0f +#define REET 0x10 +#define RIET 0x11 +#define RWOE 0x19 +#define RWOI 0x20 +#define HYS 0x21 +#define QUEUE 0x22 +#define MFID 0xfe +#define REVID 0xff + +/* Status bits */ +#define MAX6647_BUSY (1 << 7) /* ADC is converting */ +#define MAX6647_LHIGH (1 << 6) /* Local high temp. alarm */ +#define MAX6647_LLOW (1 << 5) /* Local low temp. alarm */ +#define MAX6647_RHIGH (1 << 4) /* Remote high temp. alarm */ +#define MAX6647_RLOW (1 << 3) /* Remote low temp. alarm */ +#define MAX6647_FAULT (1 << 2) /* DXN/DXP short/open circuit */ +#define MAX6647_EOT (1 << 1) /* Remote junction overtemp. */ +#define MAX6647_IOT (1 << 0) /* Local junction overtemp. */ + +static const u8 xgphy_max_temperature = 90; + +void sfe4001_poweroff(struct efx_nic *efx) +{ + struct efx_i2c_interface *i2c = &efx->i2c; + + u8 cfg, out, in; + + EFX_INFO(efx, "%s\n", __func__); + + /* Turn off all power rails */ + out = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + + /* Disable port 1 outputs on IO expander */ + cfg = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &cfg, 1); + + /* Disable port 0 outputs on IO expander */ + cfg = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &cfg, 1); + + /* Clear any over-temperature alert */ + (void) efx_i2c_read(i2c, MAX6647, RSL, &in, 1); +} + +/* This board uses an I2C expander to provider power to the PHY, which needs to + * be turned on before the PHY can be used. + * Context: Process context, rtnl lock held + */ +int sfe4001_poweron(struct efx_nic *efx) +{ + struct efx_i2c_interface *i2c = &efx->i2c; + unsigned int count; + int rc; + u8 out, in, cfg; + efx_dword_t reg; + + /* 10Xpress has fixed-function LED pins, so there is no board-specific + * blink code. */ + efx->board_info.blink = tenxpress_phy_blink; + + /* Ensure that XGXS and XAUI SerDes are held in reset */ + EFX_POPULATE_DWORD_7(reg, XX_PWRDNA_EN, 1, + XX_PWRDNB_EN, 1, + XX_RSTPLLAB_EN, 1, + XX_RESETA_EN, 1, + XX_RESETB_EN, 1, + XX_RSTXGXSRX_EN, 1, + XX_RSTXGXSTX_EN, 1); + falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); + udelay(10); + + /* Set DSP over-temperature alert threshold */ + EFX_INFO(efx, "DSP cut-out at %dC\n", xgphy_max_temperature); + rc = efx_i2c_write(i2c, MAX6647, WLHO, + &xgphy_max_temperature, 1); + if (rc) + goto fail1; + + /* Read it back and verify */ + rc = efx_i2c_read(i2c, MAX6647, RLHN, &in, 1); + if (rc) + goto fail1; + if (in != xgphy_max_temperature) { + rc = -EFAULT; + goto fail1; + } + + /* Clear any previous over-temperature alert */ + rc = efx_i2c_read(i2c, MAX6647, RSL, &in, 1); + if (rc) + goto fail1; + + /* Enable port 0 and port 1 outputs on IO expander */ + cfg = 0x00; + rc = efx_i2c_write(i2c, PCA9539, P0_CONFIG, &cfg, 1); + if (rc) + goto fail1; + cfg = 0xff & ~(1 << P1_SPARE_LBN); + rc = efx_i2c_write(i2c, PCA9539, P1_CONFIG, &cfg, 1); + if (rc) + goto fail2; + + /* Turn all power off then wait 1 sec. This ensures PHY is reset */ + out = 0xff & ~((0 << P0_EN_1V2_LBN) | (0 << P0_EN_2V5_LBN) | + (0 << P0_EN_3V3X_LBN) | (0 << P0_EN_5V_LBN) | + (0 << P0_EN_1V0X_LBN)); + rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + if (rc) + goto fail3; + + schedule_timeout_uninterruptible(HZ); + count = 0; + do { + /* Turn on 1.2V, 2.5V, 3.3V and 5V power rails */ + out = 0xff & ~((1 << P0_EN_1V2_LBN) | (1 << P0_EN_2V5_LBN) | + (1 << P0_EN_3V3X_LBN) | (1 << P0_EN_5V_LBN) | + (1 << P0_X_TRST_LBN)); + + rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + if (rc) + goto fail3; + msleep(10); + + /* Turn on 1V power rail */ + out &= ~(1 << P0_EN_1V0X_LBN); + rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + if (rc) + goto fail3; + + EFX_INFO(efx, "waiting for power (attempt %d)...\n", count); + + schedule_timeout_uninterruptible(HZ); + + /* Check DSP is powered */ + rc = efx_i2c_read(i2c, PCA9539, P1_IN, &in, 1); + if (rc) + goto fail3; + if (in & (1 << P1_AFE_PWD_LBN)) + goto done; + + } while (++count < 20); + + EFX_INFO(efx, "timed out waiting for power\n"); + rc = -ETIMEDOUT; + goto fail3; + +done: + EFX_INFO(efx, "PHY is powered on\n"); + return 0; + +fail3: + /* Turn off all power rails */ + out = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + /* Disable port 1 outputs on IO expander */ + out = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &out, 1); +fail2: + /* Disable port 0 outputs on IO expander */ + out = 0xff; + (void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &out, 1); +fail1: + return rc; +} diff --git a/drivers/net/sfc/spi.h b/drivers/net/sfc/spi.h new file mode 100644 index 00000000000..34412f3d41c --- /dev/null +++ b/drivers/net/sfc/spi.h @@ -0,0 +1,71 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005 Fen Systems Ltd. + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_SPI_H +#define EFX_SPI_H + +#include "net_driver.h" + +/************************************************************************** + * + * Basic SPI command set and bit definitions + * + *************************************************************************/ + +/* + * Commands common to all known devices. + * + */ + +/* Write status register */ +#define SPI_WRSR 0x01 + +/* Write data to memory array */ +#define SPI_WRITE 0x02 + +/* Read data from memory array */ +#define SPI_READ 0x03 + +/* Reset write enable latch */ +#define SPI_WRDI 0x04 + +/* Read status register */ +#define SPI_RDSR 0x05 + +/* Set write enable latch */ +#define SPI_WREN 0x06 + +/* SST: Enable write to status register */ +#define SPI_SST_EWSR 0x50 + +/* + * Status register bits. Not all bits are supported on all devices. + * + */ + +/* Write-protect pin enabled */ +#define SPI_STATUS_WPEN 0x80 + +/* Block protection bit 2 */ +#define SPI_STATUS_BP2 0x10 + +/* Block protection bit 1 */ +#define SPI_STATUS_BP1 0x08 + +/* Block protection bit 0 */ +#define SPI_STATUS_BP0 0x04 + +/* State of the write enable latch */ +#define SPI_STATUS_WEN 0x02 + +/* Device busy flag */ +#define SPI_STATUS_NRDY 0x01 + +#endif /* EFX_SPI_H */ diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c new file mode 100644 index 00000000000..a2e9f79e47b --- /dev/null +++ b/drivers/net/sfc/tenxpress.c @@ -0,0 +1,434 @@ +/**************************************************************************** + * Driver for Solarflare 802.3an compliant PHY + * Copyright 2007 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include "efx.h" +#include "gmii.h" +#include "mdio_10g.h" +#include "falcon.h" +#include "phy.h" +#include "falcon_hwdefs.h" +#include "boards.h" +#include "mac.h" + +/* We expect these MMDs to be in the package */ +/* AN not here as mdio_check_mmds() requires STAT2 support */ +#define TENXPRESS_REQUIRED_DEVS (MDIO_MMDREG_DEVS0_PMAPMD | \ + MDIO_MMDREG_DEVS0_PCS | \ + MDIO_MMDREG_DEVS0_PHYXS) + +/* We complain if we fail to see the link partner as 10G capable this many + * times in a row (must be > 1 as sampling the autoneg. registers is racy) + */ +#define MAX_BAD_LP_TRIES (5) + +/* Extended control register */ +#define PMA_PMD_XCONTROL_REG 0xc000 +#define PMA_PMD_LNPGA_POWERDOWN_LBN 8 +#define PMA_PMD_LNPGA_POWERDOWN_WIDTH 1 + +/* extended status register */ +#define PMA_PMD_XSTATUS_REG 0xc001 +#define PMA_PMD_XSTAT_FLP_LBN (12) + +/* LED control register */ +#define PMA_PMD_LED_CTRL_REG (0xc007) +#define PMA_PMA_LED_ACTIVITY_LBN (3) + +/* LED function override register */ +#define PMA_PMD_LED_OVERR_REG (0xc009) +/* Bit positions for different LEDs (there are more but not wired on SFE4001)*/ +#define PMA_PMD_LED_LINK_LBN (0) +#define PMA_PMD_LED_SPEED_LBN (2) +#define PMA_PMD_LED_TX_LBN (4) +#define PMA_PMD_LED_RX_LBN (6) +/* Override settings */ +#define PMA_PMD_LED_AUTO (0) /* H/W control */ +#define PMA_PMD_LED_ON (1) +#define PMA_PMD_LED_OFF (2) +#define PMA_PMD_LED_FLASH (3) +/* All LEDs under hardware control */ +#define PMA_PMD_LED_FULL_AUTO (0) +/* Green and Amber under hardware control, Red off */ +#define PMA_PMD_LED_DEFAULT (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN) + + +/* Self test (BIST) control register */ +#define PMA_PMD_BIST_CTRL_REG (0xc014) +#define PMA_PMD_BIST_BER_LBN (2) /* Run BER test */ +#define PMA_PMD_BIST_CONT_LBN (1) /* Run continuous BIST until cleared */ +#define PMA_PMD_BIST_SINGLE_LBN (0) /* Run 1 BIST iteration (self clears) */ +/* Self test status register */ +#define PMA_PMD_BIST_STAT_REG (0xc015) +#define PMA_PMD_BIST_ENX_LBN (3) +#define PMA_PMD_BIST_PMA_LBN (2) +#define PMA_PMD_BIST_RXD_LBN (1) +#define PMA_PMD_BIST_AFE_LBN (0) + +#define BIST_MAX_DELAY (1000) +#define BIST_POLL_DELAY (10) + +/* Misc register defines */ +#define PCS_CLOCK_CTRL_REG 0xd801 +#define PLL312_RST_N_LBN 2 + +#define PCS_SOFT_RST2_REG 0xd806 +#define SERDES_RST_N_LBN 13 +#define XGXS_RST_N_LBN 12 + +#define PCS_TEST_SELECT_REG 0xd807 /* PRM 10.5.8 */ +#define CLK312_EN_LBN 3 + +/* Boot status register */ +#define PCS_BOOT_STATUS_REG (0xd000) +#define PCS_BOOT_FATAL_ERR_LBN (0) +#define PCS_BOOT_PROGRESS_LBN (1) +#define PCS_BOOT_PROGRESS_WIDTH (2) +#define PCS_BOOT_COMPLETE_LBN (3) +#define PCS_BOOT_MAX_DELAY (100) +#define PCS_BOOT_POLL_DELAY (10) + +/* Time to wait between powering down the LNPGA and turning off the power + * rails */ +#define LNPGA_PDOWN_WAIT (HZ / 5) + +static int crc_error_reset_threshold = 100; +module_param(crc_error_reset_threshold, int, 0644); +MODULE_PARM_DESC(crc_error_reset_threshold, + "Max number of CRC errors before XAUI reset"); + +struct tenxpress_phy_data { + enum tenxpress_state state; + atomic_t bad_crc_count; + int bad_lp_tries; +}; + +static int tenxpress_state_is(struct efx_nic *efx, int state) +{ + struct tenxpress_phy_data *phy_data = efx->phy_data; + return (phy_data != NULL) && (state == phy_data->state); +} + +void tenxpress_set_state(struct efx_nic *efx, + enum tenxpress_state state) +{ + struct tenxpress_phy_data *phy_data = efx->phy_data; + if (phy_data != NULL) + phy_data->state = state; +} + +void tenxpress_crc_err(struct efx_nic *efx) +{ + struct tenxpress_phy_data *phy_data = efx->phy_data; + if (phy_data != NULL) + atomic_inc(&phy_data->bad_crc_count); +} + +/* Check that the C166 has booted successfully */ +static int tenxpress_phy_check(struct efx_nic *efx) +{ + int phy_id = efx->mii.phy_id; + int count = PCS_BOOT_MAX_DELAY / PCS_BOOT_POLL_DELAY; + int boot_stat; + + /* Wait for the boot to complete (or not) */ + while (count) { + boot_stat = mdio_clause45_read(efx, phy_id, + MDIO_MMD_PCS, + PCS_BOOT_STATUS_REG); + if (boot_stat & (1 << PCS_BOOT_COMPLETE_LBN)) + break; + count--; + udelay(PCS_BOOT_POLL_DELAY); + } + + if (!count) { + EFX_ERR(efx, "%s: PHY boot timed out. Last status " + "%x\n", __func__, + (boot_stat >> PCS_BOOT_PROGRESS_LBN) & + ((1 << PCS_BOOT_PROGRESS_WIDTH) - 1)); + return -ETIMEDOUT; + } + + return 0; +} + +static void tenxpress_reset_xaui(struct efx_nic *efx); + +static int tenxpress_init(struct efx_nic *efx) +{ + int rc, reg; + + /* Turn on the clock */ + reg = (1 << CLK312_EN_LBN); + mdio_clause45_write(efx, efx->mii.phy_id, + MDIO_MMD_PCS, PCS_TEST_SELECT_REG, reg); + + rc = tenxpress_phy_check(efx); + if (rc < 0) + return rc; + + /* Set the LEDs up as: Green = Link, Amber = Link/Act, Red = Off */ + reg = mdio_clause45_read(efx, efx->mii.phy_id, + MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG); + reg |= (1 << PMA_PMA_LED_ACTIVITY_LBN); + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + PMA_PMD_LED_CTRL_REG, reg); + + reg = PMA_PMD_LED_DEFAULT; + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + PMA_PMD_LED_OVERR_REG, reg); + + return rc; +} + +static int tenxpress_phy_init(struct efx_nic *efx) +{ + struct tenxpress_phy_data *phy_data; + int rc = 0; + + phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + efx->phy_data = phy_data; + + tenxpress_set_state(efx, TENXPRESS_STATUS_NORMAL); + + rc = mdio_clause45_wait_reset_mmds(efx, + TENXPRESS_REQUIRED_DEVS); + if (rc < 0) + goto fail; + + rc = mdio_clause45_check_mmds(efx, TENXPRESS_REQUIRED_DEVS, 0); + if (rc < 0) + goto fail; + + rc = tenxpress_init(efx); + if (rc < 0) + goto fail; + + schedule_timeout_uninterruptible(HZ / 5); /* 200ms */ + + /* Let XGXS and SerDes out of reset and resets 10XPress */ + falcon_reset_xaui(efx); + + return 0; + + fail: + kfree(efx->phy_data); + efx->phy_data = NULL; + return rc; +} + +static void tenxpress_set_bad_lp(struct efx_nic *efx, int bad_lp) +{ + struct tenxpress_phy_data *pd = efx->phy_data; + int reg; + + /* Nothing to do if all is well and was previously so. */ + if (!(bad_lp || pd->bad_lp_tries)) + return; + + reg = mdio_clause45_read(efx, efx->mii.phy_id, + MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG); + + if (bad_lp) + pd->bad_lp_tries++; + else + pd->bad_lp_tries = 0; + + if (pd->bad_lp_tries == MAX_BAD_LP_TRIES) { + pd->bad_lp_tries = 0; /* Restart count */ + reg &= ~(PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN); + reg |= (PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN); + EFX_ERR(efx, "This NIC appears to be plugged into" + " a port that is not 10GBASE-T capable.\n" + " This PHY is 10GBASE-T ONLY, so no link can" + " be established.\n"); + } else { + reg |= (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN); + } + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + PMA_PMD_LED_OVERR_REG, reg); +} + +/* Check link status and return a boolean OK value. If the link is NOT + * OK we have a quick rummage round to see if we appear to be plugged + * into a non-10GBT port and if so warn the user that they won't get + * link any time soon as we are 10GBT only, unless caller specified + * not to do this check (it isn't useful in loopback) */ +static int tenxpress_link_ok(struct efx_nic *efx, int check_lp) +{ + int ok = mdio_clause45_links_ok(efx, TENXPRESS_REQUIRED_DEVS); + + if (ok) { + tenxpress_set_bad_lp(efx, 0); + } else if (check_lp) { + /* Are we plugged into the wrong sort of link? */ + int bad_lp = 0; + int phy_id = efx->mii.phy_id; + int an_stat = mdio_clause45_read(efx, phy_id, MDIO_MMD_AN, + MDIO_AN_STATUS); + int xphy_stat = mdio_clause45_read(efx, phy_id, + MDIO_MMD_PMAPMD, + PMA_PMD_XSTATUS_REG); + /* Are we plugged into anything that sends FLPs? If + * not we can't distinguish between not being plugged + * in and being plugged into a non-AN antique. The FLP + * bit has the advantage of not clearing when autoneg + * restarts. */ + if (!(xphy_stat & (1 << PMA_PMD_XSTAT_FLP_LBN))) { + tenxpress_set_bad_lp(efx, 0); + return ok; + } + + /* If it can do 10GBT it must be XNP capable */ + bad_lp = !(an_stat & (1 << MDIO_AN_STATUS_XNP_LBN)); + if (!bad_lp && (an_stat & (1 << MDIO_AN_STATUS_PAGE_LBN))) { + bad_lp = !(mdio_clause45_read(efx, phy_id, + MDIO_MMD_AN, MDIO_AN_10GBT_STATUS) & + (1 << MDIO_AN_10GBT_STATUS_LP_10G_LBN)); + } + tenxpress_set_bad_lp(efx, bad_lp); + } + return ok; +} + +static void tenxpress_phy_reconfigure(struct efx_nic *efx) +{ + if (!tenxpress_state_is(efx, TENXPRESS_STATUS_NORMAL)) + return; + + efx->link_up = tenxpress_link_ok(efx, 0); + efx->link_options = GM_LPA_10000FULL; +} + +static void tenxpress_phy_clear_interrupt(struct efx_nic *efx) +{ + /* Nothing done here - LASI interrupts aren't reliable so poll */ +} + + +/* Poll PHY for interrupt */ +static int tenxpress_phy_check_hw(struct efx_nic *efx) +{ + struct tenxpress_phy_data *phy_data = efx->phy_data; + int phy_up = tenxpress_state_is(efx, TENXPRESS_STATUS_NORMAL); + int link_ok; + + link_ok = phy_up && tenxpress_link_ok(efx, 1); + + if (link_ok != efx->link_up) + falcon_xmac_sim_phy_event(efx); + + /* Nothing to check if we've already shut down the PHY */ + if (!phy_up) + return 0; + + if (atomic_read(&phy_data->bad_crc_count) > crc_error_reset_threshold) { + EFX_ERR(efx, "Resetting XAUI due to too many CRC errors\n"); + falcon_reset_xaui(efx); + atomic_set(&phy_data->bad_crc_count, 0); + } + + return 0; +} + +static void tenxpress_phy_fini(struct efx_nic *efx) +{ + int reg; + + /* Power down the LNPGA */ + reg = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN); + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + PMA_PMD_XCONTROL_REG, reg); + + /* Waiting here ensures that the board fini, which can turn off the + * power to the PHY, won't get run until the LNPGA powerdown has been + * given long enough to complete. */ + schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */ + + kfree(efx->phy_data); + efx->phy_data = NULL; +} + + +/* Set the RX and TX LEDs and Link LED flashing. The other LEDs + * (which probably aren't wired anyway) are left in AUTO mode */ +void tenxpress_phy_blink(struct efx_nic *efx, int blink) +{ + int reg; + + if (blink) + reg = (PMA_PMD_LED_FLASH << PMA_PMD_LED_TX_LBN) | + (PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN) | + (PMA_PMD_LED_FLASH << PMA_PMD_LED_LINK_LBN); + else + reg = PMA_PMD_LED_DEFAULT; + + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + PMA_PMD_LED_OVERR_REG, reg); +} + +static void tenxpress_reset_xaui(struct efx_nic *efx) +{ + int phy = efx->mii.phy_id; + int clk_ctrl, test_select, soft_rst2; + + /* Real work is done on clock_ctrl other resets are thought to be + * optional but make the reset more reliable + */ + + /* Read */ + clk_ctrl = mdio_clause45_read(efx, phy, MDIO_MMD_PCS, + PCS_CLOCK_CTRL_REG); + test_select = mdio_clause45_read(efx, phy, MDIO_MMD_PCS, + PCS_TEST_SELECT_REG); + soft_rst2 = mdio_clause45_read(efx, phy, MDIO_MMD_PCS, + PCS_SOFT_RST2_REG); + + /* Put in reset */ + test_select &= ~(1 << CLK312_EN_LBN); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_TEST_SELECT_REG, test_select); + + soft_rst2 &= ~((1 << XGXS_RST_N_LBN) | (1 << SERDES_RST_N_LBN)); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_SOFT_RST2_REG, soft_rst2); + + clk_ctrl &= ~(1 << PLL312_RST_N_LBN); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_CLOCK_CTRL_REG, clk_ctrl); + udelay(10); + + /* Remove reset */ + clk_ctrl |= (1 << PLL312_RST_N_LBN); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_CLOCK_CTRL_REG, clk_ctrl); + udelay(10); + + soft_rst2 |= ((1 << XGXS_RST_N_LBN) | (1 << SERDES_RST_N_LBN)); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_SOFT_RST2_REG, soft_rst2); + udelay(10); + + test_select |= (1 << CLK312_EN_LBN); + mdio_clause45_write(efx, phy, MDIO_MMD_PCS, + PCS_TEST_SELECT_REG, test_select); + udelay(10); +} + +struct efx_phy_operations falcon_tenxpress_phy_ops = { + .init = tenxpress_phy_init, + .reconfigure = tenxpress_phy_reconfigure, + .check_hw = tenxpress_phy_check_hw, + .fini = tenxpress_phy_fini, + .clear_interrupt = tenxpress_phy_clear_interrupt, + .reset_xaui = tenxpress_reset_xaui, + .mmds = TENXPRESS_REQUIRED_DEVS, +}; diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c new file mode 100644 index 00000000000..fbb866b2185 --- /dev/null +++ b/drivers/net/sfc/tx.c @@ -0,0 +1,452 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include "net_driver.h" +#include "tx.h" +#include "efx.h" +#include "falcon.h" +#include "workarounds.h" + +/* + * TX descriptor ring full threshold + * + * The tx_queue descriptor ring fill-level must fall below this value + * before we restart the netif queue + */ +#define EFX_NETDEV_TX_THRESHOLD(_tx_queue) \ + (_tx_queue->efx->type->txd_ring_mask / 2u) + +/* We want to be able to nest calls to netif_stop_queue(), since each + * channel can have an individual stop on the queue. + */ +void efx_stop_queue(struct efx_nic *efx) +{ + spin_lock_bh(&efx->netif_stop_lock); + EFX_TRACE(efx, "stop TX queue\n"); + + atomic_inc(&efx->netif_stop_count); + netif_stop_queue(efx->net_dev); + + spin_unlock_bh(&efx->netif_stop_lock); +} + +/* Wake netif's TX queue + * We want to be able to nest calls to netif_stop_queue(), since each + * channel can have an individual stop on the queue. + */ +inline void efx_wake_queue(struct efx_nic *efx) +{ + local_bh_disable(); + if (atomic_dec_and_lock(&efx->netif_stop_count, + &efx->netif_stop_lock)) { + EFX_TRACE(efx, "waking TX queue\n"); + netif_wake_queue(efx->net_dev); + spin_unlock(&efx->netif_stop_lock); + } + local_bh_enable(); +} + +static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer) +{ + if (buffer->unmap_len) { + struct pci_dev *pci_dev = tx_queue->efx->pci_dev; + if (buffer->unmap_single) + pci_unmap_single(pci_dev, buffer->unmap_addr, + buffer->unmap_len, PCI_DMA_TODEVICE); + else + pci_unmap_page(pci_dev, buffer->unmap_addr, + buffer->unmap_len, PCI_DMA_TODEVICE); + buffer->unmap_len = 0; + buffer->unmap_single = 0; + } + + if (buffer->skb) { + dev_kfree_skb_any((struct sk_buff *) buffer->skb); + buffer->skb = NULL; + EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x " + "complete\n", tx_queue->queue, read_ptr); + } +} + + +/* + * Add a socket buffer to a TX queue + * + * This maps all fragments of a socket buffer for DMA and adds them to + * the TX queue. The queue's insert pointer will be incremented by + * the number of fragments in the socket buffer. + * + * If any DMA mapping fails, any mapped fragments will be unmapped, + * the queue's insert pointer will be restored to its original value. + * + * Returns NETDEV_TX_OK or NETDEV_TX_BUSY + * You must hold netif_tx_lock() to call this function. + */ +static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb) +{ + struct efx_nic *efx = tx_queue->efx; + struct pci_dev *pci_dev = efx->pci_dev; + struct efx_tx_buffer *buffer; + skb_frag_t *fragment; + struct page *page; + int page_offset; + unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign; + dma_addr_t dma_addr, unmap_addr = 0; + unsigned int dma_len; + unsigned unmap_single; + int q_space, i = 0; + int rc = NETDEV_TX_OK; + + EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); + + /* Get size of the initial fragment */ + len = skb_headlen(skb); + + fill_level = tx_queue->insert_count - tx_queue->old_read_count; + q_space = efx->type->txd_ring_mask - 1 - fill_level; + + /* Map for DMA. Use pci_map_single rather than pci_map_page + * since this is more efficient on machines with sparse + * memory. + */ + unmap_single = 1; + dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE); + + /* Process all fragments */ + while (1) { + if (unlikely(pci_dma_mapping_error(dma_addr))) + goto pci_err; + + /* Store fields for marking in the per-fragment final + * descriptor */ + unmap_len = len; + unmap_addr = dma_addr; + + /* Add to TX queue, splitting across DMA boundaries */ + do { + if (unlikely(q_space-- <= 0)) { + /* It might be that completions have + * happened since the xmit path last + * checked. Update the xmit path's + * copy of read_count. + */ + ++tx_queue->stopped; + /* This memory barrier protects the + * change of stopped from the access + * of read_count. */ + smp_mb(); + tx_queue->old_read_count = + *(volatile unsigned *) + &tx_queue->read_count; + fill_level = (tx_queue->insert_count + - tx_queue->old_read_count); + q_space = (efx->type->txd_ring_mask - 1 - + fill_level); + if (unlikely(q_space-- <= 0)) + goto stop; + smp_mb(); + --tx_queue->stopped; + } + + insert_ptr = (tx_queue->insert_count & + efx->type->txd_ring_mask); + buffer = &tx_queue->buffer[insert_ptr]; + EFX_BUG_ON_PARANOID(buffer->skb); + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->continuation != 1); + EFX_BUG_ON_PARANOID(buffer->unmap_len); + + dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1); + if (likely(dma_len > len)) + dma_len = len; + + misalign = (unsigned)dma_addr & efx->type->bug5391_mask; + if (misalign && dma_len + misalign > 512) + dma_len = 512 - misalign; + + /* Fill out per descriptor fields */ + buffer->len = dma_len; + buffer->dma_addr = dma_addr; + len -= dma_len; + dma_addr += dma_len; + ++tx_queue->insert_count; + } while (len); + + /* Transfer ownership of the unmapping to the final buffer */ + buffer->unmap_addr = unmap_addr; + buffer->unmap_single = unmap_single; + buffer->unmap_len = unmap_len; + unmap_len = 0; + + /* Get address and size of next fragment */ + if (i >= skb_shinfo(skb)->nr_frags) + break; + fragment = &skb_shinfo(skb)->frags[i]; + len = fragment->size; + page = fragment->page; + page_offset = fragment->page_offset; + i++; + /* Map for DMA */ + unmap_single = 0; + dma_addr = pci_map_page(pci_dev, page, page_offset, len, + PCI_DMA_TODEVICE); + } + + /* Transfer ownership of the skb to the final buffer */ + buffer->skb = skb; + buffer->continuation = 0; + + /* Pass off to hardware */ + falcon_push_buffers(tx_queue); + + return NETDEV_TX_OK; + + pci_err: + EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d " + "fragments for DMA\n", tx_queue->queue, skb->len, + skb_shinfo(skb)->nr_frags + 1); + + /* Mark the packet as transmitted, and free the SKB ourselves */ + dev_kfree_skb_any((struct sk_buff *)skb); + goto unwind; + + stop: + rc = NETDEV_TX_BUSY; + + if (tx_queue->stopped == 1) + efx_stop_queue(efx); + + unwind: + /* Work backwards until we hit the original insert pointer value */ + while (tx_queue->insert_count != tx_queue->write_count) { + --tx_queue->insert_count; + insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask; + buffer = &tx_queue->buffer[insert_ptr]; + efx_dequeue_buffer(tx_queue, buffer); + buffer->len = 0; + } + + /* Free the fragment we were mid-way through pushing */ + if (unmap_len) + pci_unmap_page(pci_dev, unmap_addr, unmap_len, + PCI_DMA_TODEVICE); + + return rc; +} + +/* Remove packets from the TX queue + * + * This removes packets from the TX queue, up to and including the + * specified index. + */ +static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, + unsigned int index) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int stop_index, read_ptr; + unsigned int mask = tx_queue->efx->type->txd_ring_mask; + + stop_index = (index + 1) & mask; + read_ptr = tx_queue->read_count & mask; + + while (read_ptr != stop_index) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + if (unlikely(buffer->len == 0)) { + EFX_ERR(tx_queue->efx, "TX queue %d spurious TX " + "completion id %x\n", tx_queue->queue, + read_ptr); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + efx_dequeue_buffer(tx_queue, buffer); + buffer->continuation = 1; + buffer->len = 0; + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & mask; + } +} + +/* Initiate a packet transmission on the specified TX queue. + * Note that returning anything other than NETDEV_TX_OK will cause the + * OS to free the skb. + * + * This function is split out from efx_hard_start_xmit to allow the + * loopback test to direct packets via specific TX queues. It is + * therefore a non-static inline, so as not to penalise performance + * for non-loopback transmissions. + * + * Context: netif_tx_lock held + */ +inline int efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, struct sk_buff *skb) +{ + int rc; + + /* Map fragments for DMA and add to TX queue */ + rc = efx_enqueue_skb(tx_queue, skb); + if (unlikely(rc != NETDEV_TX_OK)) + goto out; + + /* Update last TX timer */ + efx->net_dev->trans_start = jiffies; + + out: + return rc; +} + +/* Initiate a packet transmission. We use one channel per CPU + * (sharing when we have more CPUs than channels). On Falcon, the TX + * completion events will be directed back to the CPU that transmitted + * the packet, which should be cache-efficient. + * + * Context: non-blocking. + * Note that returning anything other than NETDEV_TX_OK will cause the + * OS to free the skb. + */ +int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) +{ + struct efx_nic *efx = net_dev->priv; + return efx_xmit(efx, &efx->tx_queue[0], skb); +} + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +{ + unsigned fill_level; + struct efx_nic *efx = tx_queue->efx; + + EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask); + + efx_dequeue_buffers(tx_queue, index); + + /* See if we need to restart the netif queue. This barrier + * separates the update of read_count from the test of + * stopped. */ + smp_mb(); + if (unlikely(tx_queue->stopped)) { + fill_level = tx_queue->insert_count - tx_queue->read_count; + if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) { + EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx)); + + /* Do this under netif_tx_lock(), to avoid racing + * with efx_xmit(). */ + netif_tx_lock(efx->net_dev); + if (tx_queue->stopped) { + tx_queue->stopped = 0; + efx_wake_queue(efx); + } + netif_tx_unlock(efx->net_dev); + } + } +} + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int txq_size; + int i, rc; + + EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue); + + /* Allocate software ring */ + txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer); + tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL); + if (!tx_queue->buffer) { + rc = -ENOMEM; + goto fail1; + } + for (i = 0; i <= efx->type->txd_ring_mask; ++i) + tx_queue->buffer[i].continuation = 1; + + /* Allocate hardware ring */ + rc = falcon_probe_tx(tx_queue); + if (rc) + goto fail2; + + return 0; + + fail2: + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + fail1: + tx_queue->used = 0; + + return rc; +} + +int efx_init_tx_queue(struct efx_tx_queue *tx_queue) +{ + EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue); + + tx_queue->insert_count = 0; + tx_queue->write_count = 0; + tx_queue->read_count = 0; + tx_queue->old_read_count = 0; + BUG_ON(tx_queue->stopped); + + /* Set up TX descriptor ring */ + return falcon_init_tx(tx_queue); +} + +void efx_release_tx_buffers(struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer; + + if (!tx_queue->buffer) + return; + + /* Free any buffers left in the ring */ + while (tx_queue->read_count != tx_queue->write_count) { + buffer = &tx_queue->buffer[tx_queue->read_count & + tx_queue->efx->type->txd_ring_mask]; + efx_dequeue_buffer(tx_queue, buffer); + buffer->continuation = 1; + buffer->len = 0; + + ++tx_queue->read_count; + } +} + +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) +{ + EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue); + + /* Flush TX queue, remove descriptor ring */ + falcon_fini_tx(tx_queue); + + efx_release_tx_buffers(tx_queue); + + /* Release queue's stop on port, if any */ + if (tx_queue->stopped) { + tx_queue->stopped = 0; + efx_wake_queue(tx_queue->efx); + } +} + +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) +{ + EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue); + falcon_remove_tx(tx_queue); + + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + tx_queue->used = 0; +} + + diff --git a/drivers/net/sfc/tx.h b/drivers/net/sfc/tx.h new file mode 100644 index 00000000000..1526a73b4b5 --- /dev/null +++ b/drivers/net/sfc/tx.h @@ -0,0 +1,24 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006 Fen Systems Ltd. + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TX_H +#define EFX_TX_H + +#include "net_driver.h" + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); +int efx_init_tx_queue(struct efx_tx_queue *tx_queue); +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); + +int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); +void efx_release_tx_buffers(struct efx_tx_queue *tx_queue); + +#endif /* EFX_TX_H */ diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h new file mode 100644 index 00000000000..dca62f19019 --- /dev/null +++ b/drivers/net/sfc/workarounds.h @@ -0,0 +1,56 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_WORKAROUNDS_H +#define EFX_WORKAROUNDS_H + +/* + * Hardware workarounds. + * Bug numbers are from Solarflare's Bugzilla. + */ + +#define EFX_WORKAROUND_ALWAYS(efx) 1 +#define EFX_WORKAROUND_FALCON_A(efx) (FALCON_REV(efx) <= FALCON_REV_A1) + +/* XAUI resets if link not detected */ +#define EFX_WORKAROUND_5147 EFX_WORKAROUND_ALWAYS +/* SNAP frames have TOBE_DISC set */ +#define EFX_WORKAROUND_5475 EFX_WORKAROUND_ALWAYS +/* RX PCIe double split performance issue */ +#define EFX_WORKAROUND_7575 EFX_WORKAROUND_ALWAYS +/* TX pkt parser problem with <= 16 byte TXes */ +#define EFX_WORKAROUND_9141 EFX_WORKAROUND_ALWAYS +/* XGXS and XAUI reset sequencing in SW */ +#define EFX_WORKAROUND_9388 EFX_WORKAROUND_ALWAYS +/* Low rate CRC errors require XAUI reset */ +#define EFX_WORKAROUND_10750 EFX_WORKAROUND_ALWAYS +/* TX_EV_PKT_ERR can be caused by a dangling TX descriptor + * or a PCIe error (bug 11028) */ +#define EFX_WORKAROUND_10727 EFX_WORKAROUND_ALWAYS +/* Transmit flow control may get disabled */ +#define EFX_WORKAROUND_11482 EFX_WORKAROUND_ALWAYS +/* Flush events can take a very long time to appear */ +#define EFX_WORKAROUND_11557 EFX_WORKAROUND_ALWAYS + +/* Spurious parity errors in TSORT buffers */ +#define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A +/* iSCSI parsing errors */ +#define EFX_WORKAROUND_5583 EFX_WORKAROUND_FALCON_A +/* RX events go missing */ +#define EFX_WORKAROUND_5676 EFX_WORKAROUND_FALCON_A +/* RX_RESET on A1 */ +#define EFX_WORKAROUND_6555 EFX_WORKAROUND_FALCON_A +/* Increase filter depth to avoid RX_RESET */ +#define EFX_WORKAROUND_7244 EFX_WORKAROUND_FALCON_A +/* Flushes may never complete */ +#define EFX_WORKAROUND_7803 EFX_WORKAROUND_FALCON_A +/* Leak overlength packets rather than free */ +#define EFX_WORKAROUND_8071 EFX_WORKAROUND_FALCON_A + +#endif /* EFX_WORKAROUNDS_H */ diff --git a/drivers/net/sfc/xenpack.h b/drivers/net/sfc/xenpack.h new file mode 100644 index 00000000000..b0d1f225b70 --- /dev/null +++ b/drivers/net/sfc/xenpack.h @@ -0,0 +1,62 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_XENPACK_H +#define EFX_XENPACK_H + +/* Exported functions from Xenpack standard PHY control */ + +#include "mdio_10g.h" + +/****************************************************************************/ +/* XENPACK MDIO register extensions */ +#define MDIO_XP_LASI_RX_CTRL (0x9000) +#define MDIO_XP_LASI_TX_CTRL (0x9001) +#define MDIO_XP_LASI_CTRL (0x9002) +#define MDIO_XP_LASI_RX_STAT (0x9003) +#define MDIO_XP_LASI_TX_STAT (0x9004) +#define MDIO_XP_LASI_STAT (0x9005) + +/* Control/Status bits */ +#define XP_LASI_LS_ALARM (1 << 0) +#define XP_LASI_TX_ALARM (1 << 1) +#define XP_LASI_RX_ALARM (1 << 2) +/* These two are Quake vendor extensions to the standard XENPACK defines */ +#define XP_LASI_LS_INTB (1 << 3) +#define XP_LASI_TEST (1 << 7) + +/* Enable LASI interrupts for PHY */ +static inline void xenpack_enable_lasi_irqs(struct efx_nic *efx) +{ + int reg; + int phy_id = efx->mii.phy_id; + /* Read to clear LASI status register */ + reg = mdio_clause45_read(efx, phy_id, MDIO_MMD_PMAPMD, + MDIO_XP_LASI_STAT); + + mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD, + MDIO_XP_LASI_CTRL, XP_LASI_LS_ALARM); +} + +/* Read the LASI interrupt status to clear the interrupt. */ +static inline int xenpack_clear_lasi_irqs(struct efx_nic *efx) +{ + /* Read to clear link status alarm */ + return mdio_clause45_read(efx, efx->mii.phy_id, + MDIO_MMD_PMAPMD, MDIO_XP_LASI_STAT); +} + +/* Turn off LASI interrupts */ +static inline void xenpack_disable_lasi_irqs(struct efx_nic *efx) +{ + mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD, + MDIO_XP_LASI_CTRL, 0); +} + +#endif /* EFX_XENPACK_H */ diff --git a/drivers/net/sfc/xfp_phy.c b/drivers/net/sfc/xfp_phy.c new file mode 100644 index 00000000000..66dd5bf1eaa --- /dev/null +++ b/drivers/net/sfc/xfp_phy.c @@ -0,0 +1,132 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2006-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +/* + * Driver for XFP optical PHYs (plus some support specific to the Quake 2032) + * See www.amcc.com for details (search for qt2032) + */ + +#include +#include +#include "efx.h" +#include "gmii.h" +#include "mdio_10g.h" +#include "xenpack.h" +#include "phy.h" +#include "mac.h" + +#define XFP_REQUIRED_DEVS (MDIO_MMDREG_DEVS0_PCS | \ + MDIO_MMDREG_DEVS0_PMAPMD | \ + MDIO_MMDREG_DEVS0_PHYXS) + +/****************************************************************************/ +/* Quake-specific MDIO registers */ +#define MDIO_QUAKE_LED0_REG (0xD006) + +void xfp_set_led(struct efx_nic *p, int led, int mode) +{ + int addr = MDIO_QUAKE_LED0_REG + led; + mdio_clause45_write(p, p->mii.phy_id, MDIO_MMD_PMAPMD, addr, + mode); +} + +#define XFP_MAX_RESET_TIME 500 +#define XFP_RESET_WAIT 10 + +/* Reset the PHYXS MMD. This is documented (for the Quake PHY) as doing + * a complete soft reset. + */ +static int xfp_reset_phy(struct efx_nic *efx) +{ + int rc; + + rc = mdio_clause45_reset_mmd(efx, MDIO_MMD_PHYXS, + XFP_MAX_RESET_TIME / XFP_RESET_WAIT, + XFP_RESET_WAIT); + if (rc < 0) + goto fail; + + /* Wait 250ms for the PHY to complete bootup */ + msleep(250); + + /* Check that all the MMDs we expect are present and responding. We + * expect faults on some if the link is down, but not on the PHY XS */ + rc = mdio_clause45_check_mmds(efx, XFP_REQUIRED_DEVS, + MDIO_MMDREG_DEVS0_PHYXS); + if (rc < 0) + goto fail; + + efx->board_info.init_leds(efx); + + return rc; + + fail: + EFX_ERR(efx, "XFP: reset timed out!\n"); + return rc; +} + +static int xfp_phy_init(struct efx_nic *efx) +{ + u32 devid = mdio_clause45_read_id(efx, MDIO_MMD_PHYXS); + int rc; + + EFX_INFO(efx, "XFP: PHY ID reg %x (OUI %x model %x revision" + " %x)\n", devid, MDIO_ID_OUI(devid), MDIO_ID_MODEL(devid), + MDIO_ID_REV(devid)); + + rc = xfp_reset_phy(efx); + + EFX_INFO(efx, "XFP: PHY init %s.\n", + rc ? "failed" : "successful"); + + return rc; +} + +static void xfp_phy_clear_interrupt(struct efx_nic *efx) +{ + xenpack_clear_lasi_irqs(efx); +} + +static int xfp_link_ok(struct efx_nic *efx) +{ + return mdio_clause45_links_ok(efx, XFP_REQUIRED_DEVS); +} + +static int xfp_phy_check_hw(struct efx_nic *efx) +{ + int rc = 0; + int link_up = xfp_link_ok(efx); + /* Simulate a PHY event if link state has changed */ + if (link_up != efx->link_up) + falcon_xmac_sim_phy_event(efx); + + return rc; +} + +static void xfp_phy_reconfigure(struct efx_nic *efx) +{ + efx->link_up = xfp_link_ok(efx); + efx->link_options = GM_LPA_10000FULL; +} + + +static void xfp_phy_fini(struct efx_nic *efx) +{ + /* Clobber the LED if it was blinking */ + efx->board_info.blink(efx, 0); +} + +struct efx_phy_operations falcon_xfp_phy_ops = { + .init = xfp_phy_init, + .reconfigure = xfp_phy_reconfigure, + .check_hw = xfp_phy_check_hw, + .fini = xfp_phy_fini, + .clear_interrupt = xfp_phy_clear_interrupt, + .reset_xaui = efx_port_dummy_op_void, + .mmds = XFP_REQUIRED_DEVS, +}; -- cgit v1.2.3-70-g09d2 From ba0f6caeb5d9cf6fbb99f84ff0f2731f04996595 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Tue, 29 Apr 2008 02:27:37 +0300 Subject: 3c505: use netstats in net_device structure Use net_device_stats from net_device structure instead of local. No need to memset it to 0, because it is allocated by kzalloc. Signed-off-by: Paulius Zaleckas Signed-off-by: Jeff Garzik --- drivers/net/3c505.c | 30 ++++++++++++++---------------- drivers/net/3c505.h | 1 - 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c index 9c6573419f5..fdfb2b2cb73 100644 --- a/drivers/net/3c505.c +++ b/drivers/net/3c505.c @@ -670,7 +670,7 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id) memcpy(adapter->current_dma.target, adapter->dma_buffer, adapter->current_dma.length); } skb->protocol = eth_type_trans(skb,dev); - adapter->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += skb->len; netif_rx(skb); dev->last_rx = jiffies; } @@ -773,12 +773,12 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id) * received board statistics */ case CMD_NETWORK_STATISTICS_RESPONSE: - adapter->stats.rx_packets += adapter->irx_pcb.data.netstat.tot_recv; - adapter->stats.tx_packets += adapter->irx_pcb.data.netstat.tot_xmit; - adapter->stats.rx_crc_errors += adapter->irx_pcb.data.netstat.err_CRC; - adapter->stats.rx_frame_errors += adapter->irx_pcb.data.netstat.err_align; - adapter->stats.rx_fifo_errors += adapter->irx_pcb.data.netstat.err_ovrrun; - adapter->stats.rx_over_errors += adapter->irx_pcb.data.netstat.err_res; + dev->stats.rx_packets += adapter->irx_pcb.data.netstat.tot_recv; + dev->stats.tx_packets += adapter->irx_pcb.data.netstat.tot_xmit; + dev->stats.rx_crc_errors += adapter->irx_pcb.data.netstat.err_CRC; + dev->stats.rx_frame_errors += adapter->irx_pcb.data.netstat.err_align; + dev->stats.rx_fifo_errors += adapter->irx_pcb.data.netstat.err_ovrrun; + dev->stats.rx_over_errors += adapter->irx_pcb.data.netstat.err_res; adapter->got[CMD_NETWORK_STATISTICS] = 1; if (elp_debug >= 3) printk(KERN_DEBUG "%s: interrupt - statistics response received\n", dev->name); @@ -794,11 +794,11 @@ static irqreturn_t elp_interrupt(int irq, void *dev_id) break; switch (adapter->irx_pcb.data.xmit_resp.c_stat) { case 0xffff: - adapter->stats.tx_aborted_errors++; + dev->stats.tx_aborted_errors++; printk(KERN_INFO "%s: transmit timed out, network cable problem?\n", dev->name); break; case 0xfffe: - adapter->stats.tx_fifo_errors++; + dev->stats.tx_fifo_errors++; printk(KERN_INFO "%s: transmit timed out, FIFO underrun\n", dev->name); break; } @@ -986,7 +986,7 @@ static bool send_packet(struct net_device *dev, struct sk_buff *skb) return false; } - adapter->stats.tx_bytes += nlen; + dev->stats.tx_bytes += nlen; /* * send the adapter a transmit packet command. Ignore segment and offset @@ -1041,7 +1041,6 @@ static bool send_packet(struct net_device *dev, struct sk_buff *skb) static void elp_timeout(struct net_device *dev) { - elp_device *adapter = dev->priv; int stat; stat = inb_status(dev->base_addr); @@ -1049,7 +1048,7 @@ static void elp_timeout(struct net_device *dev) if (elp_debug >= 1) printk(KERN_DEBUG "%s: status %#02x\n", dev->name, stat); dev->trans_start = jiffies; - adapter->stats.tx_dropped++; + dev->stats.tx_dropped++; netif_wake_queue(dev); } @@ -1113,7 +1112,7 @@ static struct net_device_stats *elp_get_stats(struct net_device *dev) /* If the device is closed, just return the latest stats we have, - we cannot ask from the adapter without interrupts */ if (!netif_running(dev)) - return &adapter->stats; + return &dev->stats; /* send a get statistics command to the board */ adapter->tx_pcb.command = CMD_NETWORK_STATISTICS; @@ -1126,12 +1125,12 @@ static struct net_device_stats *elp_get_stats(struct net_device *dev) while (adapter->got[CMD_NETWORK_STATISTICS] == 0 && time_before(jiffies, timeout)); if (time_after_eq(jiffies, timeout)) { TIMEOUT_MSG(__LINE__); - return &adapter->stats; + return &dev->stats; } } /* statistics are now up to date */ - return &adapter->stats; + return &dev->stats; } @@ -1571,7 +1570,6 @@ static int __init elplus_setup(struct net_device *dev) dev->set_multicast_list = elp_set_mc_list; /* local */ dev->ethtool_ops = &netdev_ethtool_ops; /* local */ - memset(&(adapter->stats), 0, sizeof(struct net_device_stats)); dev->mem_start = dev->mem_end = 0; err = register_netdev(dev); diff --git a/drivers/net/3c505.h b/drivers/net/3c505.h index 1910cb1dc78..04df2a9002b 100644 --- a/drivers/net/3c505.h +++ b/drivers/net/3c505.h @@ -264,7 +264,6 @@ typedef struct { pcb_struct rx_pcb; /* PCB for foreground receiving */ pcb_struct itx_pcb; /* PCB for background sending */ pcb_struct irx_pcb; /* PCB for background receiving */ - struct net_device_stats stats; void *dma_buffer; -- cgit v1.2.3-70-g09d2 From 815f8802d201aba1ce343ba832daf639165f01a1 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Tue, 29 Apr 2008 02:45:43 +0300 Subject: 3c509: use netstats in net_device structure Use net_device_stats from net_device structure instead of local. Signed-off-by: Paulius Zaleckas Signed-off-by: Jeff Garzik --- drivers/net/3c509.c | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 54dac0696d9..e6c545fe5f5 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -167,7 +167,6 @@ enum RxFilter { enum el3_cardtype { EL3_ISA, EL3_PNP, EL3_MCA, EL3_EISA }; struct el3_private { - struct net_device_stats stats; spinlock_t lock; /* skb send-queue */ int head, size; @@ -794,7 +793,6 @@ el3_open(struct net_device *dev) static void el3_tx_timeout (struct net_device *dev) { - struct el3_private *lp = netdev_priv(dev); int ioaddr = dev->base_addr; /* Transmitter timeout, serious problems. */ @@ -802,7 +800,7 @@ el3_tx_timeout (struct net_device *dev) "Tx FIFO room %d.\n", dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS), inw(ioaddr + TX_FREE)); - lp->stats.tx_errors++; + dev->stats.tx_errors++; dev->trans_start = jiffies; /* Issue TX_RESET and TX_START commands. */ outw(TxReset, ioaddr + EL3_CMD); @@ -820,7 +818,7 @@ el3_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue (dev); - lp->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += skb->len; if (el3_debug > 4) { printk(KERN_DEBUG "%s: el3_start_xmit(length = %u) called, status %4.4x.\n", @@ -881,7 +879,7 @@ el3_start_xmit(struct sk_buff *skb, struct net_device *dev) int i = 4; while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { - if (tx_status & 0x38) lp->stats.tx_aborted_errors++; + if (tx_status & 0x38) dev->stats.tx_aborted_errors++; if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ @@ -931,12 +929,11 @@ el3_interrupt(int irq, void *dev_id) outw(AckIntr | RxEarly, ioaddr + EL3_CMD); } if (status & TxComplete) { /* Really Tx error. */ - struct el3_private *lp = netdev_priv(dev); short tx_status; int i = 4; while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { - if (tx_status & 0x38) lp->stats.tx_aborted_errors++; + if (tx_status & 0x38) dev->stats.tx_aborted_errors++; if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ @@ -1002,7 +999,7 @@ el3_get_stats(struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); update_stats(dev); spin_unlock_irqrestore(&lp->lock, flags); - return &lp->stats; + return &dev->stats; } /* Update statistics. We change to register window 6, so this should be run @@ -1012,7 +1009,6 @@ el3_get_stats(struct net_device *dev) */ static void update_stats(struct net_device *dev) { - struct el3_private *lp = netdev_priv(dev); int ioaddr = dev->base_addr; if (el3_debug > 5) @@ -1021,13 +1017,13 @@ static void update_stats(struct net_device *dev) outw(StatsDisable, ioaddr + EL3_CMD); /* Switch to the stats window, and read everything. */ EL3WINDOW(6); - lp->stats.tx_carrier_errors += inb(ioaddr + 0); - lp->stats.tx_heartbeat_errors += inb(ioaddr + 1); + dev->stats.tx_carrier_errors += inb(ioaddr + 0); + dev->stats.tx_heartbeat_errors += inb(ioaddr + 1); /* Multiple collisions. */ inb(ioaddr + 2); - lp->stats.collisions += inb(ioaddr + 3); - lp->stats.tx_window_errors += inb(ioaddr + 4); - lp->stats.rx_fifo_errors += inb(ioaddr + 5); - lp->stats.tx_packets += inb(ioaddr + 6); + dev->stats.collisions += inb(ioaddr + 3); + dev->stats.tx_window_errors += inb(ioaddr + 4); + dev->stats.rx_fifo_errors += inb(ioaddr + 5); + dev->stats.tx_packets += inb(ioaddr + 6); /* Rx packets */ inb(ioaddr + 7); /* Tx deferrals */ inb(ioaddr + 8); inw(ioaddr + 10); /* Total Rx and Tx octets. */ @@ -1042,7 +1038,6 @@ static void update_stats(struct net_device *dev) static int el3_rx(struct net_device *dev) { - struct el3_private *lp = netdev_priv(dev); int ioaddr = dev->base_addr; short rx_status; @@ -1054,21 +1049,21 @@ el3_rx(struct net_device *dev) short error = rx_status & 0x3800; outw(RxDiscard, ioaddr + EL3_CMD); - lp->stats.rx_errors++; + dev->stats.rx_errors++; switch (error) { - case 0x0000: lp->stats.rx_over_errors++; break; - case 0x0800: lp->stats.rx_length_errors++; break; - case 0x1000: lp->stats.rx_frame_errors++; break; - case 0x1800: lp->stats.rx_length_errors++; break; - case 0x2000: lp->stats.rx_frame_errors++; break; - case 0x2800: lp->stats.rx_crc_errors++; break; + case 0x0000: dev->stats.rx_over_errors++; break; + case 0x0800: dev->stats.rx_length_errors++; break; + case 0x1000: dev->stats.rx_frame_errors++; break; + case 0x1800: dev->stats.rx_length_errors++; break; + case 0x2000: dev->stats.rx_frame_errors++; break; + case 0x2800: dev->stats.rx_crc_errors++; break; } } else { short pkt_len = rx_status & 0x7ff; struct sk_buff *skb; skb = dev_alloc_skb(pkt_len+5); - lp->stats.rx_bytes += pkt_len; + dev->stats.rx_bytes += pkt_len; if (el3_debug > 4) printk("Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); @@ -1083,11 +1078,11 @@ el3_rx(struct net_device *dev) skb->protocol = eth_type_trans(skb,dev); netif_rx(skb); dev->last_rx = jiffies; - lp->stats.rx_packets++; + dev->stats.rx_packets++; continue; } outw(RxDiscard, ioaddr + EL3_CMD); - lp->stats.rx_dropped++; + dev->stats.rx_dropped++; if (el3_debug) printk("%s: Couldn't allocate a sk_buff of size %d.\n", dev->name, pkt_len); -- cgit v1.2.3-70-g09d2 From dfd44151e8888b964b7f2400f26794154a58c86b Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Tue, 29 Apr 2008 03:07:31 +0300 Subject: 3c515: use netstats in net_device structure Use net_device_stats from net_device structure instead of local. Signed-off-by: Paulius Zaleckas Signed-off-by: Jeff Garzik --- drivers/net/3c515.c | 64 +++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index 6ab84b661d7..105a8c7ca7e 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c @@ -310,7 +310,6 @@ struct corkscrew_private { struct sk_buff *tx_skbuff[TX_RING_SIZE]; unsigned int cur_rx, cur_tx; /* The next free ring entry */ unsigned int dirty_rx, dirty_tx;/* The ring entries to be free()ed. */ - struct net_device_stats stats; struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */ struct timer_list timer; /* Media selection timer. */ int capabilities ; /* Adapter capabilities word. */ @@ -983,8 +982,8 @@ static void corkscrew_timeout(struct net_device *dev) break; outw(TxEnable, ioaddr + EL3_CMD); dev->trans_start = jiffies; - vp->stats.tx_errors++; - vp->stats.tx_dropped++; + dev->stats.tx_errors++; + dev->stats.tx_dropped++; netif_wake_queue(dev); } @@ -1050,7 +1049,7 @@ static int corkscrew_start_xmit(struct sk_buff *skb, } /* Put out the doubleword header... */ outl(skb->len, ioaddr + TX_FIFO); - vp->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += skb->len; #ifdef VORTEX_BUS_MASTER if (vp->bus_master) { /* Set the bus-master controller to transfer the packet. */ @@ -1094,9 +1093,9 @@ static int corkscrew_start_xmit(struct sk_buff *skb, printk("%s: Tx error, status %2.2x.\n", dev->name, tx_status); if (tx_status & 0x04) - vp->stats.tx_fifo_errors++; + dev->stats.tx_fifo_errors++; if (tx_status & 0x38) - vp->stats.tx_aborted_errors++; + dev->stats.tx_aborted_errors++; if (tx_status & 0x30) { int j; outw(TxReset, ioaddr + EL3_CMD); @@ -1257,7 +1256,6 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id) static int corkscrew_rx(struct net_device *dev) { - struct corkscrew_private *vp = netdev_priv(dev); int ioaddr = dev->base_addr; int i; short rx_status; @@ -1271,17 +1269,17 @@ static int corkscrew_rx(struct net_device *dev) if (corkscrew_debug > 2) printk(" Rx error: status %2.2x.\n", rx_error); - vp->stats.rx_errors++; + dev->stats.rx_errors++; if (rx_error & 0x01) - vp->stats.rx_over_errors++; + dev->stats.rx_over_errors++; if (rx_error & 0x02) - vp->stats.rx_length_errors++; + dev->stats.rx_length_errors++; if (rx_error & 0x04) - vp->stats.rx_frame_errors++; + dev->stats.rx_frame_errors++; if (rx_error & 0x08) - vp->stats.rx_crc_errors++; + dev->stats.rx_crc_errors++; if (rx_error & 0x10) - vp->stats.rx_length_errors++; + dev->stats.rx_length_errors++; } else { /* The packet length: up to 4.5K!. */ short pkt_len = rx_status & 0x1fff; @@ -1301,8 +1299,8 @@ static int corkscrew_rx(struct net_device *dev) skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; - vp->stats.rx_packets++; - vp->stats.rx_bytes += pkt_len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; /* Wait a limited time to go to next packet. */ for (i = 200; i >= 0; i--) if (! (inw(ioaddr + EL3_STATUS) & CmdInProgress)) @@ -1312,7 +1310,7 @@ static int corkscrew_rx(struct net_device *dev) printk("%s: Couldn't allocate a sk_buff of size %d.\n", dev->name, pkt_len); } outw(RxDiscard, ioaddr + EL3_CMD); - vp->stats.rx_dropped++; + dev->stats.rx_dropped++; /* Wait a limited time to skip this packet. */ for (i = 200; i >= 0; i--) if (!(inw(ioaddr + EL3_STATUS) & CmdInProgress)) @@ -1337,23 +1335,23 @@ static int boomerang_rx(struct net_device *dev) if (corkscrew_debug > 2) printk(" Rx error: status %2.2x.\n", rx_error); - vp->stats.rx_errors++; + dev->stats.rx_errors++; if (rx_error & 0x01) - vp->stats.rx_over_errors++; + dev->stats.rx_over_errors++; if (rx_error & 0x02) - vp->stats.rx_length_errors++; + dev->stats.rx_length_errors++; if (rx_error & 0x04) - vp->stats.rx_frame_errors++; + dev->stats.rx_frame_errors++; if (rx_error & 0x08) - vp->stats.rx_crc_errors++; + dev->stats.rx_crc_errors++; if (rx_error & 0x10) - vp->stats.rx_length_errors++; + dev->stats.rx_length_errors++; } else { /* The packet length: up to 4.5K!. */ short pkt_len = rx_status & 0x1fff; struct sk_buff *skb; - vp->stats.rx_bytes += pkt_len; + dev->stats.rx_bytes += pkt_len; if (corkscrew_debug > 4) printk("Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); @@ -1388,7 +1386,7 @@ static int boomerang_rx(struct net_device *dev) skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; - vp->stats.rx_packets++; + dev->stats.rx_packets++; } entry = (++vp->cur_rx) % RX_RING_SIZE; } @@ -1475,7 +1473,7 @@ static struct net_device_stats *corkscrew_get_stats(struct net_device *dev) update_stats(dev->base_addr, dev); spin_unlock_irqrestore(&vp->lock, flags); } - return &vp->stats; + return &dev->stats; } /* Update statistics. @@ -1487,19 +1485,17 @@ static struct net_device_stats *corkscrew_get_stats(struct net_device *dev) */ static void update_stats(int ioaddr, struct net_device *dev) { - struct corkscrew_private *vp = netdev_priv(dev); - /* Unlike the 3c5x9 we need not turn off stats updates while reading. */ /* Switch to the stats window, and read everything. */ EL3WINDOW(6); - vp->stats.tx_carrier_errors += inb(ioaddr + 0); - vp->stats.tx_heartbeat_errors += inb(ioaddr + 1); + dev->stats.tx_carrier_errors += inb(ioaddr + 0); + dev->stats.tx_heartbeat_errors += inb(ioaddr + 1); /* Multiple collisions. */ inb(ioaddr + 2); - vp->stats.collisions += inb(ioaddr + 3); - vp->stats.tx_window_errors += inb(ioaddr + 4); - vp->stats.rx_fifo_errors += inb(ioaddr + 5); - vp->stats.tx_packets += inb(ioaddr + 6); - vp->stats.tx_packets += (inb(ioaddr + 9) & 0x30) << 4; + dev->stats.collisions += inb(ioaddr + 3); + dev->stats.tx_window_errors += inb(ioaddr + 4); + dev->stats.rx_fifo_errors += inb(ioaddr + 5); + dev->stats.tx_packets += inb(ioaddr + 6); + dev->stats.tx_packets += (inb(ioaddr + 9) & 0x30) << 4; /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */ /* Tx deferrals */ inb(ioaddr + 8); -- cgit v1.2.3-70-g09d2 From 0425b46a4beef234c522f183d5c2934edbb0f625 Mon Sep 17 00:00:00 2001 From: Sreenivasa Honnur Date: Mon, 28 Apr 2008 21:08:45 -0400 Subject: S2io: Enable multi ring support - Seperate ring specific data - Initialize all configured rings with equal priority. - Updated boundary check for number of Rings. - Updated per ring statistics of rx_bytes and rx_packets. - Moved lro struct from struct s2io_nic to struct ring_info. - Access respective rx ring directly in fill_rx_buffers. - Moved rx_bufs_left struct s2io_nic to struct ring_info. - Added per ring variables - rxd_mode, rxd_count, dev, pdev. Signed-off-by: Surjit Reang Signed-off-by: Sreenivasa Honnur Signed-off-by: Ramkrishna Vepa Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 335 +++++++++++++++++++++++++++-------------------------- drivers/net/s2io.h | 82 ++++++++----- 2 files changed, 226 insertions(+), 191 deletions(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 157fd932e95..74cc80cc49b 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -809,6 +809,7 @@ static int init_shared_mem(struct s2io_nic *nic) config->rx_cfg[i].num_rxd - 1; mac_control->rings[i].nic = nic; mac_control->rings[i].ring_no = i; + mac_control->rings[i].lro = lro_enable; blk_cnt = config->rx_cfg[i].num_rxd / (rxd_count[nic->rxd_mode] + 1); @@ -1560,113 +1561,112 @@ static int init_nic(struct s2io_nic *nic) writeq(val64, &bar0->tx_fifo_partition_0); /* Filling the Rx round robin registers as per the - * number of Rings and steering based on QoS. - */ + * number of Rings and steering based on QoS with + * equal priority. + */ switch (config->rx_ring_num) { case 1: + val64 = 0x0; + writeq(val64, &bar0->rx_w_round_robin_0); + writeq(val64, &bar0->rx_w_round_robin_1); + writeq(val64, &bar0->rx_w_round_robin_2); + writeq(val64, &bar0->rx_w_round_robin_3); + writeq(val64, &bar0->rx_w_round_robin_4); + val64 = 0x8080808080808080ULL; writeq(val64, &bar0->rts_qos_steering); break; case 2: - val64 = 0x0000010000010000ULL; + val64 = 0x0001000100010001ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0100000100000100ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0001000001000001ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0000010000010000ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0100000000000000ULL; + val64 = 0x0001000100000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080808040404040ULL; writeq(val64, &bar0->rts_qos_steering); break; case 3: - val64 = 0x0001000102000001ULL; + val64 = 0x0001020001020001ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0001020000010001ULL; + val64 = 0x0200010200010200ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0200000100010200ULL; + val64 = 0x0102000102000102ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0001000102000001ULL; + val64 = 0x0001020001020001ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0001020000000000ULL; + val64 = 0x0200010200000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080804040402020ULL; writeq(val64, &bar0->rts_qos_steering); break; case 4: - val64 = 0x0001020300010200ULL; + val64 = 0x0001020300010203ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0100000102030001ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0200010000010203ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0001020001000001ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0203000100000000ULL; + val64 = 0x0001020300000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080404020201010ULL; writeq(val64, &bar0->rts_qos_steering); break; case 5: - val64 = 0x0001000203000102ULL; + val64 = 0x0001020304000102ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0001020001030004ULL; + val64 = 0x0304000102030400ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0001000203000102ULL; + val64 = 0x0102030400010203ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0001020001030004ULL; + val64 = 0x0400010203040001ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0001000000000000ULL; + val64 = 0x0203040000000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080404020201008ULL; writeq(val64, &bar0->rts_qos_steering); break; case 6: - val64 = 0x0001020304000102ULL; + val64 = 0x0001020304050001ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0304050001020001ULL; + val64 = 0x0203040500010203ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0203000100000102ULL; + val64 = 0x0405000102030405ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0304000102030405ULL; + val64 = 0x0001020304050001ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0001000200000000ULL; + val64 = 0x0203040500000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080404020100804ULL; writeq(val64, &bar0->rts_qos_steering); break; case 7: - val64 = 0x0001020001020300ULL; + val64 = 0x0001020304050600ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0102030400010203ULL; + val64 = 0x0102030405060001ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0405060001020001ULL; + val64 = 0x0203040506000102ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0304050000010200ULL; + val64 = 0x0304050600010203ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0102030000000000ULL; + val64 = 0x0405060000000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8080402010080402ULL; writeq(val64, &bar0->rts_qos_steering); break; case 8: - val64 = 0x0001020300040105ULL; + val64 = 0x0001020304050607ULL; writeq(val64, &bar0->rx_w_round_robin_0); - val64 = 0x0200030106000204ULL; writeq(val64, &bar0->rx_w_round_robin_1); - val64 = 0x0103000502010007ULL; writeq(val64, &bar0->rx_w_round_robin_2); - val64 = 0x0304010002060500ULL; writeq(val64, &bar0->rx_w_round_robin_3); - val64 = 0x0103020400000000ULL; + val64 = 0x0001020300000000ULL; writeq(val64, &bar0->rx_w_round_robin_4); val64 = 0x8040201008040201ULL; @@ -2499,8 +2499,7 @@ static void stop_nic(struct s2io_nic *nic) /** * fill_rx_buffers - Allocates the Rx side skbs - * @nic: device private variable - * @ring_no: ring number + * @ring_info: per ring structure * Description: * The function allocates Rx side skbs and puts the physical * address of these buffers into the RxD buffer pointers, so that the NIC @@ -2518,103 +2517,94 @@ static void stop_nic(struct s2io_nic *nic) * SUCCESS on success or an appropriate -ve value on failure. */ -static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) +static int fill_rx_buffers(struct ring_info *ring) { - struct net_device *dev = nic->dev; struct sk_buff *skb; struct RxD_t *rxdp; - int off, off1, size, block_no, block_no1; + int off, size, block_no, block_no1; u32 alloc_tab = 0; u32 alloc_cnt; - struct mac_info *mac_control; - struct config_param *config; u64 tmp; struct buffAdd *ba; struct RxD_t *first_rxdp = NULL; u64 Buffer0_ptr = 0, Buffer1_ptr = 0; + int rxd_index = 0; struct RxD1 *rxdp1; struct RxD3 *rxdp3; - struct swStat *stats = &nic->mac_control.stats_info->sw_stat; + struct swStat *stats = &ring->nic->mac_control.stats_info->sw_stat; - mac_control = &nic->mac_control; - config = &nic->config; - alloc_cnt = mac_control->rings[ring_no].pkt_cnt - - atomic_read(&nic->rx_bufs_left[ring_no]); + alloc_cnt = ring->pkt_cnt - ring->rx_bufs_left; - block_no1 = mac_control->rings[ring_no].rx_curr_get_info.block_index; - off1 = mac_control->rings[ring_no].rx_curr_get_info.offset; + block_no1 = ring->rx_curr_get_info.block_index; while (alloc_tab < alloc_cnt) { - block_no = mac_control->rings[ring_no].rx_curr_put_info. - block_index; - off = mac_control->rings[ring_no].rx_curr_put_info.offset; + block_no = ring->rx_curr_put_info.block_index; - rxdp = mac_control->rings[ring_no]. - rx_blocks[block_no].rxds[off].virt_addr; + off = ring->rx_curr_put_info.offset; + + rxdp = ring->rx_blocks[block_no].rxds[off].virt_addr; + + rxd_index = off + 1; + if (block_no) + rxd_index += (block_no * ring->rxd_count); - if ((block_no == block_no1) && (off == off1) && - (rxdp->Host_Control)) { + if ((block_no == block_no1) && + (off == ring->rx_curr_get_info.offset) && + (rxdp->Host_Control)) { DBG_PRINT(INTR_DBG, "%s: Get and Put", - dev->name); + ring->dev->name); DBG_PRINT(INTR_DBG, " info equated\n"); goto end; } - if (off && (off == rxd_count[nic->rxd_mode])) { - mac_control->rings[ring_no].rx_curr_put_info. - block_index++; - if (mac_control->rings[ring_no].rx_curr_put_info. - block_index == mac_control->rings[ring_no]. - block_count) - mac_control->rings[ring_no].rx_curr_put_info. - block_index = 0; - block_no = mac_control->rings[ring_no]. - rx_curr_put_info.block_index; - if (off == rxd_count[nic->rxd_mode]) - off = 0; - mac_control->rings[ring_no].rx_curr_put_info. - offset = off; - rxdp = mac_control->rings[ring_no]. - rx_blocks[block_no].block_virt_addr; + if (off && (off == ring->rxd_count)) { + ring->rx_curr_put_info.block_index++; + if (ring->rx_curr_put_info.block_index == + ring->block_count) + ring->rx_curr_put_info.block_index = 0; + block_no = ring->rx_curr_put_info.block_index; + off = 0; + ring->rx_curr_put_info.offset = off; + rxdp = ring->rx_blocks[block_no].block_virt_addr; DBG_PRINT(INTR_DBG, "%s: Next block at: %p\n", - dev->name, rxdp); + ring->dev->name, rxdp); + } if ((rxdp->Control_1 & RXD_OWN_XENA) && - ((nic->rxd_mode == RXD_MODE_3B) && + ((ring->rxd_mode == RXD_MODE_3B) && (rxdp->Control_2 & s2BIT(0)))) { - mac_control->rings[ring_no].rx_curr_put_info. - offset = off; + ring->rx_curr_put_info.offset = off; goto end; } /* calculate size of skb based on ring mode */ - size = dev->mtu + HEADER_ETHERNET_II_802_3_SIZE + + size = ring->mtu + HEADER_ETHERNET_II_802_3_SIZE + HEADER_802_2_SIZE + HEADER_SNAP_SIZE; - if (nic->rxd_mode == RXD_MODE_1) + if (ring->rxd_mode == RXD_MODE_1) size += NET_IP_ALIGN; else - size = dev->mtu + ALIGN_SIZE + BUF0_LEN + 4; + size = ring->mtu + ALIGN_SIZE + BUF0_LEN + 4; /* allocate skb */ skb = dev_alloc_skb(size); if(!skb) { - DBG_PRINT(INFO_DBG, "%s: Out of ", dev->name); + DBG_PRINT(INFO_DBG, "%s: Out of ", ring->dev->name); DBG_PRINT(INFO_DBG, "memory to allocate SKBs\n"); if (first_rxdp) { wmb(); first_rxdp->Control_1 |= RXD_OWN_XENA; } - nic->mac_control.stats_info->sw_stat. \ - mem_alloc_fail_cnt++; + stats->mem_alloc_fail_cnt++; + return -ENOMEM ; } - nic->mac_control.stats_info->sw_stat.mem_allocated - += skb->truesize; - if (nic->rxd_mode == RXD_MODE_1) { + stats->mem_allocated += skb->truesize; + + if (ring->rxd_mode == RXD_MODE_1) { /* 1 buffer mode - normal operation mode */ rxdp1 = (struct RxD1*)rxdp; memset(rxdp, 0, sizeof(struct RxD1)); skb_reserve(skb, NET_IP_ALIGN); rxdp1->Buffer0_ptr = pci_map_single - (nic->pdev, skb->data, size - NET_IP_ALIGN, + (ring->pdev, skb->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); if( (rxdp1->Buffer0_ptr == 0) || (rxdp1->Buffer0_ptr == @@ -2623,8 +2613,8 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp->Control_2 = SET_BUFFER0_SIZE_1(size - NET_IP_ALIGN); - - } else if (nic->rxd_mode == RXD_MODE_3B) { + rxdp->Host_Control = (unsigned long) (skb); + } else if (ring->rxd_mode == RXD_MODE_3B) { /* * 2 buffer mode - * 2 buffer mode provides 128 @@ -2640,7 +2630,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) rxdp3->Buffer0_ptr = Buffer0_ptr; rxdp3->Buffer1_ptr = Buffer1_ptr; - ba = &mac_control->rings[ring_no].ba[block_no][off]; + ba = &ring->ba[block_no][off]; skb_reserve(skb, BUF0_LEN); tmp = (u64)(unsigned long) skb->data; tmp += ALIGN_SIZE; @@ -2650,10 +2640,10 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) if (!(rxdp3->Buffer0_ptr)) rxdp3->Buffer0_ptr = - pci_map_single(nic->pdev, ba->ba_0, BUF0_LEN, - PCI_DMA_FROMDEVICE); + pci_map_single(ring->pdev, ba->ba_0, + BUF0_LEN, PCI_DMA_FROMDEVICE); else - pci_dma_sync_single_for_device(nic->pdev, + pci_dma_sync_single_for_device(ring->pdev, (dma_addr_t) rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); if( (rxdp3->Buffer0_ptr == 0) || @@ -2661,7 +2651,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) goto pci_map_failed; rxdp->Control_2 = SET_BUFFER0_SIZE_3(BUF0_LEN); - if (nic->rxd_mode == RXD_MODE_3B) { + if (ring->rxd_mode == RXD_MODE_3B) { /* Two buffer mode */ /* @@ -2669,39 +2659,42 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) * L4 payload */ rxdp3->Buffer2_ptr = pci_map_single - (nic->pdev, skb->data, dev->mtu + 4, + (ring->pdev, skb->data, ring->mtu + 4, PCI_DMA_FROMDEVICE); if( (rxdp3->Buffer2_ptr == 0) || (rxdp3->Buffer2_ptr == DMA_ERROR_CODE)) goto pci_map_failed; - rxdp3->Buffer1_ptr = - pci_map_single(nic->pdev, + if (!rxdp3->Buffer1_ptr) + rxdp3->Buffer1_ptr = + pci_map_single(ring->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); + if( (rxdp3->Buffer1_ptr == 0) || (rxdp3->Buffer1_ptr == DMA_ERROR_CODE)) { pci_unmap_single - (nic->pdev, - (dma_addr_t)rxdp3->Buffer2_ptr, - dev->mtu + 4, + (ring->pdev, + (dma_addr_t)(unsigned long) + skb->data, + ring->mtu + 4, PCI_DMA_FROMDEVICE); goto pci_map_failed; } rxdp->Control_2 |= SET_BUFFER1_SIZE_3(1); rxdp->Control_2 |= SET_BUFFER2_SIZE_3 - (dev->mtu + 4); + (ring->mtu + 4); } rxdp->Control_2 |= s2BIT(0); + rxdp->Host_Control = (unsigned long) (skb); } - rxdp->Host_Control = (unsigned long) (skb); if (alloc_tab & ((1 << rxsync_frequency) - 1)) rxdp->Control_1 |= RXD_OWN_XENA; off++; - if (off == (rxd_count[nic->rxd_mode] + 1)) + if (off == (ring->rxd_count + 1)) off = 0; - mac_control->rings[ring_no].rx_curr_put_info.offset = off; + ring->rx_curr_put_info.offset = off; rxdp->Control_2 |= SET_RXD_MARKER; if (!(alloc_tab & ((1 << rxsync_frequency) - 1))) { @@ -2711,7 +2704,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) } first_rxdp = rxdp; } - atomic_inc(&nic->rx_bufs_left[ring_no]); + ring->rx_bufs_left += 1; alloc_tab++; } @@ -2783,7 +2776,7 @@ static void free_rxd_blk(struct s2io_nic *sp, int ring_no, int blk) } sp->mac_control.stats_info->sw_stat.mem_freed += skb->truesize; dev_kfree_skb(skb); - atomic_dec(&sp->rx_bufs_left[ring_no]); + mac_control->rings[ring_no].rx_bufs_left -= 1; } } @@ -2814,7 +2807,7 @@ static void free_rx_buffers(struct s2io_nic *sp) mac_control->rings[i].rx_curr_get_info.block_index = 0; mac_control->rings[i].rx_curr_put_info.offset = 0; mac_control->rings[i].rx_curr_get_info.offset = 0; - atomic_set(&sp->rx_bufs_left[i], 0); + mac_control->rings[i].rx_bufs_left = 0; DBG_PRINT(INIT_DBG, "%s:Freed 0x%x Rx Buffers on ring%d\n", dev->name, buf_cnt, i); } @@ -2864,7 +2857,7 @@ static int s2io_poll(struct napi_struct *napi, int budget) netif_rx_complete(dev, napi); for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(nic, i) == -ENOMEM) { + if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Poll!!\n"); break; @@ -2877,7 +2870,7 @@ static int s2io_poll(struct napi_struct *napi, int budget) no_rx: for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(nic, i) == -ENOMEM) { + if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Poll!!\n"); break; @@ -2928,7 +2921,7 @@ static void s2io_netpoll(struct net_device *dev) rx_intr_handler(&mac_control->rings[i]); for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(nic, i) == -ENOMEM) { + if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n"); break; @@ -2953,8 +2946,6 @@ static void s2io_netpoll(struct net_device *dev) */ static void rx_intr_handler(struct ring_info *ring_data) { - struct s2io_nic *nic = ring_data->nic; - struct net_device *dev = (struct net_device *) nic->dev; int get_block, put_block; struct rx_curr_get_info get_info, put_info; struct RxD_t *rxdp; @@ -2977,33 +2968,34 @@ static void rx_intr_handler(struct ring_info *ring_data) */ if ((get_block == put_block) && (get_info.offset + 1) == put_info.offset) { - DBG_PRINT(INTR_DBG, "%s: Ring Full\n",dev->name); + DBG_PRINT(INTR_DBG, "%s: Ring Full\n", + ring_data->dev->name); break; } skb = (struct sk_buff *) ((unsigned long)rxdp->Host_Control); if (skb == NULL) { DBG_PRINT(ERR_DBG, "%s: The skb is ", - dev->name); + ring_data->dev->name); DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); return; } - if (nic->rxd_mode == RXD_MODE_1) { + if (ring_data->rxd_mode == RXD_MODE_1) { rxdp1 = (struct RxD1*)rxdp; - pci_unmap_single(nic->pdev, (dma_addr_t) + pci_unmap_single(ring_data->pdev, (dma_addr_t) rxdp1->Buffer0_ptr, - dev->mtu + + ring_data->mtu + HEADER_ETHERNET_II_802_3_SIZE + HEADER_802_2_SIZE + HEADER_SNAP_SIZE, PCI_DMA_FROMDEVICE); - } else if (nic->rxd_mode == RXD_MODE_3B) { + } else if (ring_data->rxd_mode == RXD_MODE_3B) { rxdp3 = (struct RxD3*)rxdp; - pci_dma_sync_single_for_cpu(nic->pdev, (dma_addr_t) + pci_dma_sync_single_for_cpu(ring_data->pdev, (dma_addr_t) rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); - pci_unmap_single(nic->pdev, (dma_addr_t) + pci_unmap_single(ring_data->pdev, (dma_addr_t) rxdp3->Buffer2_ptr, - dev->mtu + 4, + ring_data->mtu + 4, PCI_DMA_FROMDEVICE); } prefetch(skb->data); @@ -3012,7 +3004,7 @@ static void rx_intr_handler(struct ring_info *ring_data) ring_data->rx_curr_get_info.offset = get_info.offset; rxdp = ring_data->rx_blocks[get_block]. rxds[get_info.offset].virt_addr; - if (get_info.offset == rxd_count[nic->rxd_mode]) { + if (get_info.offset == rxd_count[ring_data->rxd_mode]) { get_info.offset = 0; ring_data->rx_curr_get_info.offset = get_info.offset; get_block++; @@ -3022,19 +3014,21 @@ static void rx_intr_handler(struct ring_info *ring_data) rxdp = ring_data->rx_blocks[get_block].block_virt_addr; } - nic->pkts_to_process -= 1; - if ((napi) && (!nic->pkts_to_process)) - break; + if(ring_data->nic->config.napi){ + ring_data->nic->pkts_to_process -= 1; + if (!ring_data->nic->pkts_to_process) + break; + } pkt_cnt++; if ((indicate_max_pkts) && (pkt_cnt > indicate_max_pkts)) break; } - if (nic->lro) { + if (ring_data->lro) { /* Clear all LRO sessions before exiting */ for (i=0; ilro0_n[i]; + struct lro *lro = &ring_data->lro0_n[i]; if (lro->in_use) { - update_L3L4_header(nic, lro); + update_L3L4_header(ring_data->nic, lro); queue_rx_frame(lro->parent, lro->vlan_tag); clear_lro_session(lro); } @@ -4333,10 +4327,10 @@ s2io_alarm_handle(unsigned long data) mod_timer(&sp->alarm_timer, jiffies + HZ / 2); } -static int s2io_chk_rx_buffers(struct s2io_nic *sp, int rng_n) +static int s2io_chk_rx_buffers(struct ring_info *ring) { - if (fill_rx_buffers(sp, rng_n) == -ENOMEM) { - DBG_PRINT(INFO_DBG, "%s:Out of memory", sp->dev->name); + if (fill_rx_buffers(ring) == -ENOMEM) { + DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); } return 0; @@ -4351,7 +4345,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id) return IRQ_HANDLED; rx_intr_handler(ring); - s2io_chk_rx_buffers(sp, ring->ring_no); + s2io_chk_rx_buffers(ring); return IRQ_HANDLED; } @@ -4809,7 +4803,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) */ if (!config->napi) { for (i = 0; i < config->rx_ring_num; i++) - s2io_chk_rx_buffers(sp, i); + s2io_chk_rx_buffers(&mac_control->rings[i]); } writeq(sp->general_int_mask, &bar0->general_int_mask); readl(&bar0->general_int_status); @@ -4866,6 +4860,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev) struct s2io_nic *sp = dev->priv; struct mac_info *mac_control; struct config_param *config; + int i; mac_control = &sp->mac_control; @@ -4885,6 +4880,13 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev) sp->stats.rx_length_errors = le64_to_cpu(mac_control->stats_info->rmac_long_frms); + /* collect per-ring rx_packets and rx_bytes */ + sp->stats.rx_packets = sp->stats.rx_bytes = 0; + for (i = 0; i < config->rx_ring_num; i++) { + sp->stats.rx_packets += mac_control->rings[i].rx_packets; + sp->stats.rx_bytes += mac_control->rings[i].rx_bytes; + } + return (&sp->stats); } @@ -7157,7 +7159,9 @@ static int s2io_card_up(struct s2io_nic * sp) config = &sp->config; for (i = 0; i < config->rx_ring_num; i++) { - if ((ret = fill_rx_buffers(sp, i))) { + mac_control->rings[i].mtu = dev->mtu; + ret = fill_rx_buffers(&mac_control->rings[i]); + if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); s2io_reset(sp); @@ -7165,7 +7169,7 @@ static int s2io_card_up(struct s2io_nic * sp) return -ENOMEM; } DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i, - atomic_read(&sp->rx_bufs_left[i])); + mac_control->rings[i].rx_bufs_left); } /* Initialise napi */ @@ -7300,7 +7304,7 @@ static void s2io_tx_watchdog(struct net_device *dev) static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) { struct s2io_nic *sp = ring_data->nic; - struct net_device *dev = (struct net_device *) sp->dev; + struct net_device *dev = (struct net_device *) ring_data->dev; struct sk_buff *skb = (struct sk_buff *) ((unsigned long) rxdp->Host_Control); int ring_no = ring_data->ring_no; @@ -7377,19 +7381,19 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) sp->mac_control.stats_info->sw_stat.mem_freed += skb->truesize; dev_kfree_skb(skb); - atomic_dec(&sp->rx_bufs_left[ring_no]); + ring_data->rx_bufs_left -= 1; rxdp->Host_Control = 0; return 0; } } /* Updating statistics */ - sp->stats.rx_packets++; + ring_data->rx_packets++; rxdp->Host_Control = 0; if (sp->rxd_mode == RXD_MODE_1) { int len = RXD_GET_BUFFER0_SIZE_1(rxdp->Control_2); - sp->stats.rx_bytes += len; + ring_data->rx_bytes += len; skb_put(skb, len); } else if (sp->rxd_mode == RXD_MODE_3B) { @@ -7400,13 +7404,13 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) unsigned char *buff = skb_push(skb, buf0_len); struct buffAdd *ba = &ring_data->ba[get_block][get_off]; - sp->stats.rx_bytes += buf0_len + buf2_len; + ring_data->rx_bytes += buf0_len + buf2_len; memcpy(buff, ba->ba_0, buf0_len); skb_put(skb, buf2_len); } - if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) || - (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) && + if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!ring_data->lro) || + (ring_data->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) && (sp->rx_csum)) { l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1); l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1); @@ -7417,14 +7421,14 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) * a flag in the RxD. */ skb->ip_summed = CHECKSUM_UNNECESSARY; - if (sp->lro) { + if (ring_data->lro) { u32 tcp_len; u8 *tcp; int ret = 0; - ret = s2io_club_tcp_session(skb->data, &tcp, - &tcp_len, &lro, - rxdp, sp); + ret = s2io_club_tcp_session(ring_data, + skb->data, &tcp, &tcp_len, &lro, + rxdp, sp); switch (ret) { case 3: /* Begin anew */ lro->parent = skb; @@ -7486,7 +7490,7 @@ send_up: queue_rx_frame(skb, RXD_GET_VLAN_TAG(rxdp->Control_2)); dev->last_rx = jiffies; aggregate: - atomic_dec(&sp->rx_bufs_left[ring_no]); + sp->mac_control.rings[ring_no].rx_bufs_left -= 1; return SUCCESS; } @@ -7603,12 +7607,14 @@ static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type, tx_steering_type = NO_STEERING; } - if ( rx_ring_num > 8) { - DBG_PRINT(ERR_DBG, "s2io: Requested number of Rx rings not " + if (rx_ring_num > MAX_RX_RINGS) { + DBG_PRINT(ERR_DBG, "s2io: Requested number of rx rings not " "supported\n"); - DBG_PRINT(ERR_DBG, "s2io: Default to 8 Rx rings\n"); - rx_ring_num = 8; + DBG_PRINT(ERR_DBG, "s2io: Default to %d rx rings\n", + MAX_RX_RINGS); + rx_ring_num = MAX_RX_RINGS; } + if (*dev_intr_type != INTA) napi = 0; @@ -7836,10 +7842,15 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) /* Rx side parameters. */ config->rx_ring_num = rx_ring_num; - for (i = 0; i < MAX_RX_RINGS; i++) { + for (i = 0; i < config->rx_ring_num; i++) { config->rx_cfg[i].num_rxd = rx_ring_sz[i] * (rxd_count[sp->rxd_mode] + 1); config->rx_cfg[i].ring_priority = i; + mac_control->rings[i].rx_bufs_left = 0; + mac_control->rings[i].rxd_mode = sp->rxd_mode; + mac_control->rings[i].rxd_count = rxd_count[sp->rxd_mode]; + mac_control->rings[i].pdev = sp->pdev; + mac_control->rings[i].dev = sp->dev; } for (i = 0; i < rx_ring_num; i++) { @@ -7854,10 +7865,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) mac_control->mc_pause_threshold_q4q7 = mc_pause_threshold_q4q7; - /* Initialize Ring buffer parameters. */ - for (i = 0; i < config->rx_ring_num; i++) - atomic_set(&sp->rx_bufs_left[i], 0); - /* initialize the shared memory used by the NIC and the host */ if (init_shared_mem(sp)) { DBG_PRINT(ERR_DBG, "%s: Memory allocation failed\n", @@ -8077,6 +8084,9 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) DBG_PRINT(ERR_DBG, "%s: Using %d Tx fifo(s)\n", dev->name, sp->config.tx_fifo_num); + DBG_PRINT(ERR_DBG, "%s: Using %d Rx ring(s)\n", dev->name, + sp->config.rx_ring_num); + switch(sp->config.intr_type) { case INTA: DBG_PRINT(ERR_DBG, "%s: Interrupt type INTA\n", dev->name); @@ -8391,8 +8401,9 @@ static int verify_l3_l4_lro_capable(struct lro *l_lro, struct iphdr *ip, } static int -s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro, - struct RxD_t *rxdp, struct s2io_nic *sp) +s2io_club_tcp_session(struct ring_info *ring_data, u8 *buffer, u8 **tcp, + u32 *tcp_len, struct lro **lro, struct RxD_t *rxdp, + struct s2io_nic *sp) { struct iphdr *ip; struct tcphdr *tcph; @@ -8410,7 +8421,7 @@ s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro, tcph = (struct tcphdr *)*tcp; *tcp_len = get_l4_pyld_length(ip, tcph); for (i=0; ilro0_n[i]; + struct lro *l_lro = &ring_data->lro0_n[i]; if (l_lro->in_use) { if (check_for_socket_match(l_lro, ip, tcph)) continue; @@ -8448,7 +8459,7 @@ s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro, } for (i=0; ilro0_n[i]; + struct lro *l_lro = &ring_data->lro0_n[i]; if (!(l_lro->in_use)) { *lro = l_lro; ret = 3; /* Begin anew */ diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index ce53a02105f..0709ebae913 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -678,11 +678,53 @@ struct rx_block_info { struct rxd_info *rxds; }; +/* Data structure to represent a LRO session */ +struct lro { + struct sk_buff *parent; + struct sk_buff *last_frag; + u8 *l2h; + struct iphdr *iph; + struct tcphdr *tcph; + u32 tcp_next_seq; + __be32 tcp_ack; + int total_len; + int frags_len; + int sg_num; + int in_use; + __be16 window; + u16 vlan_tag; + u32 cur_tsval; + __be32 cur_tsecr; + u8 saw_ts; +} ____cacheline_aligned; + /* Ring specific structure */ struct ring_info { /* The ring number */ int ring_no; + /* per-ring buffer counter */ + u32 rx_bufs_left; + + #define MAX_LRO_SESSIONS 32 + struct lro lro0_n[MAX_LRO_SESSIONS]; + u8 lro; + + /* copy of sp->rxd_mode flag */ + int rxd_mode; + + /* Number of rxds per block for the rxd_mode */ + int rxd_count; + + /* copy of sp pointer */ + struct s2io_nic *nic; + + /* copy of sp->dev pointer */ + struct net_device *dev; + + /* copy of sp->pdev pointer */ + struct pci_dev *pdev; + /* * Place holders for the virtual and physical addresses of * all the Rx Blocks @@ -703,10 +745,16 @@ struct ring_info { */ struct rx_curr_get_info rx_curr_get_info; + /* interface MTU value */ + unsigned mtu; + /* Buffer Address store. */ struct buffAdd **ba; - struct s2io_nic *nic; -}; + + /* per-Ring statistics */ + unsigned long rx_packets; + unsigned long rx_bytes; +} ____cacheline_aligned; /* Fifo specific structure */ struct fifo_info { @@ -813,26 +861,6 @@ struct msix_info_st { u64 data; }; -/* Data structure to represent a LRO session */ -struct lro { - struct sk_buff *parent; - struct sk_buff *last_frag; - u8 *l2h; - struct iphdr *iph; - struct tcphdr *tcph; - u32 tcp_next_seq; - __be32 tcp_ack; - int total_len; - int frags_len; - int sg_num; - int in_use; - __be16 window; - u16 vlan_tag; - u32 cur_tsval; - __be32 cur_tsecr; - u8 saw_ts; -} ____cacheline_aligned; - /* These flags represent the devices temporary state */ enum s2io_device_state_t { @@ -872,8 +900,6 @@ struct s2io_nic { /* Space to back up the PCI config space */ u32 config_space[256 / sizeof(u32)]; - atomic_t rx_bufs_left[MAX_RX_RINGS]; - #define PROMISC 1 #define ALL_MULTI 2 @@ -950,8 +976,6 @@ struct s2io_nic { #define XFRAME_II_DEVICE 2 u8 device_type; -#define MAX_LRO_SESSIONS 32 - struct lro lro0_n[MAX_LRO_SESSIONS]; unsigned long clubbed_frms_cnt; unsigned long sending_both; u8 lro; @@ -1118,9 +1142,9 @@ static int do_s2io_add_mc(struct s2io_nic *sp, u8 *addr); static int do_s2io_add_mac(struct s2io_nic *sp, u64 addr, int offset); static int do_s2io_delete_unicast_mc(struct s2io_nic *sp, u64 addr); -static int -s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, struct lro **lro, - struct RxD_t *rxdp, struct s2io_nic *sp); +static int s2io_club_tcp_session(struct ring_info *ring_data, u8 *buffer, + u8 **tcp, u32 *tcp_len, struct lro **lro, struct RxD_t *rxdp, + struct s2io_nic *sp); static void clear_lro_session(struct lro *lro); static void queue_rx_frame(struct sk_buff *skb, u16 vlan_tag); static void update_L3L4_header(struct s2io_nic *sp, struct lro *lro); -- cgit v1.2.3-70-g09d2 From 23d9b3871fa03af32d06f4946f8d56b1af55997b Mon Sep 17 00:00:00 2001 From: Sreenivasa Honnur Date: Mon, 28 Apr 2008 21:09:40 -0400 Subject: S2io: Version update for multi ring patches - Updated version number. Signed-off-by: Surjit Reang Signed-off-by: Sreenivasa Honnur Signed-off-by: Ramkrishna Vepa Signed-off-by: Jeff Garzik --- drivers/net/s2io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 74cc80cc49b..523478ebfd6 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -86,7 +86,7 @@ #include "s2io.h" #include "s2io-regs.h" -#define DRV_VERSION "2.0.26.22" +#define DRV_VERSION "2.0.26.23" /* S2io Driver name & version. */ static char s2io_driver_name[] = "Neterion"; -- cgit v1.2.3-70-g09d2 From 5d12b132bc0bfb10d3f8d81f92606719b5032dcb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Apr 2008 10:58:22 -0700 Subject: drivers/net/phy: fix kernel-doc notation Fix kernel-doc warning: Warning(linux-2.6.25-git11//drivers/net/phy/phy_device.c:275): No description found for parameter 'bus_id' Signed-off-by: Randy Dunlap Signed-off-by: Jeff Garzik --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ddf8d51832a..ac3c01d28fd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -256,7 +256,7 @@ void phy_prepare_link(struct phy_device *phydev, /** * phy_connect - connect an ethernet device to a PHY device * @dev: the network device to connect - * @phy_id: the PHY device to connect + * @bus_id: the id string of the PHY device to connect * @handler: callback function for state change notifications * @flags: PHY device's dev_flags * @interface: PHY device's interface -- cgit v1.2.3-70-g09d2 From 48c41b9941233a85ccdb88c579bd4e9b0ee609cf Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Mon, 28 Apr 2008 18:36:46 +0100 Subject: Rename SMSC phy functions to be more generic Several models of SMSC PHY have the same interrupt status and mask registers as the LAN83C185, so these functions can service multiple different PHY drivers. Signed-off-by: Steve Glendinning Signed-off-by: Jeff Garzik --- drivers/net/phy/smsc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index b1d8ed40ad9..6a901f00ed1 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -38,7 +38,7 @@ (MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4) -static int lan83c185_config_intr(struct phy_device *phydev) +static int smsc_phy_config_intr(struct phy_device *phydev) { int rc = phy_write (phydev, MII_LAN83C185_IM, ((PHY_INTERRUPT_ENABLED == phydev->interrupts) @@ -48,16 +48,16 @@ static int lan83c185_config_intr(struct phy_device *phydev) return rc < 0 ? rc : 0; } -static int lan83c185_ack_interrupt(struct phy_device *phydev) +static int smsc_phy_ack_interrupt(struct phy_device *phydev) { int rc = phy_read (phydev, MII_LAN83C185_ISF); return rc < 0 ? rc : 0; } -static int lan83c185_config_init(struct phy_device *phydev) +static int smsc_phy_config_init(struct phy_device *phydev) { - return lan83c185_ack_interrupt (phydev); + return smsc_phy_ack_interrupt (phydev); } @@ -73,11 +73,11 @@ static struct phy_driver lan83c185_driver = { /* basic functions */ .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .config_init = lan83c185_config_init, + .config_init = smsc_phy_config_init, /* IRQ related */ - .ack_interrupt = lan83c185_ack_interrupt, - .config_intr = lan83c185_config_intr, + .ack_interrupt = smsc_phy_ack_interrupt, + .config_intr = smsc_phy_config_intr, .driver = { .owner = THIS_MODULE, } }; -- cgit v1.2.3-70-g09d2 From 4d9b1a022a33c57ca8f31a1364cef682c8c817d6 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Mon, 28 Apr 2008 18:37:29 +0100 Subject: Add support for SMSC LAN8187 and LAN8700 PHYs Add support for two additional SMSC PHY models with identical interrupt source and mask registers to the LAN83C185 Signed-off-by: Steve Glendinning Signed-off-by: Jeff Garzik --- drivers/net/phy/Kconfig | 2 +- drivers/net/phy/smsc.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 3ac8529bb92..6bf9e76b0a0 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -48,7 +48,7 @@ config VITESSE_PHY config SMSC_PHY tristate "Drivers for SMSC PHYs" ---help--- - Currently supports the LAN83C185 PHY + Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs config BROADCOM_PHY tristate "Drivers for Broadcom PHYs" diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 6a901f00ed1..73baa7a3bb0 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -12,6 +12,8 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * + * Support added for SMSC LAN8187 and LAN8700 by steve.glendinning@smsc.com + * */ #include @@ -82,13 +84,78 @@ static struct phy_driver lan83c185_driver = { .driver = { .owner = THIS_MODULE, } }; +static struct phy_driver lan8187_driver = { + .phy_id = 0x0007c0b0, /* OUI=0x00800f, Model#=0x0b */ + .phy_id_mask = 0xfffffff0, + .name = "SMSC LAN8187", + + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause + | SUPPORTED_Asym_Pause), + .flags = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG, + + /* basic functions */ + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .config_init = smsc_phy_config_init, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, + .config_intr = smsc_phy_config_intr, + + .driver = { .owner = THIS_MODULE, } +}; + +static struct phy_driver lan8700_driver = { + .phy_id = 0x0007c0c0, /* OUI=0x00800f, Model#=0x0c */ + .phy_id_mask = 0xfffffff0, + .name = "SMSC LAN8700", + + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause + | SUPPORTED_Asym_Pause), + .flags = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG, + + /* basic functions */ + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .config_init = smsc_phy_config_init, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, + .config_intr = smsc_phy_config_intr, + + .driver = { .owner = THIS_MODULE, } +}; + static int __init smsc_init(void) { - return phy_driver_register (&lan83c185_driver); + int ret; + + ret = phy_driver_register (&lan83c185_driver); + if (ret) + goto err1; + + ret = phy_driver_register (&lan8187_driver); + if (ret) + goto err2; + + ret = phy_driver_register (&lan8700_driver); + if (ret) + goto err3; + + return 0; + +err3: + phy_driver_unregister (&lan8187_driver); +err2: + phy_driver_unregister (&lan83c185_driver); +err1: + return ret; } static void __exit smsc_exit(void) { + phy_driver_unregister (&lan8700_driver); + phy_driver_unregister (&lan8187_driver); phy_driver_unregister (&lan83c185_driver); } -- cgit v1.2.3-70-g09d2 From 4e5b864e7cac67f06f18147b1980cb6b8fb213ec Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 26 Apr 2008 23:44:03 -0700 Subject: net: eepro autoport typo Found by sparse dubious !x & y warning...hidden in the GetBit macro why !Word doesn't make any sense. Signed-off-by: Harvey Harrison Signed-off-by: Jeff Garzik --- drivers/net/eepro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index 83bda6ccde9..56f50491a45 100644 --- a/drivers/net/eepro.c +++ b/drivers/net/eepro.c @@ -633,7 +633,7 @@ static void __init printEEPROMInfo(struct net_device *dev) printk(KERN_DEBUG " PC: %d\n", GetBit(Word,ee_PC)); printk(KERN_DEBUG " TPE/AUI: %d\n", GetBit(Word,ee_TPE_AUI)); printk(KERN_DEBUG " Jabber: %d\n", GetBit(Word,ee_Jabber)); - printk(KERN_DEBUG " AutoPort: %d\n", GetBit(!Word,ee_Jabber)); + printk(KERN_DEBUG " AutoPort: %d\n", !GetBit(Word,ee_AutoPort)); printk(KERN_DEBUG " Duplex: %d\n", GetBit(Word,ee_Duplex)); } -- cgit v1.2.3-70-g09d2 From 7ef0a7ee2f9ac7ee8e2a597821adb2a78b882791 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Fri, 25 Apr 2008 11:53:10 +0800 Subject: Blackfin EMAC Driver: code cleanup - replace specific "bf537" function or data structure name to "bfin_mac" - cleanup bfin_mac_probe with error checking - punt set_pin_mux function, call peripheral request/free list functions directly Signed-off-by: Bryan Wu Signed-off-by: Jeff Garzik --- drivers/net/bfin_mac.c | 249 ++++++++++++++++++++++++------------------------- drivers/net/bfin_mac.h | 2 +- 2 files changed, 124 insertions(+), 127 deletions(-) diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 4fec8581bfd..609748b84e8 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -42,7 +42,7 @@ #define DRV_NAME "bfin_mac" #define DRV_VERSION "1.1" #define DRV_AUTHOR "Bryan Wu, Luke Yang" -#define DRV_DESC "Blackfin BF53[67] BF527 on-chip Ethernet MAC driver" +#define DRV_DESC "Blackfin on-chip Ethernet MAC driver" MODULE_AUTHOR(DRV_AUTHOR); MODULE_LICENSE("GPL"); @@ -73,8 +73,14 @@ static struct net_dma_desc_tx *current_tx_ptr; static struct net_dma_desc_tx *tx_desc; static struct net_dma_desc_rx *rx_desc; -static void bf537mac_disable(void); -static void bf537mac_enable(void); +#if defined(CONFIG_BFIN_MAC_RMII) +static u16 pin_req[] = P_RMII0; +#else +static u16 pin_req[] = P_MII0; +#endif + +static void bfin_mac_disable(void); +static void bfin_mac_enable(void); static void desc_list_free(void) { @@ -243,27 +249,6 @@ init_error: /*---PHY CONTROL AND CONFIGURATION-----------------------------------------*/ -/* Set FER regs to MUX in Ethernet pins */ -static int setup_pin_mux(int action) -{ -#if defined(CONFIG_BFIN_MAC_RMII) - u16 pin_req[] = P_RMII0; -#else - u16 pin_req[] = P_MII0; -#endif - - if (action) { - if (peripheral_request_list(pin_req, DRV_NAME)) { - printk(KERN_ERR DRV_NAME - ": Requesting Peripherals failed\n"); - return -EFAULT; - } - } else - peripheral_free_list(pin_req); - - return 0; -} - /* * MII operations */ @@ -322,9 +307,9 @@ static int mdiobus_reset(struct mii_bus *bus) return 0; } -static void bf537_adjust_link(struct net_device *dev) +static void bfin_mac_adjust_link(struct net_device *dev) { - struct bf537mac_local *lp = netdev_priv(dev); + struct bfin_mac_local *lp = netdev_priv(dev); struct phy_device *phydev = lp->phydev; unsigned long flags; int new_state = 0; @@ -395,7 +380,7 @@ static void bf537_adjust_link(struct net_device *dev) static int mii_probe(struct net_device *dev) { - struct bf537mac_local *lp = netdev_priv(dev); + struct bfin_mac_local *lp = netdev_priv(dev); struct phy_device *phydev = NULL; unsigned short sysctl; int i; @@ -431,10 +416,10 @@ static int mii_probe(struct net_device *dev) } #if defined(CONFIG_BFIN_MAC_RMII) - phydev = phy_connect(dev, phydev->dev.bus_id, &bf537_adjust_link, 0, + phydev = phy_connect(dev, phydev->dev.bus_id, &bfin_mac_adjust_link, 0, PHY_INTERFACE_MODE_RMII); #else - phydev = phy_connect(dev, phydev->dev.bus_id, &bf537_adjust_link, 0, + phydev = phy_connect(dev, phydev->dev.bus_id, &bfin_mac_adjust_link, 0, PHY_INTERFACE_MODE_MII); #endif @@ -511,7 +496,7 @@ static void setup_mac_addr(u8 *mac_addr) bfin_write_EMAC_ADDRHI(addr_hi); } -static int bf537mac_set_mac_address(struct net_device *dev, void *p) +static int bfin_mac_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (netif_running(dev)) @@ -573,7 +558,7 @@ adjust_head: } -static int bf537mac_hard_start_xmit(struct sk_buff *skb, +static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int data; @@ -631,7 +616,7 @@ out: return 0; } -static void bf537mac_rx(struct net_device *dev) +static void bfin_mac_rx(struct net_device *dev) { struct sk_buff *skb, *new_skb; unsigned short len; @@ -680,7 +665,7 @@ out: } /* interrupt routine to handle rx and error signal */ -static irqreturn_t bf537mac_interrupt(int irq, void *dev_id) +static irqreturn_t bfin_mac_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; int number = 0; @@ -700,21 +685,21 @@ get_one_packet: } real_rx: - bf537mac_rx(dev); + bfin_mac_rx(dev); number++; goto get_one_packet; } #ifdef CONFIG_NET_POLL_CONTROLLER -static void bf537mac_poll(struct net_device *dev) +static void bfin_mac_poll(struct net_device *dev) { disable_irq(IRQ_MAC_RX); - bf537mac_interrupt(IRQ_MAC_RX, dev); + bfin_mac_interrupt(IRQ_MAC_RX, dev); enable_irq(IRQ_MAC_RX); } #endif /* CONFIG_NET_POLL_CONTROLLER */ -static void bf537mac_disable(void) +static void bfin_mac_disable(void) { unsigned int opmode; @@ -728,7 +713,7 @@ static void bf537mac_disable(void) /* * Enable Interrupts, Receive, and Transmit */ -static void bf537mac_enable(void) +static void bfin_mac_enable(void) { u32 opmode; @@ -766,23 +751,23 @@ static void bf537mac_enable(void) } /* Our watchdog timed out. Called by the networking layer */ -static void bf537mac_timeout(struct net_device *dev) +static void bfin_mac_timeout(struct net_device *dev) { pr_debug("%s: %s\n", dev->name, __FUNCTION__); - bf537mac_disable(); + bfin_mac_disable(); /* reset tx queue */ tx_list_tail = tx_list_head->next; - bf537mac_enable(); + bfin_mac_enable(); /* We can accept TX packets again */ dev->trans_start = jiffies; netif_wake_queue(dev); } -static void bf537mac_multicast_hash(struct net_device *dev) +static void bfin_mac_multicast_hash(struct net_device *dev) { u32 emac_hashhi, emac_hashlo; struct dev_mc_list *dmi = dev->mc_list; @@ -821,7 +806,7 @@ static void bf537mac_multicast_hash(struct net_device *dev) * promiscuous mode (for TCPDUMP and cousins) or accept * a select set of multicast packets */ -static void bf537mac_set_multicast_list(struct net_device *dev) +static void bfin_mac_set_multicast_list(struct net_device *dev) { u32 sysctl; @@ -840,7 +825,7 @@ static void bf537mac_set_multicast_list(struct net_device *dev) sysctl = bfin_read_EMAC_OPMODE(); sysctl |= HM; bfin_write_EMAC_OPMODE(sysctl); - bf537mac_multicast_hash(dev); + bfin_mac_multicast_hash(dev); } else { /* clear promisc or multicast mode */ sysctl = bfin_read_EMAC_OPMODE(); @@ -852,7 +837,7 @@ static void bf537mac_set_multicast_list(struct net_device *dev) /* * this puts the device in an inactive state */ -static void bf537mac_shutdown(struct net_device *dev) +static void bfin_mac_shutdown(struct net_device *dev) { /* Turn off the EMAC */ bfin_write_EMAC_OPMODE(0x00000000); @@ -866,9 +851,9 @@ static void bf537mac_shutdown(struct net_device *dev) * * Set up everything, reset the card, etc.. */ -static int bf537mac_open(struct net_device *dev) +static int bfin_mac_open(struct net_device *dev) { - struct bf537mac_local *lp = netdev_priv(dev); + struct bfin_mac_local *lp = netdev_priv(dev); int retval; pr_debug("%s: %s\n", dev->name, __FUNCTION__); @@ -891,8 +876,8 @@ static int bf537mac_open(struct net_device *dev) phy_start(lp->phydev); phy_write(lp->phydev, MII_BMCR, BMCR_RESET); setup_system_regs(dev); - bf537mac_disable(); - bf537mac_enable(); + bfin_mac_disable(); + bfin_mac_enable(); pr_debug("hardware init finished\n"); netif_start_queue(dev); netif_carrier_on(dev); @@ -906,9 +891,9 @@ static int bf537mac_open(struct net_device *dev) * and not talk to the outside world. Caused by * an 'ifconfig ethX down' */ -static int bf537mac_close(struct net_device *dev) +static int bfin_mac_close(struct net_device *dev) { - struct bf537mac_local *lp = netdev_priv(dev); + struct bfin_mac_local *lp = netdev_priv(dev); pr_debug("%s: %s\n", dev->name, __FUNCTION__); netif_stop_queue(dev); @@ -918,7 +903,7 @@ static int bf537mac_close(struct net_device *dev) phy_write(lp->phydev, MII_BMCR, BMCR_PDOWN); /* clear everything */ - bf537mac_shutdown(dev); + bfin_mac_shutdown(dev); /* free the rx/tx buffers */ desc_list_free(); @@ -926,46 +911,59 @@ static int bf537mac_close(struct net_device *dev) return 0; } -static int __init bf537mac_probe(struct net_device *dev) +static int __init bfin_mac_probe(struct platform_device *pdev) { - struct bf537mac_local *lp = netdev_priv(dev); - int retval; - int i; + struct net_device *ndev; + struct bfin_mac_local *lp; + int rc, i; + + ndev = alloc_etherdev(sizeof(struct bfin_mac_local)); + if (!ndev) { + dev_err(&pdev->dev, "Cannot allocate net device!\n"); + return -ENOMEM; + } + + SET_NETDEV_DEV(ndev, &pdev->dev); + platform_set_drvdata(pdev, ndev); + lp = netdev_priv(ndev); /* Grab the MAC address in the MAC */ - *(__le32 *) (&(dev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO()); - *(__le16 *) (&(dev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); + *(__le32 *) (&(ndev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO()); + *(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); /* probe mac */ /*todo: how to proble? which is revision_register */ bfin_write_EMAC_ADDRLO(0x12345678); if (bfin_read_EMAC_ADDRLO() != 0x12345678) { - pr_debug("can't detect bf537 mac!\n"); - retval = -ENODEV; - goto err_out; + dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n"); + rc = -ENODEV; + goto out_err_probe_mac; } /* set the GPIO pins to Ethernet mode */ - retval = setup_pin_mux(1); - if (retval) - return retval; - - /*Is it valid? (Did bootloader initialize it?) */ - if (!is_valid_ether_addr(dev->dev_addr)) { - /* Grab the MAC from the board somehow - this is done in the - arch/blackfin/mach-bf537/boards/eth_mac.c */ - bfin_get_ether_addr(dev->dev_addr); + rc = peripheral_request_list(pin_req, DRV_NAME); + if (rc) { + dev_err(&pdev->dev, "Requesting peripherals failed!\n"); + rc = -EFAULT; + goto out_err_setup_pin_mux; } + /* + * Is it valid? (Did bootloader initialize it?) + * Grab the MAC from the board somehow + * this is done in the arch/blackfin/mach-bfxxx/boards/eth_mac.c + */ + if (!is_valid_ether_addr(ndev->dev_addr)) + bfin_get_ether_addr(ndev->dev_addr); + /* If still not valid, get a random one */ - if (!is_valid_ether_addr(dev->dev_addr)) { - random_ether_addr(dev->dev_addr); - } + if (!is_valid_ether_addr(ndev->dev_addr)) + random_ether_addr(ndev->dev_addr); - setup_mac_addr(dev->dev_addr); + setup_mac_addr(ndev->dev_addr); /* MDIO bus initial */ - lp->mii_bus.priv = dev; + lp->mii_bus.priv = ndev; lp->mii_bus.read = mdiobus_read; lp->mii_bus.write = mdiobus_write; lp->mii_bus.reset = mdiobus_reset; @@ -975,86 +973,85 @@ static int __init bf537mac_probe(struct net_device *dev) for (i = 0; i < PHY_MAX_ADDR; ++i) lp->mii_bus.irq[i] = PHY_POLL; - mdiobus_register(&lp->mii_bus); + rc = mdiobus_register(&lp->mii_bus); + if (rc) { + dev_err(&pdev->dev, "Cannot register MDIO bus!\n"); + goto out_err_mdiobus_register; + } - retval = mii_probe(dev); - if (retval) - return retval; + rc = mii_probe(ndev); + if (rc) { + dev_err(&pdev->dev, "MII Probe failed!\n"); + goto out_err_mii_probe; + } /* Fill in the fields of the device structure with ethernet values. */ - ether_setup(dev); - - dev->open = bf537mac_open; - dev->stop = bf537mac_close; - dev->hard_start_xmit = bf537mac_hard_start_xmit; - dev->set_mac_address = bf537mac_set_mac_address; - dev->tx_timeout = bf537mac_timeout; - dev->set_multicast_list = bf537mac_set_multicast_list; + ether_setup(ndev); + + ndev->open = bfin_mac_open; + ndev->stop = bfin_mac_close; + ndev->hard_start_xmit = bfin_mac_hard_start_xmit; + ndev->set_mac_address = bfin_mac_set_mac_address; + ndev->tx_timeout = bfin_mac_timeout; + ndev->set_multicast_list = bfin_mac_set_multicast_list; #ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = bf537mac_poll; + ndev->poll_controller = bfin_mac_poll; #endif spin_lock_init(&lp->lock); /* now, enable interrupts */ /* register irq handler */ - if (request_irq - (IRQ_MAC_RX, bf537mac_interrupt, IRQF_DISABLED | IRQF_SHARED, - "EMAC_RX", dev)) { - printk(KERN_WARNING DRV_NAME - ": Unable to attach BlackFin MAC RX interrupt\n"); - return -EBUSY; + rc = request_irq(IRQ_MAC_RX, bfin_mac_interrupt, + IRQF_DISABLED | IRQF_SHARED, "EMAC_RX", ndev); + if (rc) { + dev_err(&pdev->dev, "Cannot request Blackfin MAC RX IRQ!\n"); + rc = -EBUSY; + goto out_err_request_irq; } - - retval = register_netdev(dev); - if (retval == 0) { - /* now, print out the card info, in a short format.. */ - printk(KERN_INFO "%s: Version %s, %s\n", - DRV_NAME, DRV_VERSION, DRV_DESC); - } - -err_out: - return retval; -} - -static int bfin_mac_probe(struct platform_device *pdev) -{ - struct net_device *ndev; - - ndev = alloc_etherdev(sizeof(struct bf537mac_local)); - if (!ndev) { - printk(KERN_WARNING DRV_NAME ": could not allocate device\n"); - return -ENOMEM; + rc = register_netdev(ndev); + if (rc) { + dev_err(&pdev->dev, "Cannot register net device!\n"); + goto out_err_reg_ndev; } - SET_NETDEV_DEV(ndev, &pdev->dev); + /* now, print out the card info, in a short format.. */ + dev_info(&pdev->dev, "%s, Version %s\n", DRV_DESC, DRV_VERSION); - platform_set_drvdata(pdev, ndev); + return 0; - if (bf537mac_probe(ndev) != 0) { - platform_set_drvdata(pdev, NULL); - free_netdev(ndev); - printk(KERN_WARNING DRV_NAME ": not found\n"); - return -ENODEV; - } +out_err_reg_ndev: + free_irq(IRQ_MAC_RX, ndev); +out_err_request_irq: +out_err_mii_probe: + mdiobus_unregister(&lp->mii_bus); +out_err_mdiobus_register: + peripheral_free_list(pin_req); +out_err_setup_pin_mux: +out_err_probe_mac: + platform_set_drvdata(pdev, NULL); + free_netdev(ndev); - return 0; + return rc; } static int bfin_mac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); + struct bfin_mac_local *lp = netdev_priv(ndev); platform_set_drvdata(pdev, NULL); + mdiobus_unregister(&lp->mii_bus); + unregister_netdev(ndev); free_irq(IRQ_MAC_RX, ndev); free_netdev(ndev); - setup_pin_mux(0); + peripheral_free_list(pin_req); return 0; } @@ -1065,7 +1062,7 @@ static int bfin_mac_suspend(struct platform_device *pdev, pm_message_t mesg) struct net_device *net_dev = platform_get_drvdata(pdev); if (netif_running(net_dev)) - bf537mac_close(net_dev); + bfin_mac_close(net_dev); return 0; } @@ -1075,7 +1072,7 @@ static int bfin_mac_resume(struct platform_device *pdev) struct net_device *net_dev = platform_get_drvdata(pdev); if (netif_running(net_dev)) - bf537mac_open(net_dev); + bfin_mac_open(net_dev); return 0; } diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h index f774d5a3694..beff51064ff 100644 --- a/drivers/net/bfin_mac.h +++ b/drivers/net/bfin_mac.h @@ -49,7 +49,7 @@ struct net_dma_desc_tx { struct status_area_tx status; }; -struct bf537mac_local { +struct bfin_mac_local { /* * these are things that the kernel wants me to keep, so users * can find out semi-useless statistics of how well the card is -- cgit v1.2.3-70-g09d2 From 679dce39e3cdfcc641b2888ce04f1cd5ff0b3b92 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Fri, 25 Apr 2008 11:53:11 +0800 Subject: Blackfin EMAC Driver: Initial version of ethtool support Signed-off-by: Bryan Wu Signed-off-by: Jeff Garzik --- drivers/net/bfin_mac.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 609748b84e8..89c0018132e 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -454,6 +455,51 @@ static int mii_probe(struct net_device *dev) return 0; } +/* + * Ethtool support + */ + +static int +bfin_mac_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct bfin_mac_local *lp = netdev_priv(dev); + + if (lp->phydev) + return phy_ethtool_gset(lp->phydev, cmd); + + return -EINVAL; +} + +static int +bfin_mac_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct bfin_mac_local *lp = netdev_priv(dev); + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (lp->phydev) + return phy_ethtool_sset(lp->phydev, cmd); + + return -EINVAL; +} + +static void bfin_mac_ethtool_getdrvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); + strcpy(info->bus_info, dev->dev.bus_id); +} + +static struct ethtool_ops bfin_mac_ethtool_ops = { + .get_settings = bfin_mac_ethtool_getsettings, + .set_settings = bfin_mac_ethtool_setsettings, + .get_link = ethtool_op_get_link, + .get_drvinfo = bfin_mac_ethtool_getdrvinfo, +}; + /**************************************************************************/ void setup_system_regs(struct net_device *dev) { @@ -997,6 +1043,7 @@ static int __init bfin_mac_probe(struct platform_device *pdev) #ifdef CONFIG_NET_POLL_CONTROLLER ndev->poll_controller = bfin_mac_poll; #endif + ndev->ethtool_ops = &bfin_mac_ethtool_ops; spin_lock_init(&lp->lock); -- cgit v1.2.3-70-g09d2 From 93ad37d94d0b42e493d95b8a79181112c76ab459 Mon Sep 17 00:00:00 2001 From: "Klaus D. Wacker" Date: Thu, 24 Apr 2008 10:15:18 +0200 Subject: lcs: CCL-sequ. numbers required for protocol 802.2 only. Sequence numbers in skbs (Receive path) are assigned only to 802.2 packets. Signed-off-by: Klaus D. Wacker Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/lcs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index f51ed997258..dd22f4b3703 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1793,7 +1793,8 @@ lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len) skb->protocol = card->lan_type_trans(skb, card->dev); card->stats.rx_bytes += skb_len; card->stats.rx_packets++; - *((__u32 *)skb->cb) = ++card->pkt_seq; + if (skb->protocol == htons(ETH_P_802_2)) + *((__u32 *)skb->cb) = ++card->pkt_seq; netif_rx(skb); } -- cgit v1.2.3-70-g09d2 From 8bbf84404b02f193c5422c252264d7b82ffe4443 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 24 Apr 2008 10:15:19 +0200 Subject: netiucv: get rid of in_atomic() use There is no urgent need to restart a netiucv connection automatically, if packets are sent while the netiucv device is not up and running. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/netiucv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 8f876f6ab36..0fb780e35fa 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1313,8 +1313,6 @@ static int netiucv_tx(struct sk_buff *skb, struct net_device *dev) * and throw away packet. */ if (fsm_getstate(privptr->fsm) != DEV_STATE_RUNNING) { - if (!in_atomic()) - fsm_event(privptr->fsm, DEV_EVENT_START, dev); dev_kfree_skb(skb); privptr->stats.tx_dropped++; privptr->stats.tx_errors++; -- cgit v1.2.3-70-g09d2 From 022b660ae5d075ed9eaddef6f6fb7abb48bdf63b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 24 Apr 2008 10:15:20 +0200 Subject: ccwgroup: Unify parsing for group attribute. Instead of having each driver for ccwgroup slave device parsing the input itself and calling ccwgroup_create(), introduce a new function ccwgroup_create_from_string() and handle parsing inside the ccwgroup core. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/cio/ccwgroup.c | 96 ++++++++++++++++++++++++++++++--------- drivers/s390/net/cu3088.c | 20 +------- drivers/s390/net/qeth_core_main.c | 23 +--------- include/asm-s390/ccwgroup.h | 7 ++- 4 files changed, 82 insertions(+), 64 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 03914fa8117..f38923e38e9 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -153,44 +153,89 @@ __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) return 0; } +static int __get_next_bus_id(const char **buf, char *bus_id) +{ + int rc, len; + char *start, *end; + + start = (char *)*buf; + end = strchr(start, ','); + if (!end) { + /* Last entry. Strip trailing newline, if applicable. */ + end = strchr(start, '\n'); + if (end) + *end = '\0'; + len = strlen(start) + 1; + } else { + len = end - start + 1; + end++; + } + if (len < BUS_ID_SIZE) { + strlcpy(bus_id, start, len); + rc = 0; + } else + rc = -EINVAL; + *buf = end; + return rc; +} + +static int __is_valid_bus_id(char bus_id[BUS_ID_SIZE]) +{ + int cssid, ssid, devno; + + /* Must be of form %x.%x.%04x */ + if (sscanf(bus_id, "%x.%1x.%04x", &cssid, &ssid, &devno) != 3) + return 0; + return 1; +} + /** - * ccwgroup_create() - create and register a ccw group device + * ccwgroup_create_from_string() - create and register a ccw group device * @root: parent device for the new device * @creator_id: identifier of creating driver * @cdrv: ccw driver of slave devices - * @argc: number of slave devices - * @argv: bus ids of slave devices + * @num_devices: number of slave devices + * @buf: buffer containing comma separated bus ids of slave devices * * Create and register a new ccw group device as a child of @root. Slave - * devices are obtained from the list of bus ids given in @argv[] and must all + * devices are obtained from the list of bus ids given in @buf and must all * belong to @cdrv. * Returns: * %0 on success and an error code on failure. * Context: * non-atomic */ -int ccwgroup_create(struct device *root, unsigned int creator_id, - struct ccw_driver *cdrv, int argc, char *argv[]) +int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, + struct ccw_driver *cdrv, int num_devices, + const char *buf) { struct ccwgroup_device *gdev; - int i; - int rc; + int rc, i; + char tmp_bus_id[BUS_ID_SIZE]; + const char *curr_buf; - if (argc > 256) /* disallow dumb users */ - return -EINVAL; - - gdev = kzalloc(sizeof(*gdev) + argc*sizeof(gdev->cdev[0]), GFP_KERNEL); + gdev = kzalloc(sizeof(*gdev) + num_devices * sizeof(gdev->cdev[0]), + GFP_KERNEL); if (!gdev) return -ENOMEM; atomic_set(&gdev->onoff, 0); mutex_init(&gdev->reg_mutex); mutex_lock(&gdev->reg_mutex); - for (i = 0; i < argc; i++) { - gdev->cdev[i] = get_ccwdev_by_busid(cdrv, argv[i]); - - /* all devices have to be of the same type in - * order to be grouped */ + curr_buf = buf; + for (i = 0; i < num_devices && curr_buf; i++) { + rc = __get_next_bus_id(&curr_buf, tmp_bus_id); + if (rc != 0) + goto error; + if (!__is_valid_bus_id(tmp_bus_id)) { + rc = -EINVAL; + goto error; + } + gdev->cdev[i] = get_ccwdev_by_busid(cdrv, tmp_bus_id); + /* + * All devices have to be of the same type in + * order to be grouped. + */ if (!gdev->cdev[i] || gdev->cdev[i]->id.driver_info != gdev->cdev[0]->id.driver_info) { @@ -204,9 +249,18 @@ int ccwgroup_create(struct device *root, unsigned int creator_id, } dev_set_drvdata(&gdev->cdev[i]->dev, gdev); } - + /* Check for sufficient number of bus ids. */ + if (i < num_devices && !curr_buf) { + rc = -EINVAL; + goto error; + } + /* Check for trailing stuff. */ + if (i == num_devices && strlen(curr_buf) > 0) { + rc = -EINVAL; + goto error; + } gdev->creator_id = creator_id; - gdev->count = argc; + gdev->count = num_devices; gdev->dev.bus = &ccwgroup_bus_type; gdev->dev.parent = root; gdev->dev.release = ccwgroup_release; @@ -234,7 +288,7 @@ int ccwgroup_create(struct device *root, unsigned int creator_id, device_remove_file(&gdev->dev, &dev_attr_ungroup); device_unregister(&gdev->dev); error: - for (i = 0; i < argc; i++) + for (i = 0; i < num_devices; i++) if (gdev->cdev[i]) { if (dev_get_drvdata(&gdev->cdev[i]->dev) == gdev) dev_set_drvdata(&gdev->cdev[i]->dev, NULL); @@ -244,6 +298,7 @@ error: put_device(&gdev->dev); return rc; } +EXPORT_SYMBOL(ccwgroup_create_from_string); static int __init init_ccwgroup (void) @@ -519,6 +574,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) MODULE_LICENSE("GPL"); EXPORT_SYMBOL(ccwgroup_driver_register); EXPORT_SYMBOL(ccwgroup_driver_unregister); -EXPORT_SYMBOL(ccwgroup_create); EXPORT_SYMBOL(ccwgroup_probe_ccwdev); EXPORT_SYMBOL(ccwgroup_remove_ccwdev); diff --git a/drivers/s390/net/cu3088.c b/drivers/s390/net/cu3088.c index 76728ae4b84..8e7697305a4 100644 --- a/drivers/s390/net/cu3088.c +++ b/drivers/s390/net/cu3088.c @@ -62,30 +62,14 @@ static struct device *cu3088_root_dev; static ssize_t group_write(struct device_driver *drv, const char *buf, size_t count) { - const char *start, *end; - char bus_ids[2][BUS_ID_SIZE], *argv[2]; - int i; int ret; struct ccwgroup_driver *cdrv; cdrv = to_ccwgroupdrv(drv); if (!cdrv) return -EINVAL; - start = buf; - for (i=0; i<2; i++) { - static const char delim[] = {',', '\n'}; - int len; - - if (!(end = strchr(start, delim[i]))) - return -EINVAL; - len = min_t(ptrdiff_t, BUS_ID_SIZE, end - start + 1); - strlcpy (bus_ids[i], start, len); - argv[i] = bus_ids[i]; - start = end + 1; - } - - ret = ccwgroup_create(cu3088_root_dev, cdrv->driver_id, - &cu3088_driver, 2, argv); + ret = ccwgroup_create_from_string(cu3088_root_dev, cdrv->driver_id, + &cu3088_driver, 2, buf); return (ret == 0) ? count : ret; } diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 055f5c3e7b5..231d18c3b6f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3827,27 +3827,8 @@ static struct ccw_driver qeth_ccw_driver = { static int qeth_core_driver_group(const char *buf, struct device *root_dev, unsigned long driver_id) { - const char *start, *end; - char bus_ids[3][BUS_ID_SIZE], *argv[3]; - int i; - - start = buf; - for (i = 0; i < 3; i++) { - static const char delim[] = { ',', ',', '\n' }; - int len; - - end = strchr(start, delim[i]); - if (!end) - return -EINVAL; - len = min_t(ptrdiff_t, BUS_ID_SIZE, end - start); - strncpy(bus_ids[i], start, len); - bus_ids[i][len] = '\0'; - start = end + 1; - argv[i] = bus_ids[i]; - } - - return (ccwgroup_create(root_dev, driver_id, - &qeth_ccw_driver, 3, argv)); + return ccwgroup_create_from_string(root_dev, driver_id, + &qeth_ccw_driver, 3, buf); } int qeth_core_hardsetup_card(struct qeth_card *card) diff --git a/include/asm-s390/ccwgroup.h b/include/asm-s390/ccwgroup.h index 289053ef5e6..a27f68985a7 100644 --- a/include/asm-s390/ccwgroup.h +++ b/include/asm-s390/ccwgroup.h @@ -57,10 +57,9 @@ struct ccwgroup_driver { extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); -extern int ccwgroup_create (struct device *root, - unsigned int creator_id, - struct ccw_driver *gdrv, - int argc, char *argv[]); +int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, + struct ccw_driver *cdrv, int num_devices, + const char *buf); extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); -- cgit v1.2.3-70-g09d2 From cd023216e64cc0359ec51312bef14ef2449535dd Mon Sep 17 00:00:00 2001 From: Peter Tiedemann Date: Thu, 24 Apr 2008 10:15:21 +0200 Subject: qeth module size reduction. Replace complex macro for s390dbf calls by equivalent function. This reduces module size about 10% without visible performance impact. Signed-off-by: Peter Tiedemann Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_core.h | 18 ++---------------- drivers/s390/net/qeth_core_main.c | 15 ++++++++++++--- drivers/s390/net/qeth_l2_main.c | 3 --- drivers/s390/net/qeth_l3.h | 3 --- drivers/s390/net/qeth_l3_main.c | 2 -- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 66f4f12503c..b7bb0ff87c1 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -72,22 +72,7 @@ struct qeth_dbf_info { debug_sprintf_event(qeth_dbf[QETH_DBF_MSG].id, level, text) #define QETH_DBF_TEXT_(name, level, text...) \ - do { \ - if (qeth_dbf_passes(qeth_dbf[QETH_DBF_##name].id, level)) { \ - char *dbf_txt_buf = \ - get_cpu_var(QETH_DBF_TXT_BUF); \ - sprintf(dbf_txt_buf, text); \ - debug_text_event(qeth_dbf[QETH_DBF_##name].id, \ - level, dbf_txt_buf); \ - put_cpu_var(QETH_DBF_TXT_BUF); \ - } \ - } while (0) - -/* Allow to sort out low debug levels early to avoid wasted sprints */ -static inline int qeth_dbf_passes(debug_info_t *dbf_grp, int level) -{ - return (level <= dbf_grp->level); -} + qeth_dbf_longtext(QETH_DBF_##name, level, text) /** * some more debug stuff @@ -894,6 +879,7 @@ void qeth_core_get_ethtool_stats(struct net_device *, struct ethtool_stats *, u64 *); void qeth_core_get_strings(struct net_device *, u32, u8 *); void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); +void qeth_dbf_longtext(enum qeth_dbf_names dbf_nix, int level, char *text, ...); /* exports for OSN */ int qeth_osn_assist(struct net_device *, void *, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 231d18c3b6f..18323250a33 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -26,9 +26,6 @@ #include "qeth_core.h" #include "qeth_core_offl.h" -static DEFINE_PER_CPU(char[256], qeth_core_dbf_txt_buf); -#define QETH_DBF_TXT_BUF qeth_core_dbf_txt_buf - struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = { /* define dbf - Name, Pages, Areas, Maxlen, Level, View, Handle */ /* N P A M L V H */ @@ -4067,6 +4064,18 @@ static void qeth_unregister_dbf_views(void) } } +void qeth_dbf_longtext(enum qeth_dbf_names dbf_nix, int level, char *text, ...) +{ + char dbf_txt_buf[32]; + + if (level > (qeth_dbf[dbf_nix].id)->level) + return; + snprintf(dbf_txt_buf, sizeof(dbf_txt_buf), text); + debug_text_event(qeth_dbf[dbf_nix].id, level, dbf_txt_buf); + +} +EXPORT_SYMBOL_GPL(qeth_dbf_longtext); + static int qeth_register_dbf_views(void) { int ret; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 3921d1631a7..7e8f6396509 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -22,9 +22,6 @@ #include "qeth_core.h" #include "qeth_core_offl.h" -#define QETH_DBF_TXT_BUF qeth_l2_dbf_txt_buf -static DEFINE_PER_CPU(char[256], qeth_l2_dbf_txt_buf); - static int qeth_l2_set_offline(struct ccwgroup_device *); static int qeth_l2_stop(struct net_device *); static int qeth_l2_send_delmac(struct qeth_card *, __u8 *); diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 1be353593a5..9f143c83bba 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -13,9 +13,6 @@ #include "qeth_core.h" -#define QETH_DBF_TXT_BUF qeth_l3_dbf_txt_buf -DECLARE_PER_CPU(char[256], qeth_l3_dbf_txt_buf); - struct qeth_ipaddr { struct list_head entry; enum qeth_ip_types type; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index e1bfe56087d..62a54364a51 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -28,8 +28,6 @@ #include "qeth_l3.h" #include "qeth_core_offl.h" -DEFINE_PER_CPU(char[256], qeth_l3_dbf_txt_buf); - static int qeth_l3_set_offline(struct ccwgroup_device *); static int qeth_l3_recover(void *); static int qeth_l3_stop(struct net_device *); -- cgit v1.2.3-70-g09d2 From 213298f862d10ade909bdb7d833493d4bdad683d Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 24 Apr 2008 10:15:22 +0200 Subject: qeth: layer 3 support vlan IPv6 on hiper socket hiper socket require the QETH_HDR_EXT_VLAN_FRAME flag in the qdio header to handle vlan tagged frames. Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_l3_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 62a54364a51..f385e93d757 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2568,9 +2568,10 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, * v6 uses passthrough, v4 sets the tag in the QDIO header. */ if (card->vlangrp && vlan_tx_tag_present(skb)) { - hdr->hdr.l3.ext_flags = (ipv == 4) ? - QETH_HDR_EXT_VLAN_FRAME : - QETH_HDR_EXT_INCLUDE_VLAN_TAG; + if ((ipv == 4) || (card->info.type == QETH_CARD_TYPE_IQD)) + hdr->hdr.l3.ext_flags = QETH_HDR_EXT_VLAN_FRAME; + else + hdr->hdr.l3.ext_flags = QETH_HDR_EXT_INCLUDE_VLAN_TAG; hdr->hdr.l3.vlan_id = vlan_tx_tag_get(skb); } -- cgit v1.2.3-70-g09d2 From 3f9975aa4d5b3c614eef8785ec63da13fbd55b51 Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 24 Apr 2008 10:15:23 +0200 Subject: qeth: provide get ethtool settings Load balancing bonding queries the speed of the slave interfaces. To support a bond consisting of different slave speeds we have to report the speed by ethtool settings. Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 90 +++++++++++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + 4 files changed, 93 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b7bb0ff87c1..8dd83d92098 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -880,6 +880,7 @@ void qeth_core_get_ethtool_stats(struct net_device *, void qeth_core_get_strings(struct net_device *, u32, u8 *); void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); void qeth_dbf_longtext(enum qeth_dbf_names dbf_nix, int level, char *text, ...); +int qeth_core_ethtool_get_settings(struct net_device *, struct ethtool_cmd *); /* exports for OSN */ int qeth_osn_assist(struct net_device *, void *, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 18323250a33..23a46340ffa 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4423,6 +4423,96 @@ void qeth_core_get_drvinfo(struct net_device *dev, } EXPORT_SYMBOL_GPL(qeth_core_get_drvinfo); +int qeth_core_ethtool_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct qeth_card *card = netdev_priv(netdev); + enum qeth_link_types link_type; + + if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan)) + link_type = QETH_LINK_TYPE_10GBIT_ETH; + else + link_type = card->info.link_type; + + ecmd->transceiver = XCVR_INTERNAL; + ecmd->supported = SUPPORTED_Autoneg; + ecmd->advertising = ADVERTISED_Autoneg; + ecmd->duplex = DUPLEX_FULL; + ecmd->autoneg = AUTONEG_ENABLE; + + switch (link_type) { + case QETH_LINK_TYPE_FAST_ETH: + case QETH_LINK_TYPE_LANE_ETH100: + ecmd->supported |= SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_TP; + ecmd->advertising |= ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_TP; + ecmd->speed = SPEED_100; + ecmd->port = PORT_TP; + break; + + case QETH_LINK_TYPE_GBIT_ETH: + case QETH_LINK_TYPE_LANE_ETH1000: + ecmd->supported |= SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE; + ecmd->advertising |= ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE; + ecmd->speed = SPEED_1000; + ecmd->port = PORT_FIBRE; + break; + + case QETH_LINK_TYPE_10GBIT_ETH: + ecmd->supported |= SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full | + SUPPORTED_10000baseT_Full | + SUPPORTED_FIBRE; + ecmd->advertising |= ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full | + ADVERTISED_FIBRE; + ecmd->speed = SPEED_10000; + ecmd->port = PORT_FIBRE; + break; + + default: + ecmd->supported |= SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_TP; + ecmd->advertising |= ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_TP; + ecmd->speed = SPEED_10; + ecmd->port = PORT_TP; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_settings); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7e8f6396509..e6092829a5a 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -861,6 +861,7 @@ static struct ethtool_ops qeth_l2_ethtool_ops = { .get_ethtool_stats = qeth_core_get_ethtool_stats, .get_stats_count = qeth_core_get_stats_count, .get_drvinfo = qeth_core_get_drvinfo, + .get_settings = qeth_core_ethtool_get_settings, }; static struct ethtool_ops qeth_l2_osn_ops = { diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f385e93d757..ce23169587e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2889,6 +2889,7 @@ static struct ethtool_ops qeth_l3_ethtool_ops = { .get_ethtool_stats = qeth_core_get_ethtool_stats, .get_stats_count = qeth_core_get_stats_count, .get_drvinfo = qeth_core_get_drvinfo, + .get_settings = qeth_core_ethtool_get_settings, }; /* -- cgit v1.2.3-70-g09d2 From f90b744eb8ead0af7a7aa2f78ff861dff4863f2c Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 24 Apr 2008 10:15:24 +0200 Subject: qeth: rework fast path Remove unnecessary traces. Remove unnecessary wrappers for skb functions. Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_core.h | 31 ++++-------------- drivers/s390/net/qeth_core_main.c | 67 ++------------------------------------- drivers/s390/net/qeth_l2_main.c | 8 ++--- drivers/s390/net/qeth_l3_main.c | 5 --- 4 files changed, 13 insertions(+), 98 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 8dd83d92098..699ac11debd 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -758,27 +758,6 @@ static inline int qeth_get_micros(void) return (int) (get_clock() >> 12); } -static inline void *qeth_push_skb(struct qeth_card *card, struct sk_buff *skb, - int size) -{ - void *hdr; - - hdr = (void *) skb_push(skb, size); - /* - * sanity check, the Linux memory allocation scheme should - * never present us cases like this one (the qdio header size plus - * the first 40 bytes of the paket cross a 4k boundary) - */ - if ((((unsigned long) hdr) & (~(PAGE_SIZE - 1))) != - (((unsigned long) hdr + size + - QETH_IP_HEADER_SIZE) & (~(PAGE_SIZE - 1)))) { - PRINT_ERR("Misaligned packet on interface %s. Discarded.", - QETH_CARD_IFNAME(card)); - return NULL; - } - return hdr; -} - static inline int qeth_get_ip_version(struct sk_buff *skb) { switch (skb->protocol) { @@ -791,6 +770,12 @@ static inline int qeth_get_ip_version(struct sk_buff *skb) } } +static inline void qeth_put_buffer_pool_entry(struct qeth_card *card, + struct qeth_buffer_pool_entry *entry) +{ + list_add_tail(&entry->list, &card->qdio.in_buf_pool.entry_list); +} + struct qeth_eddp_context; extern struct ccwgroup_driver qeth_l2_ccwgroup_driver; extern struct ccwgroup_driver qeth_l3_ccwgroup_driver; @@ -828,8 +813,6 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *, int qeth_query_setadapterparms(struct qeth_card *); int qeth_check_qdio_errors(struct qdio_buffer *, unsigned int, unsigned int, const char *); -void qeth_put_buffer_pool_entry(struct qeth_card *, - struct qeth_buffer_pool_entry *); void qeth_queue_input_buffer(struct qeth_card *, int); struct sk_buff *qeth_core_get_next_skb(struct qeth_card *, struct qdio_buffer *, struct qdio_buffer_element **, int *, @@ -865,8 +848,6 @@ int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, void *reply_param); int qeth_get_cast_type(struct qeth_card *, struct sk_buff *); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -struct sk_buff *qeth_prepare_skb(struct qeth_card *, struct sk_buff *, - struct qeth_hdr **); int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 23a46340ffa..820c332096e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2252,14 +2252,6 @@ void qeth_print_status_message(struct qeth_card *card) } EXPORT_SYMBOL_GPL(qeth_print_status_message); -void qeth_put_buffer_pool_entry(struct qeth_card *card, - struct qeth_buffer_pool_entry *entry) -{ - QETH_DBF_TEXT(TRACE, 6, "ptbfplen"); - list_add_tail(&entry->list, &card->qdio.in_buf_pool.entry_list); -} -EXPORT_SYMBOL_GPL(qeth_put_buffer_pool_entry); - static void qeth_initialize_working_pool_list(struct qeth_card *card) { struct qeth_buffer_pool_entry *entry; @@ -2600,7 +2592,6 @@ void qeth_queue_input_buffer(struct qeth_card *card, int index) int rc; int newcount = 0; - QETH_DBF_TEXT(TRACE, 6, "queinbuf"); count = (index < queue->next_buf_to_init)? card->qdio.in_buf_pool.buf_count - (queue->next_buf_to_init - index) : @@ -2789,8 +2780,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int under_int, int i; unsigned int qdio_flags; - QETH_DBF_TEXT(TRACE, 6, "flushbuf"); - for (i = index; i < index + count; ++i) { buf = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q]; buf->buffer->element[buf->next_element_to_fill - 1].flags |= @@ -3034,49 +3023,6 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_get_priority_queue); -static void __qeth_free_new_skb(struct sk_buff *orig_skb, - struct sk_buff *new_skb) -{ - if (orig_skb != new_skb) - dev_kfree_skb_any(new_skb); -} - -static inline struct sk_buff *qeth_realloc_headroom(struct qeth_card *card, - struct sk_buff *skb, int size) -{ - struct sk_buff *new_skb = skb; - - if (skb_headroom(skb) >= size) - return skb; - new_skb = skb_realloc_headroom(skb, size); - if (!new_skb) - PRINT_ERR("Could not realloc headroom for qeth_hdr " - "on interface %s", QETH_CARD_IFNAME(card)); - return new_skb; -} - -struct sk_buff *qeth_prepare_skb(struct qeth_card *card, struct sk_buff *skb, - struct qeth_hdr **hdr) -{ - struct sk_buff *new_skb; - - QETH_DBF_TEXT(TRACE, 6, "prepskb"); - - new_skb = qeth_realloc_headroom(card, skb, - sizeof(struct qeth_hdr)); - if (!new_skb) - return NULL; - - *hdr = ((struct qeth_hdr *)qeth_push_skb(card, new_skb, - sizeof(struct qeth_hdr))); - if (*hdr == NULL) { - __qeth_free_new_skb(skb, new_skb); - return NULL; - } - return new_skb; -} -EXPORT_SYMBOL_GPL(qeth_prepare_skb); - int qeth_get_elements_no(struct qeth_card *card, void *hdr, struct sk_buff *skb, int elems) { @@ -3097,8 +3043,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr, } EXPORT_SYMBOL_GPL(qeth_get_elements_no); -static void __qeth_fill_buffer(struct sk_buff *skb, struct qdio_buffer *buffer, - int is_tso, int *next_element_to_fill) +static inline void __qeth_fill_buffer(struct sk_buff *skb, + struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill) { int length = skb->len; int length_here; @@ -3140,15 +3086,13 @@ static void __qeth_fill_buffer(struct sk_buff *skb, struct qdio_buffer *buffer, *next_element_to_fill = element; } -static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, +static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, struct qeth_qdio_out_buffer *buf, struct sk_buff *skb) { struct qdio_buffer *buffer; struct qeth_hdr_tso *hdr; int flush_cnt = 0, hdr_len, large_send = 0; - QETH_DBF_TEXT(TRACE, 6, "qdfillbf"); - buffer = buf->buffer; atomic_inc(&skb->users); skb_queue_tail(&buf->skb_list, skb); @@ -3207,8 +3151,6 @@ int qeth_do_send_packet_fast(struct qeth_card *card, int flush_cnt = 0; int index; - QETH_DBF_TEXT(TRACE, 6, "dosndpfa"); - /* spin until we get the queue ... */ while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); @@ -3260,8 +3202,6 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, int tmp; int rc = 0; - QETH_DBF_TEXT(TRACE, 6, "dosndpkt"); - /* spin until we get the queue ... */ while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); @@ -3958,7 +3898,6 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, int use_rx_sg = 0; int frag = 0; - QETH_DBF_TEXT(TRACE, 6, "nextskb"); /* qeth_hdr must not cross element boundaries */ if (element->length < offset + sizeof(struct qeth_hdr)) { if (qeth_is_last_sbale(element)) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index e6092829a5a..ef07e9cadfc 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -632,8 +632,6 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; struct qeth_eddp_context *ctx = NULL; - QETH_DBF_TEXT(TRACE, 6, "l2xmit"); - if ((card->state != CARD_STATE_UP) || !card->lan_online) { card->stats.tx_carrier_errors++; goto tx_drop; @@ -655,9 +653,12 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (card->info.type == QETH_CARD_TYPE_OSN) hdr = (struct qeth_hdr *)skb->data; else { - new_skb = qeth_prepare_skb(card, skb, &hdr); + /* create a clone with writeable headroom */ + new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr)); if (!new_skb) goto tx_drop; + hdr = (struct qeth_hdr *)skb_push(new_skb, + sizeof(struct qeth_hdr)); qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type); } @@ -744,7 +745,6 @@ static void qeth_l2_qdio_input_handler(struct ccw_device *ccwdev, int index; int i; - QETH_DBF_TEXT(TRACE, 6, "qdinput"); card = (struct qeth_card *) card_ptr; net_dev = card->dev; if (card->options.performance_stats) { diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ce23169587e..50545a1f6b3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2557,8 +2557,6 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, struct sk_buff *skb, int ipv, int cast_type) { - QETH_DBF_TEXT(TRACE, 6, "fillhdr"); - memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; @@ -2637,8 +2635,6 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO; struct qeth_eddp_context *ctx = NULL; - QETH_DBF_TEXT(TRACE, 6, "l3xmit"); - if ((card->info.type == QETH_CARD_TYPE_IQD) && (skb->protocol != htons(ETH_P_IPV6)) && (skb->protocol != htons(ETH_P_IP))) @@ -2982,7 +2978,6 @@ static void qeth_l3_qdio_input_handler(struct ccw_device *ccwdev, int index; int i; - QETH_DBF_TEXT(TRACE, 6, "qdinput"); card = (struct qeth_card *) card_ptr; net_dev = card->dev; if (card->options.performance_stats) { -- cgit v1.2.3-70-g09d2 From 8af7c5aebc9a7b46f6ea55ee5a216dce4005f538 Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 24 Apr 2008 10:15:25 +0200 Subject: qeth: layer 3 add missing dev_open/close to ccwgroup handler In case the ccwgroup device is set online/offline we have to run the corresponding dev_open/close for the netdevice. Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_l3_main.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 50545a1f6b3..94a8ead64ed 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2091,6 +2091,11 @@ static int qeth_l3_stop_card(struct qeth_card *card, int recovery_mode) (card->state == CARD_STATE_UP)) { if (recovery_mode) qeth_l3_stop(card->dev); + else { + rtnl_lock(); + dev_close(card->dev); + rtnl_unlock(); + } if (!card->use_hard_stop) { rc = qeth_send_stoplan(card); if (rc) @@ -3135,9 +3140,15 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) netif_carrier_on(card->dev); qeth_set_allowed_threads(card, 0xffffffff, 0); - if ((recover_flag == CARD_STATE_RECOVER) && recovery_mode) { + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode) qeth_l3_open(card->dev); - qeth_l3_set_multicast_list(card->dev); + else { + rtnl_lock(); + dev_open(card->dev); + rtnl_unlock(); + } + qeth_l3_set_multicast_list(card->dev); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); -- cgit v1.2.3-70-g09d2 From a74b08c7fcfc49727cb9e4409ec0410674410c93 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 24 Apr 2008 10:15:26 +0200 Subject: qeth: read number of ports from card Read out number of ports from the hardware. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_core_main.c | 5 +++-- include/asm-s390/qdio.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 820c332096e..436bf1f6d4a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3803,8 +3803,9 @@ retry: QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); return rc; } - - mpno = QETH_MAX_PORTNO; + mpno = qdio_get_ssqd_pct(CARD_DDEV(card)); + if (mpno) + mpno = min(mpno - 1, QETH_MAX_PORTNO); if (card->info.portno > mpno) { PRINT_ERR("Device %s does not offer port number %d \n.", CARD_BUS_ID(card), card->info.portno); diff --git a/include/asm-s390/qdio.h b/include/asm-s390/qdio.h index 4b8ff55f680..11240342a0f 100644 --- a/include/asm-s390/qdio.h +++ b/include/asm-s390/qdio.h @@ -127,6 +127,7 @@ extern int do_QDIO(struct ccw_device*, unsigned int flags, unsigned int qidx,unsigned int count, struct qdio_buffer *buffers); +extern int qdio_get_ssqd_pct(struct ccw_device*); extern int qdio_synchronize(struct ccw_device*, unsigned int flags, unsigned int queue_number); -- cgit v1.2.3-70-g09d2 From efe3df6f6cfb587e662aa6f0cf9a9fde93d8af0b Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 24 Apr 2008 10:15:27 +0200 Subject: qeth: layer 2 allow ethtool to set TSO Allow ethtool to turn on/off EDDP via ethtool TSO interface. Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/qeth_l2_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index ef07e9cadfc..86ec50ddae1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -849,6 +849,22 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) return; } +static int qeth_l2_ethtool_set_tso(struct net_device *dev, u32 data) +{ + struct qeth_card *card = netdev_priv(dev); + + if (data) { + if (card->options.large_send == QETH_LARGE_SEND_NO) { + card->options.large_send = QETH_LARGE_SEND_EDDP; + dev->features |= NETIF_F_TSO; + } + } else { + dev->features &= ~NETIF_F_TSO; + card->options.large_send = QETH_LARGE_SEND_NO; + } + return 0; +} + static struct ethtool_ops qeth_l2_ethtool_ops = { .get_link = ethtool_op_get_link, .get_tx_csum = ethtool_op_get_tx_csum, @@ -856,7 +872,7 @@ static struct ethtool_ops qeth_l2_ethtool_ops = { .get_sg = ethtool_op_get_sg, .set_sg = ethtool_op_set_sg, .get_tso = ethtool_op_get_tso, - .set_tso = ethtool_op_set_tso, + .set_tso = qeth_l2_ethtool_set_tso, .get_strings = qeth_core_get_strings, .get_ethtool_stats = qeth_core_get_ethtool_stats, .get_stats_count = qeth_core_get_stats_count, -- cgit v1.2.3-70-g09d2 From 0a0a83107c01a8dba04fa40ddb29ff021d4f8112 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 24 Apr 2008 10:15:28 +0200 Subject: netiucv: Fix missing driver attributes. Signed-off-by: Cornelia Huck Signed-off-by: Frank Blaschka Signed-off-by: Jeff Garzik --- drivers/s390/net/netiucv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 0fb780e35fa..e4ba6a0372a 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -2145,6 +2145,7 @@ static int __init netiucv_init(void) if (rc) goto out_dbf; IUCV_DBF_TEXT(trace, 3, __func__); + netiucv_driver.groups = netiucv_drv_attr_groups; rc = driver_register(&netiucv_driver); if (rc) { PRINT_ERR("NETIUCV: failed to register driver.\n"); -- cgit v1.2.3-70-g09d2 From 770f867991155f9c9e36a845a142f770d55ee67c Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Wed, 23 Apr 2008 23:44:03 +0200 Subject: ARM: am79c961a: platform_get_irq() may return signed unnoticed dev->irq is unsigned, platform_get_irq() may return signed unnoticed Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Jeff Garzik --- drivers/net/arm/am79c961a.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c index ba6bd03a015..a637910b02d 100644 --- a/drivers/net/arm/am79c961a.c +++ b/drivers/net/arm/am79c961a.c @@ -693,11 +693,15 @@ static int __init am79c961_probe(struct platform_device *pdev) * done by the ether bootp loader. */ dev->base_addr = res->start; - dev->irq = platform_get_irq(pdev, 0); + ret = platform_get_irq(pdev, 0); - ret = -ENODEV; - if (dev->irq < 0) + if (ret < 0) { + ret = -ENODEV; goto nodev; + } + dev->irq = ret; + + ret = -ENODEV; if (!request_region(dev->base_addr, 0x18, dev->name)) goto nodev; -- cgit v1.2.3-70-g09d2 From dac2f83fce01f0c2900918a4a8abd4c652151804 Mon Sep 17 00:00:00 2001 From: Krzysztof Halasa Date: Sun, 20 Apr 2008 19:06:39 +0200 Subject: Driver for IXP4xx built-in Ethernet ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a driver for built-in IXP4xx Ethernet ports. Signed-off-by: Krzysztof HaÅ‚asa Signed-off-by: Jeff Garzik --- arch/arm/mach-ixp4xx/ixp4xx_npe.c | 4 + arch/arm/mach-ixp4xx/ixp4xx_qmgr.c | 2 + drivers/net/arm/Kconfig | 8 + drivers/net/arm/Makefile | 1 + drivers/net/arm/ixp4xx_eth.c | 1265 ++++++++++++++++++++++++++++++++++++ 5 files changed, 1280 insertions(+) create mode 100644 drivers/net/arm/ixp4xx_eth.c diff --git a/arch/arm/mach-ixp4xx/ixp4xx_npe.c b/arch/arm/mach-ixp4xx/ixp4xx_npe.c index 83c137ec582..63a23fa4aab 100644 --- a/arch/arm/mach-ixp4xx/ixp4xx_npe.c +++ b/arch/arm/mach-ixp4xx/ixp4xx_npe.c @@ -448,7 +448,9 @@ int npe_send_message(struct npe *npe, const void *msg, const char *what) return -ETIMEDOUT; } +#if DEBUG_MSG > 1 debug_msg(npe, "Sending a message took %i cycles\n", cycles); +#endif return 0; } @@ -484,7 +486,9 @@ int npe_recv_message(struct npe *npe, void *msg, const char *what) return -ETIMEDOUT; } +#if DEBUG_MSG > 1 debug_msg(npe, "Receiving a message took %i cycles\n", cycles); +#endif return 0; } diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c index e8330132530..fab94eaecee 100644 --- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c +++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c @@ -184,6 +184,8 @@ void qmgr_release_queue(unsigned int queue) case 3: mask[0] = 0xFF; break; } + mask[1] = mask[2] = mask[3] = 0; + while (addr--) shift_mask(mask); diff --git a/drivers/net/arm/Kconfig b/drivers/net/arm/Kconfig index f9cc2b621fe..8eda6eeb43b 100644 --- a/drivers/net/arm/Kconfig +++ b/drivers/net/arm/Kconfig @@ -47,3 +47,11 @@ config EP93XX_ETH help This is a driver for the ethernet hardware included in EP93xx CPUs. Say Y if you are building a kernel for EP93xx based devices. + +config IXP4XX_ETH + tristate "Intel IXP4xx Ethernet support" + depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR + select MII + help + Say Y here if you want to use built-in Ethernet ports + on IXP4xx processor. diff --git a/drivers/net/arm/Makefile b/drivers/net/arm/Makefile index a4c868278e1..7c812ac2b6a 100644 --- a/drivers/net/arm/Makefile +++ b/drivers/net/arm/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_ARM_ETHER3) += ether3.o obj-$(CONFIG_ARM_ETHER1) += ether1.o obj-$(CONFIG_ARM_AT91_ETHER) += at91_ether.o obj-$(CONFIG_EP93XX_ETH) += ep93xx_eth.o +obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c new file mode 100644 index 00000000000..c617b64c288 --- /dev/null +++ b/drivers/net/arm/ixp4xx_eth.c @@ -0,0 +1,1265 @@ +/* + * Intel IXP4xx Ethernet driver for Linux + * + * Copyright (C) 2007 Krzysztof Halasa + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * Ethernet port config (0x00 is not present on IXP42X): + * + * logical port 0x00 0x10 0x20 + * NPE 0 (NPE-A) 1 (NPE-B) 2 (NPE-C) + * physical PortId 2 0 1 + * TX queue 23 24 25 + * RX-free queue 26 27 28 + * TX-done queue is always 31, per-port RX and TX-ready queues are configurable + * + * + * Queue entries: + * bits 0 -> 1 - NPE ID (RX and TX-done) + * bits 0 -> 2 - priority (TX, per 802.1D) + * bits 3 -> 4 - port ID (user-set?) + * bits 5 -> 31 - physical descriptor address + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_QUEUES 0 +#define DEBUG_DESC 0 +#define DEBUG_RX 0 +#define DEBUG_TX 0 +#define DEBUG_PKT_BYTES 0 +#define DEBUG_MDIO 0 +#define DEBUG_CLOSE 0 + +#define DRV_NAME "ixp4xx_eth" + +#define MAX_NPES 3 + +#define RX_DESCS 64 /* also length of all RX queues */ +#define TX_DESCS 16 /* also length of all TX queues */ +#define TXDONE_QUEUE_LEN 64 /* dwords */ + +#define POOL_ALLOC_SIZE (sizeof(struct desc) * (RX_DESCS + TX_DESCS)) +#define REGS_SIZE 0x1000 +#define MAX_MRU 1536 /* 0x600 */ +#define RX_BUFF_SIZE ALIGN((NET_IP_ALIGN) + MAX_MRU, 4) + +#define NAPI_WEIGHT 16 +#define MDIO_INTERVAL (3 * HZ) +#define MAX_MDIO_RETRIES 100 /* microseconds, typically 30 cycles */ +#define MAX_MII_RESET_RETRIES 100 /* mdio_read() cycles, typically 4 */ +#define MAX_CLOSE_WAIT 1000 /* microseconds, typically 2-3 cycles */ + +#define NPE_ID(port_id) ((port_id) >> 4) +#define PHYSICAL_ID(port_id) ((NPE_ID(port_id) + 2) % 3) +#define TX_QUEUE(port_id) (NPE_ID(port_id) + 23) +#define RXFREE_QUEUE(port_id) (NPE_ID(port_id) + 26) +#define TXDONE_QUEUE 31 + +/* TX Control Registers */ +#define TX_CNTRL0_TX_EN 0x01 +#define TX_CNTRL0_HALFDUPLEX 0x02 +#define TX_CNTRL0_RETRY 0x04 +#define TX_CNTRL0_PAD_EN 0x08 +#define TX_CNTRL0_APPEND_FCS 0x10 +#define TX_CNTRL0_2DEFER 0x20 +#define TX_CNTRL0_RMII 0x40 /* reduced MII */ +#define TX_CNTRL1_RETRIES 0x0F /* 4 bits */ + +/* RX Control Registers */ +#define RX_CNTRL0_RX_EN 0x01 +#define RX_CNTRL0_PADSTRIP_EN 0x02 +#define RX_CNTRL0_SEND_FCS 0x04 +#define RX_CNTRL0_PAUSE_EN 0x08 +#define RX_CNTRL0_LOOP_EN 0x10 +#define RX_CNTRL0_ADDR_FLTR_EN 0x20 +#define RX_CNTRL0_RX_RUNT_EN 0x40 +#define RX_CNTRL0_BCAST_DIS 0x80 +#define RX_CNTRL1_DEFER_EN 0x01 + +/* Core Control Register */ +#define CORE_RESET 0x01 +#define CORE_RX_FIFO_FLUSH 0x02 +#define CORE_TX_FIFO_FLUSH 0x04 +#define CORE_SEND_JAM 0x08 +#define CORE_MDC_EN 0x10 /* MDIO using NPE-B ETH-0 only */ + +#define DEFAULT_TX_CNTRL0 (TX_CNTRL0_TX_EN | TX_CNTRL0_RETRY | \ + TX_CNTRL0_PAD_EN | TX_CNTRL0_APPEND_FCS | \ + TX_CNTRL0_2DEFER) +#define DEFAULT_RX_CNTRL0 RX_CNTRL0_RX_EN +#define DEFAULT_CORE_CNTRL CORE_MDC_EN + + +/* NPE message codes */ +#define NPE_GETSTATUS 0x00 +#define NPE_EDB_SETPORTADDRESS 0x01 +#define NPE_EDB_GETMACADDRESSDATABASE 0x02 +#define NPE_EDB_SETMACADDRESSSDATABASE 0x03 +#define NPE_GETSTATS 0x04 +#define NPE_RESETSTATS 0x05 +#define NPE_SETMAXFRAMELENGTHS 0x06 +#define NPE_VLAN_SETRXTAGMODE 0x07 +#define NPE_VLAN_SETDEFAULTRXVID 0x08 +#define NPE_VLAN_SETPORTVLANTABLEENTRY 0x09 +#define NPE_VLAN_SETPORTVLANTABLERANGE 0x0A +#define NPE_VLAN_SETRXQOSENTRY 0x0B +#define NPE_VLAN_SETPORTIDEXTRACTIONMODE 0x0C +#define NPE_STP_SETBLOCKINGSTATE 0x0D +#define NPE_FW_SETFIREWALLMODE 0x0E +#define NPE_PC_SETFRAMECONTROLDURATIONID 0x0F +#define NPE_PC_SETAPMACTABLE 0x11 +#define NPE_SETLOOPBACK_MODE 0x12 +#define NPE_PC_SETBSSIDTABLE 0x13 +#define NPE_ADDRESS_FILTER_CONFIG 0x14 +#define NPE_APPENDFCSCONFIG 0x15 +#define NPE_NOTIFY_MAC_RECOVERY_DONE 0x16 +#define NPE_MAC_RECOVERY_START 0x17 + + +#ifdef __ARMEB__ +typedef struct sk_buff buffer_t; +#define free_buffer dev_kfree_skb +#define free_buffer_irq dev_kfree_skb_irq +#else +typedef void buffer_t; +#define free_buffer kfree +#define free_buffer_irq kfree +#endif + +struct eth_regs { + u32 tx_control[2], __res1[2]; /* 000 */ + u32 rx_control[2], __res2[2]; /* 010 */ + u32 random_seed, __res3[3]; /* 020 */ + u32 partial_empty_threshold, __res4; /* 030 */ + u32 partial_full_threshold, __res5; /* 038 */ + u32 tx_start_bytes, __res6[3]; /* 040 */ + u32 tx_deferral, rx_deferral, __res7[2];/* 050 */ + u32 tx_2part_deferral[2], __res8[2]; /* 060 */ + u32 slot_time, __res9[3]; /* 070 */ + u32 mdio_command[4]; /* 080 */ + u32 mdio_status[4]; /* 090 */ + u32 mcast_mask[6], __res10[2]; /* 0A0 */ + u32 mcast_addr[6], __res11[2]; /* 0C0 */ + u32 int_clock_threshold, __res12[3]; /* 0E0 */ + u32 hw_addr[6], __res13[61]; /* 0F0 */ + u32 core_control; /* 1FC */ +}; + +struct port { + struct resource *mem_res; + struct eth_regs __iomem *regs; + struct npe *npe; + struct net_device *netdev; + struct napi_struct napi; + struct net_device_stats stat; + struct mii_if_info mii; + struct delayed_work mdio_thread; + struct eth_plat_info *plat; + buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; + struct desc *desc_tab; /* coherent */ + u32 desc_tab_phys; + int id; /* logical port ID */ + u16 mii_bmcr; +}; + +/* NPE message structure */ +struct msg { +#ifdef __ARMEB__ + u8 cmd, eth_id, byte2, byte3; + u8 byte4, byte5, byte6, byte7; +#else + u8 byte3, byte2, eth_id, cmd; + u8 byte7, byte6, byte5, byte4; +#endif +}; + +/* Ethernet packet descriptor */ +struct desc { + u32 next; /* pointer to next buffer, unused */ + +#ifdef __ARMEB__ + u16 buf_len; /* buffer length */ + u16 pkt_len; /* packet length */ + u32 data; /* pointer to data buffer in RAM */ + u8 dest_id; + u8 src_id; + u16 flags; + u8 qos; + u8 padlen; + u16 vlan_tci; +#else + u16 pkt_len; /* packet length */ + u16 buf_len; /* buffer length */ + u32 data; /* pointer to data buffer in RAM */ + u16 flags; + u8 src_id; + u8 dest_id; + u16 vlan_tci; + u8 padlen; + u8 qos; +#endif + +#ifdef __ARMEB__ + u8 dst_mac_0, dst_mac_1, dst_mac_2, dst_mac_3; + u8 dst_mac_4, dst_mac_5, src_mac_0, src_mac_1; + u8 src_mac_2, src_mac_3, src_mac_4, src_mac_5; +#else + u8 dst_mac_3, dst_mac_2, dst_mac_1, dst_mac_0; + u8 src_mac_1, src_mac_0, dst_mac_5, dst_mac_4; + u8 src_mac_5, src_mac_4, src_mac_3, src_mac_2; +#endif +}; + + +#define rx_desc_phys(port, n) ((port)->desc_tab_phys + \ + (n) * sizeof(struct desc)) +#define rx_desc_ptr(port, n) (&(port)->desc_tab[n]) + +#define tx_desc_phys(port, n) ((port)->desc_tab_phys + \ + ((n) + RX_DESCS) * sizeof(struct desc)) +#define tx_desc_ptr(port, n) (&(port)->desc_tab[(n) + RX_DESCS]) + +#ifndef __ARMEB__ +static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt) +{ + int i; + for (i = 0; i < cnt; i++) + dest[i] = swab32(src[i]); +} +#endif + +static spinlock_t mdio_lock; +static struct eth_regs __iomem *mdio_regs; /* mdio command and status only */ +static int ports_open; +static struct port *npe_port_tab[MAX_NPES]; +static struct dma_pool *dma_pool; + + +static u16 mdio_cmd(struct net_device *dev, int phy_id, int location, + int write, u16 cmd) +{ + int cycles = 0; + + if (__raw_readl(&mdio_regs->mdio_command[3]) & 0x80) { + printk(KERN_ERR "%s: MII not ready to transmit\n", dev->name); + return 0; + } + + if (write) { + __raw_writel(cmd & 0xFF, &mdio_regs->mdio_command[0]); + __raw_writel(cmd >> 8, &mdio_regs->mdio_command[1]); + } + __raw_writel(((phy_id << 5) | location) & 0xFF, + &mdio_regs->mdio_command[2]); + __raw_writel((phy_id >> 3) | (write << 2) | 0x80 /* GO */, + &mdio_regs->mdio_command[3]); + + while ((cycles < MAX_MDIO_RETRIES) && + (__raw_readl(&mdio_regs->mdio_command[3]) & 0x80)) { + udelay(1); + cycles++; + } + + if (cycles == MAX_MDIO_RETRIES) { + printk(KERN_ERR "%s: MII write failed\n", dev->name); + return 0; + } + +#if DEBUG_MDIO + printk(KERN_DEBUG "%s: mdio_cmd() took %i cycles\n", dev->name, + cycles); +#endif + + if (write) + return 0; + + if (__raw_readl(&mdio_regs->mdio_status[3]) & 0x80) { + printk(KERN_ERR "%s: MII read failed\n", dev->name); + return 0; + } + + return (__raw_readl(&mdio_regs->mdio_status[0]) & 0xFF) | + (__raw_readl(&mdio_regs->mdio_status[1]) << 8); +} + +static int mdio_read(struct net_device *dev, int phy_id, int location) +{ + unsigned long flags; + u16 val; + + spin_lock_irqsave(&mdio_lock, flags); + val = mdio_cmd(dev, phy_id, location, 0, 0); + spin_unlock_irqrestore(&mdio_lock, flags); + return val; +} + +static void mdio_write(struct net_device *dev, int phy_id, int location, + int val) +{ + unsigned long flags; + + spin_lock_irqsave(&mdio_lock, flags); + mdio_cmd(dev, phy_id, location, 1, val); + spin_unlock_irqrestore(&mdio_lock, flags); +} + +static void phy_reset(struct net_device *dev, int phy_id) +{ + struct port *port = netdev_priv(dev); + int cycles = 0; + + mdio_write(dev, phy_id, MII_BMCR, port->mii_bmcr | BMCR_RESET); + + while (cycles < MAX_MII_RESET_RETRIES) { + if (!(mdio_read(dev, phy_id, MII_BMCR) & BMCR_RESET)) { +#if DEBUG_MDIO + printk(KERN_DEBUG "%s: phy_reset() took %i cycles\n", + dev->name, cycles); +#endif + return; + } + udelay(1); + cycles++; + } + + printk(KERN_ERR "%s: MII reset failed\n", dev->name); +} + +static void eth_set_duplex(struct port *port) +{ + if (port->mii.full_duplex) + __raw_writel(DEFAULT_TX_CNTRL0 & ~TX_CNTRL0_HALFDUPLEX, + &port->regs->tx_control[0]); + else + __raw_writel(DEFAULT_TX_CNTRL0 | TX_CNTRL0_HALFDUPLEX, + &port->regs->tx_control[0]); +} + + +static void phy_check_media(struct port *port, int init) +{ + if (mii_check_media(&port->mii, 1, init)) + eth_set_duplex(port); + if (port->mii.force_media) { /* mii_check_media() doesn't work */ + struct net_device *dev = port->netdev; + int cur_link = mii_link_ok(&port->mii); + int prev_link = netif_carrier_ok(dev); + + if (!prev_link && cur_link) { + printk(KERN_INFO "%s: link up\n", dev->name); + netif_carrier_on(dev); + } else if (prev_link && !cur_link) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + } +} + + +static void mdio_thread(struct work_struct *work) +{ + struct port *port = container_of(work, struct port, mdio_thread.work); + + phy_check_media(port, 0); + schedule_delayed_work(&port->mdio_thread, MDIO_INTERVAL); +} + + +static inline void debug_pkt(struct net_device *dev, const char *func, + u8 *data, int len) +{ +#if DEBUG_PKT_BYTES + int i; + + printk(KERN_DEBUG "%s: %s(%i) ", dev->name, func, len); + for (i = 0; i < len; i++) { + if (i >= DEBUG_PKT_BYTES) + break; + printk("%s%02X", + ((i == 6) || (i == 12) || (i >= 14)) ? " " : "", + data[i]); + } + printk("\n"); +#endif +} + + +static inline void debug_desc(u32 phys, struct desc *desc) +{ +#if DEBUG_DESC + printk(KERN_DEBUG "%X: %X %3X %3X %08X %2X < %2X %4X %X" + " %X %X %02X%02X%02X%02X%02X%02X < %02X%02X%02X%02X%02X%02X\n", + phys, desc->next, desc->buf_len, desc->pkt_len, + desc->data, desc->dest_id, desc->src_id, desc->flags, + desc->qos, desc->padlen, desc->vlan_tci, + desc->dst_mac_0, desc->dst_mac_1, desc->dst_mac_2, + desc->dst_mac_3, desc->dst_mac_4, desc->dst_mac_5, + desc->src_mac_0, desc->src_mac_1, desc->src_mac_2, + desc->src_mac_3, desc->src_mac_4, desc->src_mac_5); +#endif +} + +static inline void debug_queue(unsigned int queue, int is_get, u32 phys) +{ +#if DEBUG_QUEUES + static struct { + int queue; + char *name; + } names[] = { + { TX_QUEUE(0x10), "TX#0 " }, + { TX_QUEUE(0x20), "TX#1 " }, + { TX_QUEUE(0x00), "TX#2 " }, + { RXFREE_QUEUE(0x10), "RX-free#0 " }, + { RXFREE_QUEUE(0x20), "RX-free#1 " }, + { RXFREE_QUEUE(0x00), "RX-free#2 " }, + { TXDONE_QUEUE, "TX-done " }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(names); i++) + if (names[i].queue == queue) + break; + + printk(KERN_DEBUG "Queue %i %s%s %X\n", queue, + i < ARRAY_SIZE(names) ? names[i].name : "", + is_get ? "->" : "<-", phys); +#endif +} + +static inline u32 queue_get_entry(unsigned int queue) +{ + u32 phys = qmgr_get_entry(queue); + debug_queue(queue, 1, phys); + return phys; +} + +static inline int queue_get_desc(unsigned int queue, struct port *port, + int is_tx) +{ + u32 phys, tab_phys, n_desc; + struct desc *tab; + + if (!(phys = queue_get_entry(queue))) + return -1; + + phys &= ~0x1F; /* mask out non-address bits */ + tab_phys = is_tx ? tx_desc_phys(port, 0) : rx_desc_phys(port, 0); + tab = is_tx ? tx_desc_ptr(port, 0) : rx_desc_ptr(port, 0); + n_desc = (phys - tab_phys) / sizeof(struct desc); + BUG_ON(n_desc >= (is_tx ? TX_DESCS : RX_DESCS)); + debug_desc(phys, &tab[n_desc]); + BUG_ON(tab[n_desc].next); + return n_desc; +} + +static inline void queue_put_desc(unsigned int queue, u32 phys, + struct desc *desc) +{ + debug_queue(queue, 0, phys); + debug_desc(phys, desc); + BUG_ON(phys & 0x1F); + qmgr_put_entry(queue, phys); + BUG_ON(qmgr_stat_overflow(queue)); +} + + +static inline void dma_unmap_tx(struct port *port, struct desc *desc) +{ +#ifdef __ARMEB__ + dma_unmap_single(&port->netdev->dev, desc->data, + desc->buf_len, DMA_TO_DEVICE); +#else + dma_unmap_single(&port->netdev->dev, desc->data & ~3, + ALIGN((desc->data & 3) + desc->buf_len, 4), + DMA_TO_DEVICE); +#endif +} + + +static void eth_rx_irq(void *pdev) +{ + struct net_device *dev = pdev; + struct port *port = netdev_priv(dev); + +#if DEBUG_RX + printk(KERN_DEBUG "%s: eth_rx_irq\n", dev->name); +#endif + qmgr_disable_irq(port->plat->rxq); + netif_rx_schedule(dev, &port->napi); +} + +static int eth_poll(struct napi_struct *napi, int budget) +{ + struct port *port = container_of(napi, struct port, napi); + struct net_device *dev = port->netdev; + unsigned int rxq = port->plat->rxq, rxfreeq = RXFREE_QUEUE(port->id); + int received = 0; + +#if DEBUG_RX + printk(KERN_DEBUG "%s: eth_poll\n", dev->name); +#endif + + while (received < budget) { + struct sk_buff *skb; + struct desc *desc; + int n; +#ifdef __ARMEB__ + struct sk_buff *temp; + u32 phys; +#endif + + if ((n = queue_get_desc(rxq, port, 0)) < 0) { + received = 0; /* No packet received */ +#if DEBUG_RX + printk(KERN_DEBUG "%s: eth_poll netif_rx_complete\n", + dev->name); +#endif + netif_rx_complete(dev, napi); + qmgr_enable_irq(rxq); + if (!qmgr_stat_empty(rxq) && + netif_rx_reschedule(dev, napi)) { +#if DEBUG_RX + printk(KERN_DEBUG "%s: eth_poll" + " netif_rx_reschedule successed\n", + dev->name); +#endif + qmgr_disable_irq(rxq); + continue; + } +#if DEBUG_RX + printk(KERN_DEBUG "%s: eth_poll all done\n", + dev->name); +#endif + return 0; /* all work done */ + } + + desc = rx_desc_ptr(port, n); + +#ifdef __ARMEB__ + if ((skb = netdev_alloc_skb(dev, RX_BUFF_SIZE))) { + phys = dma_map_single(&dev->dev, skb->data, + RX_BUFF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(phys)) { + dev_kfree_skb(skb); + skb = NULL; + } + } +#else + skb = netdev_alloc_skb(dev, + ALIGN(NET_IP_ALIGN + desc->pkt_len, 4)); +#endif + + if (!skb) { + port->stat.rx_dropped++; + /* put the desc back on RX-ready queue */ + desc->buf_len = MAX_MRU; + desc->pkt_len = 0; + queue_put_desc(rxfreeq, rx_desc_phys(port, n), desc); + continue; + } + + /* process received frame */ +#ifdef __ARMEB__ + temp = skb; + skb = port->rx_buff_tab[n]; + dma_unmap_single(&dev->dev, desc->data - NET_IP_ALIGN, + RX_BUFF_SIZE, DMA_FROM_DEVICE); +#else + dma_sync_single(&dev->dev, desc->data - NET_IP_ALIGN, + RX_BUFF_SIZE, DMA_FROM_DEVICE); + memcpy_swab32((u32 *)skb->data, (u32 *)port->rx_buff_tab[n], + ALIGN(NET_IP_ALIGN + desc->pkt_len, 4) / 4); +#endif + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, desc->pkt_len); + + debug_pkt(dev, "eth_poll", skb->data, skb->len); + + skb->protocol = eth_type_trans(skb, dev); + dev->last_rx = jiffies; + port->stat.rx_packets++; + port->stat.rx_bytes += skb->len; + netif_receive_skb(skb); + + /* put the new buffer on RX-free queue */ +#ifdef __ARMEB__ + port->rx_buff_tab[n] = temp; + desc->data = phys + NET_IP_ALIGN; +#endif + desc->buf_len = MAX_MRU; + desc->pkt_len = 0; + queue_put_desc(rxfreeq, rx_desc_phys(port, n), desc); + received++; + } + +#if DEBUG_RX + printk(KERN_DEBUG "eth_poll(): end, not all work done\n"); +#endif + return received; /* not all work done */ +} + + +static void eth_txdone_irq(void *unused) +{ + u32 phys; + +#if DEBUG_TX + printk(KERN_DEBUG DRV_NAME ": eth_txdone_irq\n"); +#endif + while ((phys = queue_get_entry(TXDONE_QUEUE)) != 0) { + u32 npe_id, n_desc; + struct port *port; + struct desc *desc; + int start; + + npe_id = phys & 3; + BUG_ON(npe_id >= MAX_NPES); + port = npe_port_tab[npe_id]; + BUG_ON(!port); + phys &= ~0x1F; /* mask out non-address bits */ + n_desc = (phys - tx_desc_phys(port, 0)) / sizeof(struct desc); + BUG_ON(n_desc >= TX_DESCS); + desc = tx_desc_ptr(port, n_desc); + debug_desc(phys, desc); + + if (port->tx_buff_tab[n_desc]) { /* not the draining packet */ + port->stat.tx_packets++; + port->stat.tx_bytes += desc->pkt_len; + + dma_unmap_tx(port, desc); +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_txdone_irq free %p\n", + port->netdev->name, port->tx_buff_tab[n_desc]); +#endif + free_buffer_irq(port->tx_buff_tab[n_desc]); + port->tx_buff_tab[n_desc] = NULL; + } + + start = qmgr_stat_empty(port->plat->txreadyq); + queue_put_desc(port->plat->txreadyq, phys, desc); + if (start) { +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_txdone_irq xmit ready\n", + port->netdev->name); +#endif + netif_wake_queue(port->netdev); + } + } +} + +static int eth_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct port *port = netdev_priv(dev); + unsigned int txreadyq = port->plat->txreadyq; + int len, offset, bytes, n; + void *mem; + u32 phys; + struct desc *desc; + +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_xmit\n", dev->name); +#endif + + if (unlikely(skb->len > MAX_MRU)) { + dev_kfree_skb(skb); + port->stat.tx_errors++; + return NETDEV_TX_OK; + } + + debug_pkt(dev, "eth_xmit", skb->data, skb->len); + + len = skb->len; +#ifdef __ARMEB__ + offset = 0; /* no need to keep alignment */ + bytes = len; + mem = skb->data; +#else + offset = (int)skb->data & 3; /* keep 32-bit alignment */ + bytes = ALIGN(offset + len, 4); + if (!(mem = kmalloc(bytes, GFP_ATOMIC))) { + dev_kfree_skb(skb); + port->stat.tx_dropped++; + return NETDEV_TX_OK; + } + memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); + dev_kfree_skb(skb); +#endif + + phys = dma_map_single(&dev->dev, mem, bytes, DMA_TO_DEVICE); + if (dma_mapping_error(phys)) { +#ifdef __ARMEB__ + dev_kfree_skb(skb); +#else + kfree(mem); +#endif + port->stat.tx_dropped++; + return NETDEV_TX_OK; + } + + n = queue_get_desc(txreadyq, port, 1); + BUG_ON(n < 0); + desc = tx_desc_ptr(port, n); + +#ifdef __ARMEB__ + port->tx_buff_tab[n] = skb; +#else + port->tx_buff_tab[n] = mem; +#endif + desc->data = phys + offset; + desc->buf_len = desc->pkt_len = len; + + /* NPE firmware pads short frames with zeros internally */ + wmb(); + queue_put_desc(TX_QUEUE(port->id), tx_desc_phys(port, n), desc); + dev->trans_start = jiffies; + + if (qmgr_stat_empty(txreadyq)) { +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name); +#endif + netif_stop_queue(dev); + /* we could miss TX ready interrupt */ + if (!qmgr_stat_empty(txreadyq)) { +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_xmit ready again\n", + dev->name); +#endif + netif_wake_queue(dev); + } + } + +#if DEBUG_TX + printk(KERN_DEBUG "%s: eth_xmit end\n", dev->name); +#endif + return NETDEV_TX_OK; +} + + +static struct net_device_stats *eth_stats(struct net_device *dev) +{ + struct port *port = netdev_priv(dev); + return &port->stat; +} + +static void eth_set_mcast_list(struct net_device *dev) +{ + struct port *port = netdev_priv(dev); + struct dev_mc_list *mclist = dev->mc_list; + u8 diffs[ETH_ALEN], *addr; + int cnt = dev->mc_count, i; + + if ((dev->flags & IFF_PROMISC) || !mclist || !cnt) { + __raw_writel(DEFAULT_RX_CNTRL0 & ~RX_CNTRL0_ADDR_FLTR_EN, + &port->regs->rx_control[0]); + return; + } + + memset(diffs, 0, ETH_ALEN); + addr = mclist->dmi_addr; /* first MAC address */ + + while (--cnt && (mclist = mclist->next)) + for (i = 0; i < ETH_ALEN; i++) + diffs[i] |= addr[i] ^ mclist->dmi_addr[i]; + + for (i = 0; i < ETH_ALEN; i++) { + __raw_writel(addr[i], &port->regs->mcast_addr[i]); + __raw_writel(~diffs[i], &port->regs->mcast_mask[i]); + } + + __raw_writel(DEFAULT_RX_CNTRL0 | RX_CNTRL0_ADDR_FLTR_EN, + &port->regs->rx_control[0]); +} + + +static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd) +{ + struct port *port = netdev_priv(dev); + unsigned int duplex_chg; + int err; + + if (!netif_running(dev)) + return -EINVAL; + err = generic_mii_ioctl(&port->mii, if_mii(req), cmd, &duplex_chg); + if (duplex_chg) + eth_set_duplex(port); + return err; +} + + +static int request_queues(struct port *port) +{ + int err; + + err = qmgr_request_queue(RXFREE_QUEUE(port->id), RX_DESCS, 0, 0); + if (err) + return err; + + err = qmgr_request_queue(port->plat->rxq, RX_DESCS, 0, 0); + if (err) + goto rel_rxfree; + + err = qmgr_request_queue(TX_QUEUE(port->id), TX_DESCS, 0, 0); + if (err) + goto rel_rx; + + err = qmgr_request_queue(port->plat->txreadyq, TX_DESCS, 0, 0); + if (err) + goto rel_tx; + + /* TX-done queue handles skbs sent out by the NPEs */ + if (!ports_open) { + err = qmgr_request_queue(TXDONE_QUEUE, TXDONE_QUEUE_LEN, 0, 0); + if (err) + goto rel_txready; + } + return 0; + +rel_txready: + qmgr_release_queue(port->plat->txreadyq); +rel_tx: + qmgr_release_queue(TX_QUEUE(port->id)); +rel_rx: + qmgr_release_queue(port->plat->rxq); +rel_rxfree: + qmgr_release_queue(RXFREE_QUEUE(port->id)); + printk(KERN_DEBUG "%s: unable to request hardware queues\n", + port->netdev->name); + return err; +} + +static void release_queues(struct port *port) +{ + qmgr_release_queue(RXFREE_QUEUE(port->id)); + qmgr_release_queue(port->plat->rxq); + qmgr_release_queue(TX_QUEUE(port->id)); + qmgr_release_queue(port->plat->txreadyq); + + if (!ports_open) + qmgr_release_queue(TXDONE_QUEUE); +} + +static int init_queues(struct port *port) +{ + int i; + + if (!ports_open) + if (!(dma_pool = dma_pool_create(DRV_NAME, NULL, + POOL_ALLOC_SIZE, 32, 0))) + return -ENOMEM; + + if (!(port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL, + &port->desc_tab_phys))) + return -ENOMEM; + memset(port->desc_tab, 0, POOL_ALLOC_SIZE); + memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */ + memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab)); + + /* Setup RX buffers */ + for (i = 0; i < RX_DESCS; i++) { + struct desc *desc = rx_desc_ptr(port, i); + buffer_t *buff; /* skb or kmalloc()ated memory */ + void *data; +#ifdef __ARMEB__ + if (!(buff = netdev_alloc_skb(port->netdev, RX_BUFF_SIZE))) + return -ENOMEM; + data = buff->data; +#else + if (!(buff = kmalloc(RX_BUFF_SIZE, GFP_KERNEL))) + return -ENOMEM; + data = buff; +#endif + desc->buf_len = MAX_MRU; + desc->data = dma_map_single(&port->netdev->dev, data, + RX_BUFF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(desc->data)) { + free_buffer(buff); + return -EIO; + } + desc->data += NET_IP_ALIGN; + port->rx_buff_tab[i] = buff; + } + + return 0; +} + +static void destroy_queues(struct port *port) +{ + int i; + + if (port->desc_tab) { + for (i = 0; i < RX_DESCS; i++) { + struct desc *desc = rx_desc_ptr(port, i); + buffer_t *buff = port->rx_buff_tab[i]; + if (buff) { + dma_unmap_single(&port->netdev->dev, + desc->data - NET_IP_ALIGN, + RX_BUFF_SIZE, DMA_FROM_DEVICE); + free_buffer(buff); + } + } + for (i = 0; i < TX_DESCS; i++) { + struct desc *desc = tx_desc_ptr(port, i); + buffer_t *buff = port->tx_buff_tab[i]; + if (buff) { + dma_unmap_tx(port, desc); + free_buffer(buff); + } + } + dma_pool_free(dma_pool, port->desc_tab, port->desc_tab_phys); + port->desc_tab = NULL; + } + + if (!ports_open && dma_pool) { + dma_pool_destroy(dma_pool); + dma_pool = NULL; + } +} + +static int eth_open(struct net_device *dev) +{ + struct port *port = netdev_priv(dev); + struct npe *npe = port->npe; + struct msg msg; + int i, err; + + if (!npe_running(npe)) { + err = npe_load_firmware(npe, npe_name(npe), &dev->dev); + if (err) + return err; + + if (npe_recv_message(npe, &msg, "ETH_GET_STATUS")) { + printk(KERN_ERR "%s: %s not responding\n", dev->name, + npe_name(npe)); + return -EIO; + } + } + + mdio_write(dev, port->plat->phy, MII_BMCR, port->mii_bmcr); + + memset(&msg, 0, sizeof(msg)); + msg.cmd = NPE_VLAN_SETRXQOSENTRY; + msg.eth_id = port->id; + msg.byte5 = port->plat->rxq | 0x80; + msg.byte7 = port->plat->rxq << 4; + for (i = 0; i < 8; i++) { + msg.byte3 = i; + if (npe_send_recv_message(port->npe, &msg, "ETH_SET_RXQ")) + return -EIO; + } + + msg.cmd = NPE_EDB_SETPORTADDRESS; + msg.eth_id = PHYSICAL_ID(port->id); + msg.byte2 = dev->dev_addr[0]; + msg.byte3 = dev->dev_addr[1]; + msg.byte4 = dev->dev_addr[2]; + msg.byte5 = dev->dev_addr[3]; + msg.byte6 = dev->dev_addr[4]; + msg.byte7 = dev->dev_addr[5]; + if (npe_send_recv_message(port->npe, &msg, "ETH_SET_MAC")) + return -EIO; + + memset(&msg, 0, sizeof(msg)); + msg.cmd = NPE_FW_SETFIREWALLMODE; + msg.eth_id = port->id; + if (npe_send_recv_message(port->npe, &msg, "ETH_SET_FIREWALL_MODE")) + return -EIO; + + if ((err = request_queues(port)) != 0) + return err; + + if ((err = init_queues(port)) != 0) { + destroy_queues(port); + release_queues(port); + return err; + } + + for (i = 0; i < ETH_ALEN; i++) + __raw_writel(dev->dev_addr[i], &port->regs->hw_addr[i]); + __raw_writel(0x08, &port->regs->random_seed); + __raw_writel(0x12, &port->regs->partial_empty_threshold); + __raw_writel(0x30, &port->regs->partial_full_threshold); + __raw_writel(0x08, &port->regs->tx_start_bytes); + __raw_writel(0x15, &port->regs->tx_deferral); + __raw_writel(0x08, &port->regs->tx_2part_deferral[0]); + __raw_writel(0x07, &port->regs->tx_2part_deferral[1]); + __raw_writel(0x80, &port->regs->slot_time); + __raw_writel(0x01, &port->regs->int_clock_threshold); + + /* Populate queues with buffers, no failure after this point */ + for (i = 0; i < TX_DESCS; i++) + queue_put_desc(port->plat->txreadyq, + tx_desc_phys(port, i), tx_desc_ptr(port, i)); + + for (i = 0; i < RX_DESCS; i++) + queue_put_desc(RXFREE_QUEUE(port->id), + rx_desc_phys(port, i), rx_desc_ptr(port, i)); + + __raw_writel(TX_CNTRL1_RETRIES, &port->regs->tx_control[1]); + __raw_writel(DEFAULT_TX_CNTRL0, &port->regs->tx_control[0]); + __raw_writel(0, &port->regs->rx_control[1]); + __raw_writel(DEFAULT_RX_CNTRL0, &port->regs->rx_control[0]); + + napi_enable(&port->napi); + phy_check_media(port, 1); + eth_set_mcast_list(dev); + netif_start_queue(dev); + schedule_delayed_work(&port->mdio_thread, MDIO_INTERVAL); + + qmgr_set_irq(port->plat->rxq, QUEUE_IRQ_SRC_NOT_EMPTY, + eth_rx_irq, dev); + if (!ports_open) { + qmgr_set_irq(TXDONE_QUEUE, QUEUE_IRQ_SRC_NOT_EMPTY, + eth_txdone_irq, NULL); + qmgr_enable_irq(TXDONE_QUEUE); + } + ports_open++; + /* we may already have RX data, enables IRQ */ + netif_rx_schedule(dev, &port->napi); + return 0; +} + +static int eth_close(struct net_device *dev) +{ + struct port *port = netdev_priv(dev); + struct msg msg; + int buffs = RX_DESCS; /* allocated RX buffers */ + int i; + + ports_open--; + qmgr_disable_irq(port->plat->rxq); + napi_disable(&port->napi); + netif_stop_queue(dev); + + while (queue_get_desc(RXFREE_QUEUE(port->id), port, 0) >= 0) + buffs--; + + memset(&msg, 0, sizeof(msg)); + msg.cmd = NPE_SETLOOPBACK_MODE; + msg.eth_id = port->id; + msg.byte3 = 1; + if (npe_send_recv_message(port->npe, &msg, "ETH_ENABLE_LOOPBACK")) + printk(KERN_CRIT "%s: unable to enable loopback\n", dev->name); + + i = 0; + do { /* drain RX buffers */ + while (queue_get_desc(port->plat->rxq, port, 0) >= 0) + buffs--; + if (!buffs) + break; + if (qmgr_stat_empty(TX_QUEUE(port->id))) { + /* we have to inject some packet */ + struct desc *desc; + u32 phys; + int n = queue_get_desc(port->plat->txreadyq, port, 1); + BUG_ON(n < 0); + desc = tx_desc_ptr(port, n); + phys = tx_desc_phys(port, n); + desc->buf_len = desc->pkt_len = 1; + wmb(); + queue_put_desc(TX_QUEUE(port->id), phys, desc); + } + udelay(1); + } while (++i < MAX_CLOSE_WAIT); + + if (buffs) + printk(KERN_CRIT "%s: unable to drain RX queue, %i buffer(s)" + " left in NPE\n", dev->name, buffs); +#if DEBUG_CLOSE + if (!buffs) + printk(KERN_DEBUG "Draining RX queue took %i cycles\n", i); +#endif + + buffs = TX_DESCS; + while (queue_get_desc(TX_QUEUE(port->id), port, 1) >= 0) + buffs--; /* cancel TX */ + + i = 0; + do { + while (queue_get_desc(port->plat->txreadyq, port, 1) >= 0) + buffs--; + if (!buffs) + break; + } while (++i < MAX_CLOSE_WAIT); + + if (buffs) + printk(KERN_CRIT "%s: unable to drain TX queue, %i buffer(s) " + "left in NPE\n", dev->name, buffs); +#if DEBUG_CLOSE + if (!buffs) + printk(KERN_DEBUG "Draining TX queues took %i cycles\n", i); +#endif + + msg.byte3 = 0; + if (npe_send_recv_message(port->npe, &msg, "ETH_DISABLE_LOOPBACK")) + printk(KERN_CRIT "%s: unable to disable loopback\n", + dev->name); + + port->mii_bmcr = mdio_read(dev, port->plat->phy, MII_BMCR) & + ~(BMCR_RESET | BMCR_PDOWN); /* may have been altered */ + mdio_write(dev, port->plat->phy, MII_BMCR, + port->mii_bmcr | BMCR_PDOWN); + + if (!ports_open) + qmgr_disable_irq(TXDONE_QUEUE); + cancel_rearming_delayed_work(&port->mdio_thread); + destroy_queues(port); + release_queues(port); + return 0; +} + +static int __devinit eth_init_one(struct platform_device *pdev) +{ + struct port *port; + struct net_device *dev; + struct eth_plat_info *plat = pdev->dev.platform_data; + u32 regs_phys; + int err; + + if (!(dev = alloc_etherdev(sizeof(struct port)))) + return -ENOMEM; + + SET_NETDEV_DEV(dev, &pdev->dev); + port = netdev_priv(dev); + port->netdev = dev; + port->id = pdev->id; + + switch (port->id) { + case IXP4XX_ETH_NPEA: + port->regs = (struct eth_regs __iomem *)IXP4XX_EthA_BASE_VIRT; + regs_phys = IXP4XX_EthA_BASE_PHYS; + break; + case IXP4XX_ETH_NPEB: + port->regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; + regs_phys = IXP4XX_EthB_BASE_PHYS; + break; + case IXP4XX_ETH_NPEC: + port->regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; + regs_phys = IXP4XX_EthC_BASE_PHYS; + break; + default: + err = -ENOSYS; + goto err_free; + } + + dev->open = eth_open; + dev->hard_start_xmit = eth_xmit; + dev->stop = eth_close; + dev->get_stats = eth_stats; + dev->do_ioctl = eth_ioctl; + dev->set_multicast_list = eth_set_mcast_list; + dev->tx_queue_len = 100; + + netif_napi_add(dev, &port->napi, eth_poll, NAPI_WEIGHT); + + if (!(port->npe = npe_request(NPE_ID(port->id)))) { + err = -EIO; + goto err_free; + } + + if (register_netdev(dev)) { + err = -EIO; + goto err_npe_rel; + } + + port->mem_res = request_mem_region(regs_phys, REGS_SIZE, dev->name); + if (!port->mem_res) { + err = -EBUSY; + goto err_unreg; + } + + port->plat = plat; + npe_port_tab[NPE_ID(port->id)] = port; + memcpy(dev->dev_addr, plat->hwaddr, ETH_ALEN); + + platform_set_drvdata(pdev, dev); + + __raw_writel(DEFAULT_CORE_CNTRL | CORE_RESET, + &port->regs->core_control); + udelay(50); + __raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control); + udelay(50); + + port->mii.dev = dev; + port->mii.mdio_read = mdio_read; + port->mii.mdio_write = mdio_write; + port->mii.phy_id = plat->phy; + port->mii.phy_id_mask = 0x1F; + port->mii.reg_num_mask = 0x1F; + + printk(KERN_INFO "%s: MII PHY %i on %s\n", dev->name, plat->phy, + npe_name(port->npe)); + + phy_reset(dev, plat->phy); + port->mii_bmcr = mdio_read(dev, plat->phy, MII_BMCR) & + ~(BMCR_RESET | BMCR_PDOWN); + mdio_write(dev, plat->phy, MII_BMCR, port->mii_bmcr | BMCR_PDOWN); + + INIT_DELAYED_WORK(&port->mdio_thread, mdio_thread); + return 0; + +err_unreg: + unregister_netdev(dev); +err_npe_rel: + npe_release(port->npe); +err_free: + free_netdev(dev); + return err; +} + +static int __devexit eth_remove_one(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct port *port = netdev_priv(dev); + + unregister_netdev(dev); + npe_port_tab[NPE_ID(port->id)] = NULL; + platform_set_drvdata(pdev, NULL); + npe_release(port->npe); + release_resource(port->mem_res); + free_netdev(dev); + return 0; +} + +static struct platform_driver drv = { + .driver.name = DRV_NAME, + .probe = eth_init_one, + .remove = eth_remove_one, +}; + +static int __init eth_init_module(void) +{ + if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEB_ETH0)) + return -ENOSYS; + + /* All MII PHY accesses use NPE-B Ethernet registers */ + spin_lock_init(&mdio_lock); + mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; + __raw_writel(DEFAULT_CORE_CNTRL, &mdio_regs->core_control); + + return platform_driver_register(&drv); +} + +static void __exit eth_cleanup_module(void) +{ + platform_driver_unregister(&drv); +} + +MODULE_AUTHOR("Krzysztof Halasa"); +MODULE_DESCRIPTION("Intel IXP4xx Ethernet driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:ixp4xx_eth"); +module_init(eth_init_module); +module_exit(eth_cleanup_module); -- cgit v1.2.3-70-g09d2 From d0313587547092af7f5ee8a576793e1e5d61be89 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 17 Apr 2008 00:08:10 -0400 Subject: [netdrvr] gianfar: Determine TBIPA value dynamically TBIPA needs to be set to a value (on connected MDIO buses) that doesn't conflict with PHYs on the bus. By hardcoding it to 0x1f, we were preventing boards with PHYs at 0x1f from working properly. Instead, scan the bus when it comes up, and find an address that doesn't have a PHY on it. The TBI PHY configuration code then trusts that the value in TBIPA is either safe, or doesn't matter (ie - it's not an active bus with other PHYs). Signed-off-by: Andy Fleming Signed-off-by: Paul Gortmaker Signed-off-by: Jeff Garzik --- drivers/net/gianfar.c | 27 ++++++++++++++------------- drivers/net/gianfar.h | 1 - drivers/net/gianfar_mii.c | 38 +++++++++++++++++++++++++++++++++----- drivers/net/gianfar_mii.h | 3 +++ 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 99a4b990939..587afe7be68 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -131,8 +131,6 @@ static void free_skb_resources(struct gfar_private *priv); static void gfar_set_multi(struct net_device *dev); static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr); static void gfar_configure_serdes(struct net_device *dev); -extern int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id, int regnum, u16 value); -extern int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum); #ifdef CONFIG_GFAR_NAPI static int gfar_poll(struct napi_struct *napi, int budget); #endif @@ -477,24 +475,30 @@ static int init_phy(struct net_device *dev) return 0; } +/* + * Initialize TBI PHY interface for communicating with the + * SERDES lynx PHY on the chip. We communicate with this PHY + * through the MDIO bus on each controller, treating it as a + * "normal" PHY at the address found in the TBIPA register. We assume + * that the TBIPA register is valid. Either the MDIO bus code will set + * it to a value that doesn't conflict with other PHYs on the bus, or the + * value doesn't matter, as there are no other PHYs on the bus. + */ static void gfar_configure_serdes(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); struct gfar_mii __iomem *regs = (void __iomem *)&priv->regs->gfar_mii_regs; + int tbipa = gfar_read(&priv->regs->tbipa); - /* Initialise TBI i/f to communicate with serdes (lynx phy) */ + /* Single clk mode, mii mode off(for serdes communication) */ + gfar_local_mdio_write(regs, tbipa, MII_TBICON, TBICON_CLK_SELECT); - /* Single clk mode, mii mode off(for aerdes communication) */ - gfar_local_mdio_write(regs, TBIPA_VALUE, MII_TBICON, TBICON_CLK_SELECT); - - /* Supported pause and full-duplex, no half-duplex */ - gfar_local_mdio_write(regs, TBIPA_VALUE, MII_ADVERTISE, + gfar_local_mdio_write(regs, tbipa, MII_ADVERTISE, ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM); - /* ANEG enable, restart ANEG, full duplex mode, speed[1] set */ - gfar_local_mdio_write(regs, TBIPA_VALUE, MII_BMCR, BMCR_ANENABLE | + gfar_local_mdio_write(regs, tbipa, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); } @@ -541,9 +545,6 @@ static void init_registers(struct net_device *dev) /* Initialize the Minimum Frame Length Register */ gfar_write(&priv->regs->minflr, MINFLR_INIT_SETTINGS); - - /* Assign the TBI an address which won't conflict with the PHYs */ - gfar_write(&priv->regs->tbipa, TBIPA_VALUE); } diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index 0d088360946..fd487be3993 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -137,7 +137,6 @@ extern const char gfar_driver_version[]; #define DEFAULT_RXCOUNT 0 #endif /* CONFIG_GFAR_NAPI */ -#define TBIPA_VALUE 0x1f #define MIIMCFG_INIT_VALUE 0x00000007 #define MIIMCFG_RESET 0x80000000 #define MIIMIND_BUSY 0x00000001 diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index b8898927236..ebcfb27a904 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -78,7 +78,6 @@ int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id, * same as system mdio bus, used for controlling the external PHYs, for eg. */ int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum) - { u16 value; @@ -122,7 +121,7 @@ int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum) } /* Reset the MIIM registers, and wait for the bus to free */ -int gfar_mdio_reset(struct mii_bus *bus) +static int gfar_mdio_reset(struct mii_bus *bus) { struct gfar_mii __iomem *regs = (void __iomem *)bus->priv; unsigned int timeout = PHY_INIT_TIMEOUT; @@ -152,14 +151,15 @@ int gfar_mdio_reset(struct mii_bus *bus) } -int gfar_mdio_probe(struct device *dev) +static int gfar_mdio_probe(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct gianfar_mdio_data *pdata; struct gfar_mii __iomem *regs; + struct gfar __iomem *enet_regs; struct mii_bus *new_bus; struct resource *r; - int err = 0; + int i, err = 0; if (NULL == dev) return -EINVAL; @@ -199,6 +199,34 @@ int gfar_mdio_probe(struct device *dev) new_bus->dev = dev; dev_set_drvdata(dev, new_bus); + /* + * This is mildly evil, but so is our hardware for doing this. + * Also, we have to cast back to struct gfar_mii because of + * definition weirdness done in gianfar.h. + */ + enet_regs = (struct gfar __iomem *) + ((char *)regs - offsetof(struct gfar, gfar_mii_regs)); + + /* Scan the bus, looking for an empty spot for TBIPA */ + gfar_write(&enet_regs->tbipa, 0); + for (i = PHY_MAX_ADDR; i > 0; i--) { + u32 phy_id; + int r; + + r = get_phy_id(new_bus, i, &phy_id); + if (r) + return r; + + if (phy_id == 0xffffffff) + break; + } + + /* The bus is full. We don't support using 31 PHYs, sorry */ + if (i == 0) + return -EBUSY; + + gfar_write(&enet_regs->tbipa, i); + err = mdiobus_register(new_bus); if (0 != err) { @@ -218,7 +246,7 @@ reg_map_fail: } -int gfar_mdio_remove(struct device *dev) +static int gfar_mdio_remove(struct device *dev) { struct mii_bus *bus = dev_get_drvdata(dev); diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h index b373091c703..2af28b16a0e 100644 --- a/drivers/net/gianfar_mii.h +++ b/drivers/net/gianfar_mii.h @@ -41,6 +41,9 @@ struct gfar_mii { int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum); int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); +int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id, + int regnum, u16 value); +int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum); int __init gfar_mdio_init(void); void gfar_mdio_exit(void); #endif /* GIANFAR_PHY_H */ -- cgit v1.2.3-70-g09d2 From ce447eb91409225f8a488f6b7b2a1bdf7b2d884f Mon Sep 17 00:00:00 2001 From: John Heffner Date: Tue, 29 Apr 2008 03:13:02 -0700 Subject: tcp: Allow send-limited cwnd to grow up to max_burst when gso disabled This changes the logic in tcp_is_cwnd_limited() so that cwnd may grow up to tcp_max_burst() even when sk_can_gso() is false, or when sysctl_tcp_tso_win_divisor != 0. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 3a6be23d222..bfb1996bd99 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -285,14 +285,11 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) if (in_flight >= tp->snd_cwnd) return 1; - if (!sk_can_gso(sk)) - return 0; - left = tp->snd_cwnd - in_flight; - if (sysctl_tcp_tso_win_divisor) - return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd; - else - return left <= tcp_max_burst(tp); + if (sk_can_gso(sk) && + left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd) + return 1; + return left <= tcp_max_burst(tp); } EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); -- cgit v1.2.3-70-g09d2 From 246eb2af060fc32650f07203c02bdc0456ad76c7 Mon Sep 17 00:00:00 2001 From: John Heffner Date: Tue, 29 Apr 2008 03:13:52 -0700 Subject: tcp: Limit cwnd growth when deferring for GSO This fixes inappropriately large cwnd growth on sender-limited flows when GSO is enabled, limiting cwnd growth to 64k. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index bfb1996bd99..6a250828b76 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -287,7 +287,8 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && - left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd) + left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && + left * tp->mss_cache < sk->sk_gso_max_size) return 1; return left <= tcp_max_burst(tp); } -- cgit v1.2.3-70-g09d2 From be8d0d7903af85d396449b34366e7f5b0c9cc58b Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Tue, 29 Apr 2008 03:15:10 -0700 Subject: netfilter: xt_TCPOPTSTRIP: signed tcphoff for ipv6_skip_exthdr() retval if tcphoff remains unsigned, a negative ipv6_skip_exthdr() return value will go unnoticed, Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_TCPOPTSTRIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 3b2aa56833b..9685b6fcbc8 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -90,7 +90,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, const struct xt_target *target, const void *targinfo) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); - unsigned int tcphoff; + int tcphoff; u_int8_t nexthdr; nexthdr = ipv6h->nexthdr; -- cgit v1.2.3-70-g09d2 From 0e93bb9459f56b50a2f71f2c230f4ad00ec40a73 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 03:15:35 -0700 Subject: netfilter: x_tables: fix net namespace leak when reading /proc/net/xxx_tables_names The seq_open_net() call should be accompanied with seq_release_net() one. Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/x_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f52f7f810ac..11b22abc2b7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -787,7 +787,7 @@ static const struct file_operations xt_table_ops = { .open = xt_table_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos) -- cgit v1.2.3-70-g09d2 From 9a732ed6d0e126d4c8a818f42a13f3df11755bee Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Tue, 29 Apr 2008 03:16:34 -0700 Subject: netfilter: {nfnetlink,ip,ip6}_queue: fix skb_over_panic when enlarging packets While reinjecting *bigger* modified versions of IPv6 packets using libnetfilter_queue, things work fine on a 2.6.24 kernel (2.6.22 too) but I get the following on recents kernels (2.6.25, trace below is against today's net-2.6 git tree): skb_over_panic: text:c04fddb0 len:696 put:632 head:f7592c00 data:f7592c00 tail:0xf7592eb8 end:0xf7592e80 dev:eth0 ------------[ cut here ]------------ invalid opcode: 0000 [#1] PREEMPT Process sendd (pid: 3657, ti=f6014000 task=f77c31d0 task.ti=f6014000) Stack: c071e638 c04fddb0 000002b8 00000278 f7592c00 f7592c00 f7592eb8 f7592e80 f763c000 f6bc5200 f7592c40 f6015c34 c04cdbfc f6bc5200 00000278 f6015c60 c04fddb0 00000020 f72a10c0 f751b420 00000001 0000000a 000002b8 c065582c Call Trace: [] ? nfqnl_recv_verdict+0x1c0/0x2e0 [] ? skb_put+0x3c/0x40 [] ? nfqnl_recv_verdict+0x1c0/0x2e0 [] ? nfnetlink_rcv_msg+0xf5/0x160 [] ? nfnetlink_rcv_msg+0x1e/0x160 [] ? nfnetlink_rcv_msg+0x0/0x160 [] ? netlink_rcv_skb+0x77/0xa0 [] ? nfnetlink_rcv+0x1c/0x30 [] ? netlink_unicast+0x243/0x2b0 [] ? memcpy_fromiovec+0x4a/0x70 [] ? netlink_sendmsg+0x1c6/0x270 [] ? sock_sendmsg+0xc4/0xf0 [] ? set_next_entity+0x1d/0x50 [] ? autoremove_wake_function+0x0/0x40 [] ? __wake_up_common+0x3e/0x70 [] ? n_tty_receive_buf+0x34f/0x1280 [] ? __wake_up+0x68/0x70 [] ? copy_from_user+0x37/0x70 [] ? verify_iovec+0x2c/0x90 [] ? sys_sendmsg+0x10a/0x230 [] ? __dequeue_entity+0x2a/0xa0 [] ? set_next_entity+0x1d/0x50 [] ? pty_write+0x47/0x60 [] ? tty_default_put_char+0x1b/0x20 [] ? __wake_up+0x49/0x70 [] ? tty_ldisc_deref+0x39/0x90 [] ? tty_write+0x1a0/0x1b0 [] ? sys_socketcall+0x7f/0x260 [] ? sysenter_past_esp+0x6a/0x91 [] ? snd_intel8x0m_probe+0x270/0x6e0 ======================= Code: 00 00 89 5c 24 14 8b 98 9c 00 00 00 89 54 24 0c 89 5c 24 10 8b 40 50 89 4c 24 04 c7 04 24 38 e6 71 c0 89 44 24 08 e8 c4 46 c5 ff <0f> 0b eb fe 55 89 e5 56 89 d6 53 89 c3 83 ec 0c 8b 40 50 39 d0 EIP: [] skb_over_panic+0x5c/0x60 SS:ESP 0068:f6015bf8 Looking at the code, I ended up in nfq_mangle() function (called by nfqnl_recv_verdict()) which performs a call to skb_copy_expand() due to the increased size of data passed to the function. AFAICT, it should ask for 'diff' instead of 'diff - skb_tailroom(e->skb)'. Because the resulting sk_buff has not enough space to support the skb_put(skb, diff) call a few lines later, this results in the call to skb_over_panic(). The patch below asks for allocation of a copy with enough space for mangled packet and the same amount of headroom as old sk_buff. While looking at how the regression appeared (e2b58a67), I noticed the same pattern in ipq_mangle_ipv6() and ipq_mangle_ipv4(). The patch corrects those locations too. Tested with bigger reinjected IPv6 packets (nfqnl_mangle() path), things are ok (2.6.25 and today's net-2.6 git tree). Signed-off-by: Arnaud Ebalard Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 5 ++--- net/ipv6/netfilter/ip6_queue.c | 5 ++--- net/netfilter/nfnetlink_queue.c | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 719be29f750..26a37cedcf2 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -296,9 +296,8 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); if (!nskb) { printk(KERN_WARNING "ip_queue: error " "in mangle, dropping packet\n"); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 92a36c9e540..2eff3ae8977 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -298,9 +298,8 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); if (!nskb) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 2c9fe5c1289..3447025ce06 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -454,9 +454,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); if (!nskb) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); -- cgit v1.2.3-70-g09d2 From 43af8532ecd74a61f9e7aeb27c026c1ee27915ca Mon Sep 17 00:00:00 2001 From: Volodymyr G Lukiianyk Date: Tue, 29 Apr 2008 03:17:42 -0700 Subject: bridge: fix error handling in br_add_if() When device is added to bridge its refcnt is incremented (in new_nbp()), but if error occurs during further br_add_if() operations this counter is not decremented back. Fix it by adding dev_put() call in the error path. Signed-off-by: Volodymyr G Lukiianyk Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 298e0f463c5..77a981a1ee5 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -411,9 +411,12 @@ err2: br_fdb_delete_by_port(br, p, 1); err1: kobject_del(&p->kobj); - return err; + goto put_back; err0: kobject_put(&p->kobj); + +put_back: + dev_put(dev); return err; } -- cgit v1.2.3-70-g09d2 From d69efb16891ddfa6c0b527f912a7193054d50281 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 29 Apr 2008 03:18:13 -0700 Subject: bridge: kernel panic when unloading bridge module There is a race condition when unloading bridge and netfilter. The problem happens if __fake_rtable is in use by a skb coming in, while someone starts to unload bridge.ko. br_netfilter_fini() is called at the beginning of unload in br_deinit() while skbs still are being forwarded and transferred to local ip stack. Thus there is a possibility of the __fake_rtable pointer not being removed in a skb that goes up to ip stack. This results in a kernel panic, as ip_rcv() calls the input-function of __fake_rtable, which is NULL. Moving the call of br_netfilter_fini() to the end of br_deinit() solves the problem. Signed-off-by: Bodo Stroesser Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index a9018287312..8f3c58e5f7a 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -76,7 +76,6 @@ static void __exit br_deinit(void) rcu_assign_pointer(br_stp_sap->rcv_func, NULL); br_netlink_fini(); - br_netfilter_fini(); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); @@ -84,6 +83,7 @@ static void __exit br_deinit(void) synchronize_net(); + br_netfilter_fini(); llc_sap_put(br_stp_sap); br_fdb_get_hook = NULL; br_fdb_put_hook = NULL; -- cgit v1.2.3-70-g09d2 From 2ad17defd596ca7e8ba782d5fc6950ee0e99513c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 29 Apr 2008 03:21:23 -0700 Subject: ipvs: fix oops in backup for fwmark conn templates Fixes bug http://bugzilla.kernel.org/show_bug.cgi?id=10556 where conn templates with protocol=IPPROTO_IP can oops backup box. Result from ip_vs_proto_get() should be checked because protocol value can be invalid or unsupported in backup. But for valid message we should not fail for templates which use IPPROTO_IP. Also, add checks to validate message limits and connection state. Show state NONE for templates using IPPROTO_IP. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +- net/ipv4/ipvs/ip_vs_proto.c | 2 +- net/ipv4/ipvs/ip_vs_proto_ah.c | 1 + net/ipv4/ipvs/ip_vs_proto_esp.c | 1 + net/ipv4/ipvs/ip_vs_proto_tcp.c | 1 + net/ipv4/ipvs/ip_vs_proto_udp.c | 1 + net/ipv4/ipvs/ip_vs_sync.c | 80 ++++++++++++++++++++++++++++++----------- 7 files changed, 66 insertions(+), 23 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 56f3c94ae62..9a51ebad3f1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -405,7 +405,8 @@ struct sk_buff; struct ip_vs_protocol { struct ip_vs_protocol *next; char *name; - __u16 protocol; + u16 protocol; + u16 num_states; int dont_defrag; atomic_t appcnt; /* counter of proto app incs */ int *timeout_table; /* protocol timeout table */ diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index dde28a250d9..4b1c16cbb16 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -148,7 +148,7 @@ const char * ip_vs_state_name(__u16 proto, int state) struct ip_vs_protocol *pp = ip_vs_proto_get(proto); if (pp == NULL || pp->state_name == NULL) - return "ERR!"; + return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; return pp->state_name(state); } diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index a842676e1c6..4bf835e1d86 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -160,6 +160,7 @@ static void ah_exit(struct ip_vs_protocol *pp) struct ip_vs_protocol ip_vs_protocol_ah = { .name = "AH", .protocol = IPPROTO_AH, + .num_states = 1, .dont_defrag = 1, .init = ah_init, .exit = ah_exit, diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index aef0d3ee8e4..db6a6b7b1a0 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -159,6 +159,7 @@ static void esp_exit(struct ip_vs_protocol *pp) struct ip_vs_protocol ip_vs_protocol_esp = { .name = "ESP", .protocol = IPPROTO_ESP, + .num_states = 1, .dont_defrag = 1, .init = esp_init, .exit = esp_exit, diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 620e40ff79a..b83dc14b0a4 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -594,6 +594,7 @@ static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) struct ip_vs_protocol ip_vs_protocol_tcp = { .name = "TCP", .protocol = IPPROTO_TCP, + .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, .appcnt = ATOMIC_INIT(0), .init = ip_vs_tcp_init, diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 1caa2908373..75771cb3cd6 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -409,6 +409,7 @@ static void udp_exit(struct ip_vs_protocol *pp) struct ip_vs_protocol ip_vs_protocol_udp = { .name = "UDP", .protocol = IPPROTO_UDP, + .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, .init = udp_init, .exit = udp_exit, diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 69c56663cc9..eff54efe035 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -288,11 +288,16 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) char *p; int i; + if (buflen < sizeof(struct ip_vs_sync_mesg)) { + IP_VS_ERR_RL("sync message header too short\n"); + return; + } + /* Convert size back to host byte order */ m->size = ntohs(m->size); if (buflen != m->size) { - IP_VS_ERR("bogus message\n"); + IP_VS_ERR_RL("bogus sync message size\n"); return; } @@ -307,9 +312,48 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) for (i=0; inr_conns; i++) { unsigned flags, state; - s = (struct ip_vs_sync_conn *)p; + if (p + SIMPLE_CONN_SIZE > buffer+buflen) { + IP_VS_ERR_RL("bogus conn in sync message\n"); + return; + } + s = (struct ip_vs_sync_conn *) p; flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; + flags &= ~IP_VS_CONN_F_HASHED; + if (flags & IP_VS_CONN_F_SEQ_MASK) { + opt = (struct ip_vs_sync_conn_options *)&s[1]; + p += FULL_CONN_SIZE; + if (p > buffer+buflen) { + IP_VS_ERR_RL("bogus conn options in sync message\n"); + return; + } + } else { + opt = NULL; + p += SIMPLE_CONN_SIZE; + } + state = ntohs(s->state); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { + pp = ip_vs_proto_get(s->protocol); + if (!pp) { + IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", + s->protocol); + continue; + } + if (state >= pp->num_states) { + IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", + pp->name, state); + continue; + } + } else { + /* protocol in templates is not used for state/timeout */ + pp = NULL; + if (state > 0) { + IP_VS_DBG(2, "Invalid template state %u in sync msg\n", + state); + state = 0; + } + } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(s->protocol, s->caddr, s->cport, @@ -345,14 +389,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) IP_VS_ERR("ip_vs_conn_new failed\n"); return; } - cp->state = state; } else if (!cp->dest) { dest = ip_vs_try_bind_dest(cp); - if (!dest) { - /* it is an unbound entry created by - * synchronization */ - cp->flags = flags | IP_VS_CONN_F_HASHED; - } else + if (dest) atomic_dec(&dest->refcnt); } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && (cp->state != state)) { @@ -371,23 +410,22 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } } - if (flags & IP_VS_CONN_F_SEQ_MASK) { - opt = (struct ip_vs_sync_conn_options *)&s[1]; + if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - p += FULL_CONN_SIZE; - } else - p += SIMPLE_CONN_SIZE; - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); cp->state = state; - pp = ip_vs_proto_get(s->protocol); - cp->timeout = pp->timeout_table[cp->state]; + cp->old_state = cp->state; + /* + * We can not recover the right timeout for templates + * in all cases, we can not find the right fwmark + * virtual service. If needed, we can do it for + * non-fwmark persistent services. + */ + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) + cp->timeout = pp->timeout_table[state]; + else + cp->timeout = (3*60*HZ); ip_vs_conn_put(cp); - - if (p > buffer+buflen) { - IP_VS_ERR("bogus message\n"); - return; - } } } -- cgit v1.2.3-70-g09d2 From be666e0a1345ed80f29cb30c73da0ec2ea5c5863 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Tue, 29 Apr 2008 03:23:00 -0700 Subject: net: Several cleanups for the setsockopt compat support. 1) added missing "__user" for kgsr and kgf pointers 2) verify read for only GROUP_FILTER_SIZE(0). The group_filter structure definition (via RFC) includes space for one source in the source list array, but that source need not be present. So, sizeof(group_filter) > GROUP_FILTER_SIZE(0). Fixed the user read-check for minimum length to use the smaller size. 3) remove unneeded "&" for gf_slist addresses Signed-off-by: David L Stevens Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/compat.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/compat.c b/net/compat.c index 01bf95d0832..8146f654391 100644 --- a/net/compat.c +++ b/net/compat.c @@ -548,6 +548,9 @@ struct compat_group_filter { __attribute__ ((aligned(4))); } __attribute__ ((packed)); +#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ + sizeof(struct __kernel_sockaddr_storage)) + int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, int optlen, @@ -582,7 +585,7 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, case MCAST_UNBLOCK_SOURCE: { struct compat_group_source_req __user *gsr32 = (void *)optval; - struct group_source_req *kgsr = compat_alloc_user_space( + struct group_source_req __user *kgsr = compat_alloc_user_space( sizeof(struct group_source_req)); u32 interface; @@ -603,10 +606,10 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, case MCAST_MSFILTER: { struct compat_group_filter __user *gf32 = (void *)optval; - struct group_filter *kgf; + struct group_filter __user *kgf; u32 interface, fmode, numsrc; - if (!access_ok(VERIFY_READ, gf32, sizeof(*gf32)) || + if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) || __get_user(interface, &gf32->gf_interface) || __get_user(fmode, &gf32->gf_fmode) || __get_user(numsrc, &gf32->gf_numsrc)) @@ -622,7 +625,7 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, __put_user(numsrc, &kgf->gf_numsrc) || copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)) || - (numsrc && copy_in_user(&kgf->gf_slist, &gf32->gf_slist, + (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist, numsrc * sizeof(kgf->gf_slist[0])))) return -EFAULT; koptval = (char __user *)kgf; -- cgit v1.2.3-70-g09d2 From 42908c69f61f75dd70e424263ab89ee52040382b Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Tue, 29 Apr 2008 03:23:22 -0700 Subject: net: Add compat support for getsockopt (MCAST_MSFILTER) This patch adds support for getsockopt for MCAST_MSFILTER for both IPv4 and IPv6. It depends on the previous setsockopt patch, and uses the same method. Signed-off-by: David L Stevens Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/compat.h | 3 ++ net/compat.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_sockglue.c | 9 +++++- net/ipv6/ipv6_sockglue.c | 4 +++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/include/net/compat.h b/include/net/compat.h index 05fa5d0254a..164cb682e22 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,5 +42,8 @@ extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsi extern int compat_mc_setsockopt(struct sock *, int, int, char __user *, int, int (*)(struct sock *, int, int, char __user *, int)); +extern int compat_mc_getsockopt(struct sock *, int, int, char __user *, + int __user *, int (*)(struct sock *, int, int, char __user *, + int __user *)); #endif /* NET_COMPAT_H */ diff --git a/net/compat.c b/net/compat.c index 8146f654391..c823f6f290c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -640,6 +640,85 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, EXPORT_SYMBOL(compat_mc_setsockopt); +int compat_mc_getsockopt(struct sock *sock, int level, int optname, + char __user *optval, int __user *optlen, + int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) +{ + struct compat_group_filter __user *gf32 = (void *)optval; + struct group_filter __user *kgf; + int __user *koptlen; + u32 interface, fmode, numsrc; + int klen, ulen, err; + + if (optname != MCAST_MSFILTER) + return getsockopt(sock, level, optname, optval, optlen); + + koptlen = compat_alloc_user_space(sizeof(*koptlen)); + if (!access_ok(VERIFY_READ, optlen, sizeof(*optlen)) || + __get_user(ulen, optlen)) + return -EFAULT; + + /* adjust len for pad */ + klen = ulen + sizeof(*kgf) - sizeof(*gf32); + + if (klen < GROUP_FILTER_SIZE(0)) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, koptlen, sizeof(*koptlen)) || + __put_user(klen, koptlen)) + return -EFAULT; + + /* have to allow space for previous compat_alloc_user_space, too */ + kgf = compat_alloc_user_space(klen+sizeof(*optlen)); + + if (!access_ok(VERIFY_READ, gf32, __COMPAT_GF0_SIZE) || + __get_user(interface, &gf32->gf_interface) || + __get_user(fmode, &gf32->gf_fmode) || + __get_user(numsrc, &gf32->gf_numsrc) || + __put_user(interface, &kgf->gf_interface) || + __put_user(fmode, &kgf->gf_fmode) || + __put_user(numsrc, &kgf->gf_numsrc) || + copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + return -EFAULT; + + err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); + if (err) + return err; + + if (!access_ok(VERIFY_READ, koptlen, sizeof(*koptlen)) || + __get_user(klen, koptlen)) + return -EFAULT; + + ulen = klen - (sizeof(*kgf)-sizeof(*gf32)); + + if (!access_ok(VERIFY_WRITE, optlen, sizeof(*optlen)) || + __put_user(ulen, optlen)) + return -EFAULT; + + if (!access_ok(VERIFY_READ, kgf, klen) || + !access_ok(VERIFY_WRITE, gf32, ulen) || + __get_user(interface, &kgf->gf_interface) || + __get_user(fmode, &kgf->gf_fmode) || + __get_user(numsrc, &kgf->gf_numsrc) || + __put_user(interface, &gf32->gf_interface) || + __put_user(fmode, &gf32->gf_fmode) || + __put_user(numsrc, &gf32->gf_numsrc)) + return -EFAULT; + if (numsrc) { + int copylen; + + klen -= GROUP_FILTER_SIZE(0); + copylen = numsrc * sizeof(gf32->gf_slist[0]); + if (copylen > klen) + copylen = klen; + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + return -EFAULT; + } + return err; +} + +EXPORT_SYMBOL(compat_mc_getsockopt); + /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4d8d95404f4..e0514e82308 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1186,7 +1186,14 @@ int ip_getsockopt(struct sock *sk, int level, int compat_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - int err = do_ip_getsockopt(sk, level, optname, optval, optlen); + int err; + + if (optname == MCAST_MSFILTER) + return compat_mc_getsockopt(sk, level, optname, optval, optlen, + ip_getsockopt); + + err = do_ip_getsockopt(sk, level, optname, optval, optlen); + #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index db6fdc1498a..b4a26f2505f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1089,6 +1089,10 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if(level != SOL_IPV6) return -ENOPROTOOPT; + if (optname == MCAST_MSFILTER) + return compat_mc_getsockopt(sk, level, optname, optval, optlen, + ipv6_getsockopt); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ -- cgit v1.2.3-70-g09d2 From 980c478ddbb720948967b028ddbb4179a025bc2c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 29 Apr 2008 03:29:03 -0700 Subject: sch_sfq: use del_timer_sync() in sfq_destroy() Let's delete timer reliably in sfq_destroy(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a20e2ef7704..f0463d757a9 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -521,7 +521,8 @@ static void sfq_destroy(struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(q->filter_list); - del_timer(&q->perturb_timer); + q->perturb_period = 0; + del_timer_sync(&q->perturb_timer); } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 0010e46577a27c1d915034637f6c2fa57a9a091c Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Tue, 29 Apr 2008 03:32:25 -0700 Subject: ipv4: Update MTU to all related cache entries in ip_rt_frag_needed() Add struct net_device parameter to ip_rt_frag_needed() and update MTU to cache entries where ifindex is specified. This is similar to what is already done in ip_rt_redirect(). Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/net/route.h | 2 +- net/ipv4/icmp.c | 3 ++- net/ipv4/route.c | 38 ++++++++++++++++++++++---------------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index c6338802e8f..fc836ff824c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -116,7 +116,7 @@ extern int __ip_route_output_key(struct net *, struct rtable **, const struct f extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); -extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu); +extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev); extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned inet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c67d00e8c60..87397351dda 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -691,7 +691,8 @@ static void icmp_unreach(struct sk_buff *skb) NIPQUAD(iph->daddr)); } else { info = ip_rt_frag_needed(net, iph, - ntohs(icmph->un.frag.mtu)); + ntohs(icmph->un.frag.mtu), + skb->dev); if (!info) goto out; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce25a13f343..5e3685c5c40 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1430,11 +1430,13 @@ static inline unsigned short guess_mtu(unsigned short old_mtu) } unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, - unsigned short new_mtu) + unsigned short new_mtu, + struct net_device *dev) { - int i; + int i, k; unsigned short old_mtu = ntohs(iph->tot_len); struct rtable *rth; + int ikeys[2] = { dev->ifindex, 0 }; __be32 skeys[2] = { iph->saddr, 0, }; __be32 daddr = iph->daddr; unsigned short est_mtu = 0; @@ -1442,22 +1444,26 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (ipv4_config.no_pmtu_disc) return 0; - for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], 0); + for (k = 0; k < 2; k++) { + for (i = 0; i < 2; i++) { + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); - rcu_read_lock(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { - if (rth->fl.fl4_dst == daddr && - rth->fl.fl4_src == skeys[i] && - rth->rt_dst == daddr && - rth->rt_src == iph->saddr && - rth->fl.iif == 0 && - !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && - net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + rcu_read_lock(); + for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; + rth = rcu_dereference(rth->u.dst.rt_next)) { unsigned short mtu = new_mtu; + if (rth->fl.fl4_dst != daddr || + rth->fl.fl4_src != skeys[i] || + rth->rt_dst != daddr || + rth->rt_src != iph->saddr || + rth->fl.oif != ikeys[k] || + rth->fl.iif != 0 || + dst_metric_locked(&rth->u.dst, RTAX_MTU) || + !net_eq(dev_net(rth->u.dst.dev), net) || + rth->rt_genid != atomic_read(&rt_genid)) + continue; + if (new_mtu < 68 || new_mtu >= old_mtu) { /* BSD 4.2 compatibility hack :-( */ @@ -1483,8 +1489,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, est_mtu = mtu; } } + rcu_read_unlock(); } - rcu_read_unlock(); } return est_mtu ? : new_mtu; } -- cgit v1.2.3-70-g09d2 From 443a70d50bdc212e1292778e264ce3d0a85b896f Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Tue, 29 Apr 2008 03:35:10 -0700 Subject: netfilter: nf_conntrack: padding breaks conntrack hash on ARM commit 0794935e "[NETFILTER]: nf_conntrack: optimize hash_conntrack()" results in ARM platforms hashing uninitialised padding. This padding doesn't exist on other architectures. Fix this by replacing NF_CT_TUPLE_U_BLANK() with memset() to ensure everything is initialised. There were only 4 bytes that NF_CT_TUPLE_U_BLANK() wasn't clearing anyway (or 12 bytes on ARM). Signed-off-by: Philip Craig Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_tuple.h | 10 ---------- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 1bb7087833d..a6874ba22d5 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -107,16 +107,6 @@ struct nf_conntrack_tuple_mask } src; }; -/* This is optimized opposed to a memset of the whole structure. Everything we - * really care about is the source/destination unions */ -#define NF_CT_TUPLE_U_BLANK(tuple) \ - do { \ - (tuple)->src.u.all = 0; \ - (tuple)->dst.u.all = 0; \ - memset(&(tuple)->src.u3, 0, sizeof((tuple)->src.u3)); \ - memset(&(tuple)->dst.u3, 0, sizeof((tuple)->dst.u3)); \ - } while (0) - #ifdef __KERNEL__ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index cacb9cb27da..5a955c44036 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -303,7 +303,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - NF_CT_TUPLE_U_BLANK(&tuple); + memset(&tuple, 0, sizeof(tuple)); tuple.src.u3.ip = inet->rcv_saddr; tuple.src.u.tcp.port = inet->sport; tuple.dst.u3.ip = inet->daddr; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4eac65c74ed..c4b1799da5d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -104,7 +104,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) { - NF_CT_TUPLE_U_BLANK(tuple); + memset(tuple, 0, sizeof(*tuple)); tuple->src.l3num = l3num; if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) @@ -151,7 +151,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) { - NF_CT_TUPLE_U_BLANK(inverse); + memset(inverse, 0, sizeof(*inverse)); inverse->src.l3num = orig->src.l3num; if (l3proto->invert_tuple(inverse, orig) == 0) -- cgit v1.2.3-70-g09d2 From 220fc3fc60e9ebeb5ecfe727e4819d9504f2b0b0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 03:37:41 -0700 Subject: MAINTAINERS: The socketcan-core list is subscribers-only. When I posted a copy_to_user fixes, the list daemon refused to accept the Cc: , because I was not a subscriber. I found, that other lists with such a feature are marked respectively in the MAINTAINERS file. Signed-off-by: Pavel Emelyanov Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2112034e164..68c61420b59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1026,7 +1026,7 @@ P: Urs Thuermann M: urs.thuermann@volkswagen.de P: Oliver Hartkopp M: oliver.hartkopp@volkswagen.de -L: socketcan-core@lists.berlios.de +L: socketcan-core@lists.berlios.de (subscribers-only) W: http://developer.berlios.de/projects/socketcan/ S: Maintained -- cgit v1.2.3-70-g09d2 From eff0dee54674a449e7f160aad9f3e0d38e6983eb Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Tue, 29 Apr 2008 03:39:29 -0700 Subject: atm: ambassador: vcc_sf semaphore to mutex Signed-off-by: Daniel Walker Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 19 ++++++++++--------- drivers/atm/ambassador.h | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 5aa12b011a9..6adb72a2f87 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -1177,7 +1178,7 @@ static int amb_open (struct atm_vcc * atm_vcc) vcc->tx_frame_bits = tx_frame_bits; - down (&dev->vcc_sf); + mutex_lock(&dev->vcc_sf); if (dev->rxer[vci]) { // RXer on the channel already, just modify rate... cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE); @@ -1203,7 +1204,7 @@ static int amb_open (struct atm_vcc * atm_vcc) schedule(); } dev->txer[vci].tx_present = 1; - up (&dev->vcc_sf); + mutex_unlock(&dev->vcc_sf); } if (rxtp->traffic_class != ATM_NONE) { @@ -1211,7 +1212,7 @@ static int amb_open (struct atm_vcc * atm_vcc) vcc->rx_info.pool = pool; - down (&dev->vcc_sf); + mutex_lock(&dev->vcc_sf); /* grow RX buffer pool */ if (!dev->rxq[pool].buffers_wanted) dev->rxq[pool].buffers_wanted = rx_lats; @@ -1237,7 +1238,7 @@ static int amb_open (struct atm_vcc * atm_vcc) schedule(); // this link allows RX frames through dev->rxer[vci] = atm_vcc; - up (&dev->vcc_sf); + mutex_unlock(&dev->vcc_sf); } // indicate readiness @@ -1262,7 +1263,7 @@ static void amb_close (struct atm_vcc * atm_vcc) { if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) { command cmd; - down (&dev->vcc_sf); + mutex_lock(&dev->vcc_sf); if (dev->rxer[vci]) { // RXer still on the channel, just modify rate... XXX not really needed cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE); @@ -1277,7 +1278,7 @@ static void amb_close (struct atm_vcc * atm_vcc) { dev->txer[vci].tx_present = 0; while (command_do (dev, &cmd)) schedule(); - up (&dev->vcc_sf); + mutex_unlock(&dev->vcc_sf); } // disable RXing @@ -1287,7 +1288,7 @@ static void amb_close (struct atm_vcc * atm_vcc) { // this is (the?) one reason why we need the amb_vcc struct unsigned char pool = vcc->rx_info.pool; - down (&dev->vcc_sf); + mutex_lock(&dev->vcc_sf); if (dev->txer[vci].tx_present) { // TXer still on the channel, just go to pool zero XXX not really needed cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS); @@ -1314,7 +1315,7 @@ static void amb_close (struct atm_vcc * atm_vcc) { dev->rxq[pool].buffers_wanted = 0; drain_rx_pool (dev, pool); } - up (&dev->vcc_sf); + mutex_unlock(&dev->vcc_sf); } // free our structure @@ -2188,7 +2189,7 @@ static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) // semaphore for txer/rxer modifications - we cannot use a // spinlock as the critical region needs to switch processes - init_MUTEX (&dev->vcc_sf); + mutex_init(&dev->vcc_sf); // queue manipulation spinlocks; we want atomic reads and // writes to the queue descriptors (handles IRQ and SMP) // consider replacing "int pending" -> "atomic_t available" diff --git a/drivers/atm/ambassador.h b/drivers/atm/ambassador.h index ff2a303cbe0..df55fa8387d 100644 --- a/drivers/atm/ambassador.h +++ b/drivers/atm/ambassador.h @@ -638,7 +638,7 @@ struct amb_dev { amb_txq txq; amb_rxq rxq[NUM_RX_POOLS]; - struct semaphore vcc_sf; + struct mutex vcc_sf; amb_tx_info txer[NUM_VCS]; struct atm_vcc * rxer[NUM_VCS]; unsigned int tx_avail; -- cgit v1.2.3-70-g09d2 From 45e741b89000519bedd4da4e7075a35acf5c655b Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 29 Apr 2008 20:58:15 -0700 Subject: ipv4: annotate a few functions __init in ipconfig.c A few functions are only used from __init context. So annotate these with __init for consistency and silence the following warnings: WARNING: net/ipv4/built-in.o(.text+0x2a876): Section mismatch in reference from the function ic_bootp_init() to the variable .init.data:bootp_packet_type WARNING: net/ipv4/built-in.o(.text+0x2a907): Section mismatch in reference from the function ic_bootp_cleanup() to the variable .init.data:bootp_packet_type Note: The warnings only appear with CONFIG_DEBUG_SECTION_MISMATCH=y Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0f42d1c1f69..89dee4346f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -412,12 +412,12 @@ static struct packet_type rarp_packet_type __initdata = { .func = ic_rarp_recv, }; -static inline void ic_rarp_init(void) +static inline void __init ic_rarp_init(void) { dev_add_pack(&rarp_packet_type); } -static inline void ic_rarp_cleanup(void) +static inline void __init ic_rarp_cleanup(void) { dev_remove_pack(&rarp_packet_type); } @@ -682,7 +682,7 @@ static void __init ic_bootp_init_ext(u8 *e) /* * Initialize the DHCP/BOOTP mechanism. */ -static inline void ic_bootp_init(void) +static inline void __init ic_bootp_init(void) { int i; @@ -696,7 +696,7 @@ static inline void ic_bootp_init(void) /* * DHCP/BOOTP cleanup. */ -static inline void ic_bootp_cleanup(void) +static inline void __init ic_bootp_cleanup(void) { dev_remove_pack(&bootp_packet_type); } -- cgit v1.2.3-70-g09d2 From 3a8209d19dd791aaac3668be2fa51a9b42113efd Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 22:29:59 -0700 Subject: iwlwifi: move the selects to the tristate drivers This patch moves the following select's: - RFKILL : IWLWIFI_RFKILL -> IWLCORE - RFKILL_INPUT : IWLWIFI_RFKILL -> IWLCORE - MAC80211_LEDS : IWL4965_LEDS -> IWLCORE - LEDS_CLASS : IWL4965_LEDS -> IWLCORE - MAC80211_LEDS : IWL3945_LEDS -> IWL3945 - LEDS_CLASS : IWL3945_LEDS -> IWL3945 The effects are: - with IWLCORE=m and/or IWL3945=m RFKILL/RFKILL_INPUT/MAC80211_LEDS/LEDS_CLASS are no longer wrongly forced to y - fixes a build error with IWLCORE=y, IWL4965=m might be a bug in kconfig causing it, but doing this change that is anyway the right thing fixes it Reported-by: Carlos R. Mafra Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- drivers/net/wireless/iwlwifi/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig index 9a25f550fd1..d5b7a76fcaa 100644 --- a/drivers/net/wireless/iwlwifi/Kconfig +++ b/drivers/net/wireless/iwlwifi/Kconfig @@ -6,6 +6,10 @@ config IWLCORE tristate "Intel Wireless Wifi Core" depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL select IWLWIFI + select MAC80211_LEDS if IWLWIFI_LEDS + select LEDS_CLASS if IWLWIFI_LEDS + select RFKILL if IWLWIFI_RFKILL + select RFKILL_INPUT if IWLWIFI_RFKILL config IWLWIFI_LEDS bool @@ -14,8 +18,6 @@ config IWLWIFI_LEDS config IWLWIFI_RFKILL boolean "IWLWIFI RF kill support" depends on IWLCORE - select RFKILL - select RFKILL_INPUT config IWL4965 tristate "Intel Wireless WiFi 4965AGN" @@ -55,8 +57,6 @@ config IWL4965_HT config IWL4965_LEDS bool "Enable LEDS features in iwl4965 driver" depends on IWL4965 - select MAC80211_LEDS - select LEDS_CLASS select IWLWIFI_LEDS ---help--- This option enables LEDS for the iwlwifi drivers @@ -112,6 +112,8 @@ config IWL3945 depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL select FW_LOADER select IWLWIFI + select MAC80211_LEDS if IWL3945_LEDS + select LEDS_CLASS if IWL3945_LEDS ---help--- Select to build the driver supporting the: @@ -143,8 +145,6 @@ config IWL3945_SPECTRUM_MEASUREMENT config IWL3945_LEDS bool "Enable LEDS features in iwl3945 driver" depends on IWL3945 - select MAC80211_LEDS - select LEDS_CLASS ---help--- This option enables LEDS for the iwl3945 driver. -- cgit v1.2.3-70-g09d2 From be9164e769d57aa10b2bbe15d103edc041b9e7de Mon Sep 17 00:00:00 2001 From: Kostya B Date: Tue, 29 Apr 2008 22:36:30 -0700 Subject: [IPv4] UFO: prevent generation of chained skb destined to UFO device Problem: ip_append_data() could wrongly generate a chained skb for devices which support UFO. When sk_write_queue is not empty (e.g. MSG_MORE), __instead__ of appending data into the next nr_frag of the queued skb, a new chained skb is created. I would normally assume UFO device should get data in nr_frags and not in frag_list. Later the udp4_hwcsum_outgoing() resets csum to NONE and skb_gso_segment() has oops. Proposal: 1. Even length is less than mtu, employ ip_ufo_append_data() and append data to the __existed__ skb in the sk_write_queue. 2. ip_ufo_append_data() is fixed due to a wrong manipulation of peek-ing and later enqueue-ing of the same skb. Now, enqueuing is always performed, because on error the further ip_flush_pending_frames() would release the queued skb. Signed-off-by: Kostya B Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 08349267ceb..e527628f56c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -753,23 +753,15 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; - } - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - /* specify the length of each IP datagram fragment*/ + /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; } - /* There is not enough support do UFO , - * so follow normal path - */ - kfree_skb(skb); - return err; + + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } /* @@ -863,9 +855,9 @@ int ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { - + if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); -- cgit v1.2.3-70-g09d2 From 159131149c2f56c1da5ae5e23ab9d5acef4916d1 Mon Sep 17 00:00:00 2001 From: Lachlan Andrew Date: Wed, 30 Apr 2008 01:04:03 -0700 Subject: tcp: Overflow bug in Vegas From: Lachlan Andrew There is an overflow bug in net/ipv4/tcp_vegas.c for large BDPs (e.g. 400Mbit/s, 400ms). The multiplication (old_wnd * vegas->baseRTT) << V_PARAM_SHIFT overflows a u32. [ Fix tcp_veno.c too, it has similar calculations. -DaveM ] Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 10 ++++++---- net/ipv4/tcp_veno.c | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index be24d6ee34b..0e1a8c91f78 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -229,7 +229,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ tcp_reno_cong_avoid(sk, ack, in_flight); } else { - u32 rtt, target_cwnd, diff; + u32 rtt, diff; + u64 target_cwnd; /* We have enough RTT samples, so, using the Vegas * algorithm, we determine if we should increase or @@ -252,8 +253,9 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * We keep it as a fixed point number with * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((old_wnd * vegas->baseRTT) - << V_PARAM_SHIFT) / rtt; + target_cwnd = ((u64)old_wnd * vegas->baseRTT); + target_cwnd <<= V_PARAM_SHIFT; + do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity @@ -279,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. */ tp->snd_cwnd = min(tp->snd_cwnd, - (target_cwnd >> + ((u32)target_cwnd >> V_PARAM_SHIFT)+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index d16689e9851..2bf618a3b00 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -133,7 +133,8 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ tcp_reno_cong_avoid(sk, ack, in_flight); } else { - u32 rtt, target_cwnd; + u64 target_cwnd; + u32 rtt; /* We have enough rtt samples, so, using the Veno * algorithm, we determine the state of the network. @@ -141,8 +142,9 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) rtt = veno->minrtt; - target_cwnd = ((tp->snd_cwnd * veno->basertt) - << V_PARAM_SHIFT) / rtt; + target_cwnd = (tp->snd_cwnd * veno->basertt); + target_cwnd <<= V_PARAM_SHIFT; + do_div(target_cwnd, rtt); veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; -- cgit v1.2.3-70-g09d2