diff options
Diffstat (limited to 'drivers/net/forcedeth.c')
-rw-r--r-- | drivers/net/forcedeth.c | 173 |
1 files changed, 105 insertions, 68 deletions
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 525624fc03b..3682ec61e8a 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -10,7 +10,7 @@ * trademarks of NVIDIA Corporation in the United States and other * countries. * - * Copyright (C) 2003,4 Manfred Spraul + * Copyright (C) 2003,4,5 Manfred Spraul * Copyright (C) 2004 Andrew de Quincey (wol support) * Copyright (C) 2004 Carl-Daniel Hailfinger (invalid MAC handling, insane * IRQ rate fixes, bigendian fixes, cleanups, verification) @@ -100,6 +100,8 @@ * 0.45: 18 Sep 2005: Remove nv_stop/start_rx from every link check * 0.46: 20 Oct 2005: Add irq optimization modes. * 0.47: 26 Oct 2005: Add phyaddr 0 in phy scan. + * 0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single + * 0.49: 10 Dec 2005: Fix tso for large buffers. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -111,7 +113,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.47" +#define FORCEDETH_VERSION "0.49" #define DRV_NAME "forcedeth" #include <linux/module.h> @@ -348,6 +350,8 @@ typedef union _ring_type { #define NV_TX2_VALID (1<<31) #define NV_TX2_TSO (1<<28) #define NV_TX2_TSO_SHIFT 14 +#define NV_TX2_TSO_MAX_SHIFT 14 +#define NV_TX2_TSO_MAX_SIZE (1<<NV_TX2_TSO_MAX_SHIFT) #define NV_TX2_CHECKSUM_L3 (1<<27) #define NV_TX2_CHECKSUM_L4 (1<<26) @@ -407,15 +411,15 @@ typedef union _ring_type { #define NV_WATCHDOG_TIMEO (5*HZ) #define RX_RING 128 -#define TX_RING 64 +#define TX_RING 256 /* * If your nic mysteriously hangs then try to reduce the limits * to 1/0: It might be required to set NV_TX_LASTPACKET in the * last valid ring entry. But this would be impossible to * implement - probably a disassembly error. */ -#define TX_LIMIT_STOP 63 -#define TX_LIMIT_START 62 +#define TX_LIMIT_STOP 255 +#define TX_LIMIT_START 254 /* rx/tx mac addr + type + vlan + align + slack*/ #define NV_RX_HEADERS (64) @@ -534,6 +538,7 @@ struct fe_priv { unsigned int next_tx, nic_tx; struct sk_buff *tx_skbuff[TX_RING]; dma_addr_t tx_dma[TX_RING]; + unsigned int tx_dma_len[TX_RING]; u32 tx_flags; }; @@ -871,8 +876,8 @@ static int nv_alloc_rx(struct net_device *dev) } else { skb = np->rx_skbuff[nr]; } - np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len, - PCI_DMA_FROMDEVICE); + np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data, + skb->end-skb->data, PCI_DMA_FROMDEVICE); if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { np->rx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->rx_dma[nr]); wmb(); @@ -934,6 +939,7 @@ static void nv_init_tx(struct net_device *dev) else np->tx_ring.ex[i].FlagLen = 0; np->tx_skbuff[i] = NULL; + np->tx_dma[i] = 0; } } @@ -944,30 +950,27 @@ static int nv_init_ring(struct net_device *dev) return nv_alloc_rx(dev); } -static void nv_release_txskb(struct net_device *dev, unsigned int skbnr) +static int nv_release_txskb(struct net_device *dev, unsigned int skbnr) { struct fe_priv *np = netdev_priv(dev); - struct sk_buff *skb = np->tx_skbuff[skbnr]; - unsigned int j, entry, fragments; - - dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d, skb %p\n", - dev->name, skbnr, np->tx_skbuff[skbnr]); - - entry = skbnr; - if ((fragments = skb_shinfo(skb)->nr_frags) != 0) { - for (j = fragments; j >= 1; j--) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[j-1]; - pci_unmap_page(np->pci_dev, np->tx_dma[entry], - frag->size, - PCI_DMA_TODEVICE); - entry = (entry - 1) % TX_RING; - } + + dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d\n", + dev->name, skbnr); + + if (np->tx_dma[skbnr]) { + pci_unmap_page(np->pci_dev, np->tx_dma[skbnr], + np->tx_dma_len[skbnr], + PCI_DMA_TODEVICE); + np->tx_dma[skbnr] = 0; + } + + if (np->tx_skbuff[skbnr]) { + dev_kfree_skb_irq(np->tx_skbuff[skbnr]); + np->tx_skbuff[skbnr] = NULL; + return 1; + } else { + return 0; } - pci_unmap_single(np->pci_dev, np->tx_dma[entry], - skb->len - skb->data_len, - PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); - np->tx_skbuff[skbnr] = NULL; } static void nv_drain_tx(struct net_device *dev) @@ -980,10 +983,8 @@ static void nv_drain_tx(struct net_device *dev) np->tx_ring.orig[i].FlagLen = 0; else np->tx_ring.ex[i].FlagLen = 0; - if (np->tx_skbuff[i]) { - nv_release_txskb(dev, i); + if (nv_release_txskb(dev, i)) np->stats.tx_dropped++; - } } } @@ -999,7 +1000,7 @@ static void nv_drain_rx(struct net_device *dev) wmb(); if (np->rx_skbuff[i]) { pci_unmap_single(np->pci_dev, np->rx_dma[i], - np->rx_skbuff[i]->len, + np->rx_skbuff[i]->end-np->rx_skbuff[i]->data, PCI_DMA_FROMDEVICE); dev_kfree_skb(np->rx_skbuff[i]); np->rx_skbuff[i] = NULL; @@ -1020,68 +1021,105 @@ static void drain_ring(struct net_device *dev) static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); + u32 tx_flags = 0; u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET); unsigned int fragments = skb_shinfo(skb)->nr_frags; - unsigned int nr = (np->next_tx + fragments) % TX_RING; + unsigned int nr = (np->next_tx - 1) % TX_RING; + unsigned int start_nr = np->next_tx % TX_RING; unsigned int i; + u32 offset = 0; + u32 bcnt; + u32 size = skb->len-skb->data_len; + u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + + /* add fragments to entries count */ + for (i = 0; i < fragments; i++) { + entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) + + ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + } spin_lock_irq(&np->lock); - if ((np->next_tx - np->nic_tx + fragments) > TX_LIMIT_STOP) { + if ((np->next_tx - np->nic_tx + entries - 1) > TX_LIMIT_STOP) { spin_unlock_irq(&np->lock); netif_stop_queue(dev); return NETDEV_TX_BUSY; } - np->tx_skbuff[nr] = skb; - - if (fragments) { - dprintk(KERN_DEBUG "%s: nv_start_xmit: buffer contains %d fragments\n", dev->name, fragments); - /* setup descriptors in reverse order */ - for (i = fragments; i >= 1; i--) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset, frag->size, - PCI_DMA_TODEVICE); + /* setup the header buffer */ + do { + bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; + nr = (nr + 1) % TX_RING; + + np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data + offset, bcnt, + PCI_DMA_TODEVICE); + np->tx_dma_len[nr] = bcnt; + + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]); + np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags); + } else { + np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32; + np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; + np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags); + } + tx_flags = np->tx_flags; + offset += bcnt; + size -= bcnt; + } while(size); + + /* setup the fragments */ + for (i = 0; i < fragments; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 size = frag->size; + offset = 0; + + do { + bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; + nr = (nr + 1) % TX_RING; + + np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt, + PCI_DMA_TODEVICE); + np->tx_dma_len[nr] = bcnt; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]); - np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra); + np->tx_ring.orig[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags); } else { np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32; np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; - np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra); + np->tx_ring.ex[nr].FlagLen = cpu_to_le32((bcnt-1) | tx_flags); } - - nr = (nr - 1) % TX_RING; + offset += bcnt; + size -= bcnt; + } while (size); + } - if (np->desc_ver == DESC_VER_1) - tx_flags_extra &= ~NV_TX_LASTPACKET; - else - tx_flags_extra &= ~NV_TX2_LASTPACKET; - } + /* set last fragment flag */ + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + np->tx_ring.orig[nr].FlagLen |= cpu_to_le32(tx_flags_extra); + } else { + np->tx_ring.ex[nr].FlagLen |= cpu_to_le32(tx_flags_extra); } + np->tx_skbuff[nr] = skb; + #ifdef NETIF_F_TSO if (skb_shinfo(skb)->tso_size) - tx_flags_extra |= NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); + tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); else #endif - tx_flags_extra |= (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); + tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); - np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len-skb->data_len, - PCI_DMA_TODEVICE); - + /* set tx flags */ if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { - np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]); - np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra); + np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } else { - np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32; - np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; - np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra); + np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } - dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission. tx_flags_extra: %x\n", - dev->name, np->next_tx, tx_flags_extra); + dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d (entries %d) queued for transmission. tx_flags_extra: %x\n", + dev->name, np->next_tx, entries, tx_flags_extra); { int j; for (j=0; j<64; j++) { @@ -1092,7 +1130,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) dprintk("\n"); } - np->next_tx += 1 + fragments; + np->next_tx += entries; dev->trans_start = jiffies; spin_unlock_irq(&np->lock); @@ -1139,7 +1177,6 @@ static void nv_tx_done(struct net_device *dev) np->stats.tx_packets++; np->stats.tx_bytes += skb->len; } - nv_release_txskb(dev, i); } } else { if (Flags & NV_TX2_LASTPACKET) { @@ -1155,9 +1192,9 @@ static void nv_tx_done(struct net_device *dev) np->stats.tx_packets++; np->stats.tx_bytes += skb->len; } - nv_release_txskb(dev, i); } } + nv_release_txskb(dev, i); np->nic_tx++; } if (np->next_tx - np->nic_tx < TX_LIMIT_START) @@ -1334,7 +1371,7 @@ static void nv_rx_process(struct net_device *dev) * the performance. */ pci_unmap_single(np->pci_dev, np->rx_dma[i], - np->rx_skbuff[i]->len, + np->rx_skbuff[i]->end-np->rx_skbuff[i]->data, PCI_DMA_FROMDEVICE); { |