From 76620aafd66f0004829764940c5466144969cffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Apr 2009 02:02:07 -0700 Subject: gro: New frags interface to avoid copying shinfo It turns out that copying a 16-byte area at ~800k times a second can be really expensive :) This patch redesigns the frags GRO interface to avoid copying that area twice. The two disciples of the frags interface have been converted. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/cxgb3/sge.c | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f339..73d569e758e 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q) q->txq_stopped = 0; q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ q->rx_reclaim_timer.function = NULL; - q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; + q->nomem = 0; + napi_free_frags(&q->napi); } @@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct sge_fl *fl, int len, int complete) { struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct sk_buff *skb = NULL; struct cpl_rx_pkt *cpl; - struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; - int nr_frags = qs->lro_frag_tbl.nr_frags; - int frag_len = qs->lro_frag_tbl.len; + struct skb_frag_struct *rx_frag; + int nr_frags; int offset = 0; - if (!nr_frags) { - offset = 2 + sizeof(struct cpl_rx_pkt); - qs->lro_va = cpl = sd->pg_chunk.va + 2; + if (!qs->nomem) { + skb = napi_get_frags(&qs->napi); + qs->nomem = !skb; } fl->credits--; - len -= offset; pci_dma_sync_single_for_cpu(adap->pdev, pci_unmap_addr(sd, dma_addr), fl->buf_size - SGE_PG_RSVD, @@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, fl->alloc_size, PCI_DMA_FROMDEVICE); + if (!skb) { + 
put_page(sd->pg_chunk.page); + if (complete) + qs->nomem = 0; + return; + } + + rx_frag = skb_shinfo(skb)->frags; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (!nr_frags) { + offset = 2 + sizeof(struct cpl_rx_pkt); + qs->lro_va = sd->pg_chunk.va + 2; + } + len -= offset; + prefetch(qs->lro_va); rx_frag += nr_frags; rx_frag->page = sd->pg_chunk.page; rx_frag->page_offset = sd->pg_chunk.offset + offset; rx_frag->size = len; - frag_len += len; - qs->lro_frag_tbl.nr_frags++; - qs->lro_frag_tbl.len = frag_len; + skb->len += len; + skb->data_len += len; + skb->truesize += len; + skb_shinfo(skb)->nr_frags++; if (!complete) return; - qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; cpl = qs->lro_va; if (unlikely(cpl->vlan_valid)) { @@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct vlan_group *grp = pi->vlan_grp; if (likely(grp != NULL)) { - vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), - &qs->lro_frag_tbl); - goto out; + vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan)); + return; } } - napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); - -out: - qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0; + napi_gro_frags(&qs->napi); } /** @@ -2300,8 +2313,6 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(&qs->lro_frag_tbl); - prefetch(addr); #if L1_CACHE_BYTES < 128 prefetch(addr + L1_CACHE_BYTES); -- cgit v1.2.3-70-g09d2 From 28679751a924c11f7135641f26e99249385de5b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2009 19:26:37 +0000 Subject: net: dont update dev->trans_start in 10GB drivers Followup of commits 9d21493b4beb8f918ba248032fefa393074a5e2b and 08baf561083bc27a953aa087dd8a664bb2b88e8e (net: tx scalability works : trans_start) (net: txq_trans_update() helper) Now that core network takes care of trans_start updates, don't do it in drivers themselves, if possible. 
Multi queue drivers can avoid one cache miss (on dev->trans_start) in their start_xmit() handler. Exceptions are NETIF_F_LLTX drivers (vxge & tehuti) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/benet/be_main.c | 2 -- drivers/net/bnx2x_main.c | 1 - drivers/net/chelsio/sge.c | 1 - drivers/net/cxgb3/sge.c | 1 - drivers/net/enic/enic_main.c | 2 -- drivers/net/ixgb/ixgb_main.c | 1 - drivers/net/ixgbe/ixgbe_main.c | 1 - drivers/net/mlx4/en_tx.c | 1 - drivers/net/myri10ge/myri10ge.c | 1 - drivers/net/netxen/netxen_nic_main.c | 1 - drivers/net/qlge/qlge_main.c | 1 - drivers/net/s2io.c | 1 - drivers/net/sfc/selftest.c | 1 + drivers/net/sfc/tx.c | 7 ------- drivers/net/tehuti.c | 5 +++-- drivers/net/vxge/vxge-main.c | 6 ++++-- 16 files changed, 8 insertions(+), 25 deletions(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index ae2f6b58ba2..5f17d80300a 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -478,8 +478,6 @@ static int be_xmit(struct sk_buff *skb, struct net_device *netdev) be_txq_notify(&adapter->ctrl, txq->id, wrb_cnt); - netdev->trans_start = jiffies; - be_tx_stats_update(adapter, wrb_cnt, copied, stopped); return NETDEV_TX_OK; } diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index e01539c33b8..fbf1352e9c1 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -10617,7 +10617,6 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) mmiowb(); fp->tx_bd_prod += nbd; - dev->trans_start = jiffies; if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) { /* We want bnx2x_tx_int to "see" the updated tx_bd_prod diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 5e97a1a71d8..3711d64e45e 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1879,7 +1879,6 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) cpl->vlan_valid = 0; send: - 
dev->trans_start = jiffies; ret = t1_sge_tx(skb, adapter, 0, dev); /* If transmit busy, and we reallocated skb's due to headroom limit, diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 73d569e758e..49e64af7b09 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1286,7 +1286,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (vlan_tx_tag_present(skb) && pi->vlan_grp) qs->port_stats[SGE_PSTAT_VLANINS]++; - dev->trans_start = jiffies; spin_unlock(&q->lock); /* diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 9080f07da8f..8005b602f77 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -661,8 +661,6 @@ static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + 1) netif_stop_queue(netdev); - netdev->trans_start = jiffies; - spin_unlock_irqrestore(&enic->wq_lock[0], flags); return NETDEV_TX_OK; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 04cb81a739c..6eb7f37a113 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1488,7 +1488,6 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (count) { ixgb_tx_queue(adapter, count, vlan_id, tx_flags); - netdev->trans_start = jiffies; /* Make sure there is space in the ring for the next send. 
*/ ixgb_maybe_stop_tx(netdev, &adapter->tx_ring, DESC_NEEDED); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index dff1da8ae5c..924aa5ed02c 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4863,7 +4863,6 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (count) { ixgbe_tx_queue(adapter, tx_ring, tx_flags, count, skb->len, hdr_len); - netdev->trans_start = jiffies; ixgbe_maybe_stop_tx(netdev, tx_ring, DESC_NEEDED); } else { diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index ac6fc499b28..1c83a96fde3 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -819,7 +819,6 @@ int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Ring doorbell! */ wmb(); writel(ring->doorbell_qpn, mdev->uar_map + MLX4_SEND_DOORBELL); - dev->trans_start = jiffies; /* Poll CQ here */ mlx4_en_xmit_poll(priv, tx_ind); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 7e28b461012..c9a30d3a66f 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -2892,7 +2892,6 @@ again: tx->stop_queue++; netif_tx_stop_queue(netdev_queue); } - dev->trans_start = jiffies; return 0; abort_linearize: diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 50477f5c3ec..98737ef7293 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1496,7 +1496,6 @@ netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) netxen_nic_update_cmd_producer(adapter, tx_ring, producer); adapter->stats.xmitcalled++; - netdev->trans_start = jiffies; return NETDEV_TX_OK; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index c92ced24794..0b0778d9919 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -2108,7 +2108,6 @@ static int qlge_send(struct sk_buff *skb, struct net_device *ndev) 
wmb(); ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg); - ndev->trans_start = jiffies; QPRINTK(qdev, TX_QUEUED, DEBUG, "tx queued, slot %d, len %d\n", tx_ring->prod_idx, skb->len); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 80562ea77de..2bc73ede431 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -4299,7 +4299,6 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) s2io_stop_tx_queue(sp, fifo->fifo_no); } mac_control->stats_info->sw_stat.mem_allocated += skb->truesize; - dev->trans_start = jiffies; spin_unlock_irqrestore(&fifo->tx_lock, flags); if (sp->config.intr_type == MSI_X) diff --git a/drivers/net/sfc/selftest.c b/drivers/net/sfc/selftest.c index 04379571595..b67ccca3fc1 100644 --- a/drivers/net/sfc/selftest.c +++ b/drivers/net/sfc/selftest.c @@ -438,6 +438,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) kfree_skb(skb); return -EPIPE; } + efx->net_dev->trans_start = jiffies; } return 0; diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index d6681edb701..14a14788566 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -360,13 +360,6 @@ inline int efx_xmit(struct efx_nic *efx, /* Map fragments for DMA and add to TX queue */ rc = efx_enqueue_skb(tx_queue, skb); - if (unlikely(rc != NETDEV_TX_OK)) - goto out; - - /* Update last TX timer */ - efx->net_dev->trans_start = jiffies; - - out: return rc; } diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 7f4a9683ba1..093807a182f 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1718,8 +1718,9 @@ static int bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) WRITE_REG(priv, f->m.reg_WPTR, f->m.wptr & TXF_WPTR_WR_PTR); #endif - ndev->trans_start = jiffies; - +#ifdef BDX_LLTX + ndev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ +#endif priv->net_stats.tx_packets++; priv->net_stats.tx_bytes += skb->len; diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 
b7f08f3e524..6c838b3e063 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -677,7 +677,7 @@ vxge_xmit_compl(struct __vxge_hw_fifo *fifo_hw, void *dtr, return VXGE_HW_OK; } -/* select a vpath to trasmit the packet */ +/* select a vpath to transmit the packet */ static u32 vxge_get_vpath_no(struct vxgedev *vdev, struct sk_buff *skb, int *do_lock) { @@ -992,7 +992,9 @@ vxge_xmit(struct sk_buff *skb, struct net_device *dev) VXGE_HW_FIFO_TXD_TX_CKO_UDP_EN); vxge_hw_fifo_txdl_post(fifo_hw, dtr); - dev->trans_start = jiffies; +#ifdef NETIF_F_LLTX + dev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ +#endif spin_unlock_irqrestore(&fifo->tx_lock, flags); VXGE_COMPLETE_VPATH_TX(fifo); -- cgit v1.2.3-70-g09d2 From c3a8c5b644118b5e2cfd0690b1dcea904a792c52 Mon Sep 17 00:00:00 2001 From: Divy Le Ray Date: Fri, 29 May 2009 12:52:38 +0000 Subject: cxgb3: move away from LLTX cxgb3 no longer advertises LLTX. Signed-off-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb3/cxgb3_main.c | 1 - drivers/net/cxgb3/sge.c | 17 +++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 0b87fee023f..1fe4b070c6d 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -3059,7 +3059,6 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len - 1; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; - netdev->features |= NETIF_F_LLTX; netdev->features |= NETIF_F_GRO; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 49e64af7b09..0b978827874 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1241,7 +1241,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) q = &qs->txq[TXQ_ETH]; txq = 
netdev_get_tx_queue(dev, qidx); - spin_lock(&q->lock); reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); credits = q->size - q->in_use; @@ -1252,7 +1251,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) dev_err(&adap->pdev->dev, "%s: Tx ring %u full while queue awake!\n", dev->name, q->cntxt_id & 7); - spin_unlock(&q->lock); return NETDEV_TX_BUSY; } @@ -1286,8 +1284,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (vlan_tx_tag_present(skb) && pi->vlan_grp) qs->port_stats[SGE_PSTAT_VLANINS]++; - spin_unlock(&q->lock); - /* * We do not use Tx completion interrupts to free DMAd Tx packets. * This is good for performamce but means that we rely on new Tx @@ -2857,11 +2853,12 @@ static void sge_timer_tx(unsigned long data) unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; unsigned long next_period; - if (spin_trylock(&qs->txq[TXQ_ETH].lock)) { - tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH], - TX_RECLAIM_TIMER_CHUNK); - spin_unlock(&qs->txq[TXQ_ETH].lock); + if (__netif_tx_trylock(qs->tx_q)) { + tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH], + TX_RECLAIM_TIMER_CHUNK); + __netif_tx_unlock(qs->tx_q); } + if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) { tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD], TX_RECLAIM_TIMER_CHUNK); @@ -2869,8 +2866,8 @@ static void sge_timer_tx(unsigned long data) } next_period = TX_RECLAIM_PERIOD >> - (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) / - TX_RECLAIM_TIMER_CHUNK); + (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) / + TX_RECLAIM_TIMER_CHUNK); mod_timer(&qs->tx_reclaim_timer, jiffies + next_period); } -- cgit v1.2.3-70-g09d2