-rw-r--r--  Documentation/ptp/testptp.mk | 33
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 22
-rw-r--r--  drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 16
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c | 3
-rw-r--r--  drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 3
-rw-r--r--  drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 3
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/eq.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1
-rw-r--r--  drivers/net/ethernet/sfc/tx.c | 4
-rw-r--r--  drivers/net/hyperv/netvsc_drv.c | 1
-rw-r--r--  drivers/net/macvlan.c | 10
-rw-r--r--  drivers/net/xen-netback/common.h | 39
-rw-r--r--  drivers/net/xen-netback/interface.c | 74
-rw-r--r--  drivers/net/xen-netback/netback.c | 319
-rw-r--r--  drivers/net/xen-netback/xenbus.c | 22
-rw-r--r--  net/core/dev.c | 4
-rw-r--r--  net/core/tso.c | 3
-rw-r--r--  net/ipv4/tcp.c | 59
-rw-r--r--  net/ipv4/tcp_ipv4.c | 4
-rw-r--r--  net/ipv6/tcp_ipv6.c | 4
-rw-r--r--  net/ipv6/xfrm6_policy.c | 11
22 files changed, 363 insertions, 277 deletions
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 00000000000..4ef2d975542 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 29554992215..2349ea97025 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1465,7 +1465,7 @@ static int xgbe_set_features(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int rxcsum, rxvlan, rxvlan_filter; + netdev_features_t rxcsum, rxvlan, rxvlan_filter; rxcsum = pdata->netdev_features & NETIF_F_RXCSUM; rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX; @@ -1598,7 +1598,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct skb_shared_hwtstamps *hwtstamps; unsigned int incomplete, error, context_next, context; unsigned int len, put_len, max_len; - int received = 0; + unsigned int received = 0; + int packet_count = 0; DBGPR("-->xgbe_rx_poll: budget=%d\n", budget); @@ -1608,7 +1609,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) rdata = XGBE_GET_DESC_DATA(ring, ring->cur); packet = &ring->packet_data; - while (received < budget) { + while (packet_count < budget) { DBGPR(" cur = %d\n", ring->cur); /* First time in loop see if we need to restore state */ @@ -1662,7 +1663,7 @@ read_again: if (packet->errors) DBGPR("Error in received packet\n"); dev_kfree_skb(skb); - continue; + goto next_packet; } if (!context) { @@ -1677,7 +1678,7 @@ read_again: } dev_kfree_skb(skb); - continue; + goto next_packet; } memcpy(skb_tail_pointer(skb), rdata->skb->data, put_len); @@ -1694,7 +1695,7 @@ read_again: /* Stray Context Descriptor? 
*/ if (!skb) - continue; + goto next_packet; /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; @@ -1705,7 +1706,7 @@ read_again: if (skb->len > max_len) { DBGPR("packet length exceeds configured MTU\n"); dev_kfree_skb(skb); - continue; + goto next_packet; } #ifdef XGMAC_ENABLE_RX_PKT_DUMP @@ -1739,6 +1740,9 @@ read_again: netdev->last_rx = jiffies; napi_gro_receive(&pdata->napi, skb); + +next_packet: + packet_count++; } /* Check if we need to save state before leaving */ @@ -1752,9 +1756,9 @@ read_again: rdata->state.error = error; } - DBGPR("<--xgbe_rx_poll: received = %d\n", received); + DBGPR("<--xgbe_rx_poll: packet_count = %d\n", packet_count); - return received; + return packet_count; } static int xgbe_poll(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index e6d24c21019..c22f32622fa 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -124,20 +124,18 @@ static int xgene_enet_ecc_init(struct xgene_enet_pdata *p) { struct net_device *ndev = p->ndev; u32 data; - int i; + int i = 0; xgene_enet_wr_diag_csr(p, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0); - for (i = 0; i < 10 && data != ~0U ; i++) { + do { usleep_range(100, 110); data = xgene_enet_rd_diag_csr(p, ENET_BLOCK_MEM_RDY_ADDR); - } + if (data == ~0U) + return 0; + } while (++i < 10); - if (data != ~0U) { - netdev_err(ndev, "Failed to release memory from shutdown\n"); - return -ENODEV; - } - - return 0; + netdev_err(ndev, "Failed to release memory from shutdown\n"); + return -ENODEV; } static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 81b96cf8757..50a851db285 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1581,7 +1581,8 @@ fec_enet_interrupt(int irq, void *dev_id) complete(&fep->mdio_done); } - fec_ptp_check_pps_event(fep); + if (fep->ptp_clock) + fec_ptp_check_pps_event(fep); return ret; } diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 3d4e08be170..b34214e2df5 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -341,6 +341,9 @@ static void restart(struct net_device *dev) FC(fecp, x_cntrl, FEC_TCNTRL_FDEN); /* FD disable */ } + /* Restore multicast and promiscuous settings */ + set_multicast_list(dev); + /* * Enable interrupts we wish to service. 
*/ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index f30411f0701..7a184e8816a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -355,6 +355,9 @@ static void restart(struct net_device *dev) if (fep->phydev->duplex) S16(sccp, scc_psmr, SCC_PSMR_LPB | SCC_PSMR_FDE); + /* Restore multicast and promiscuous settings */ + set_multicast_list(dev); + S32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ed5f1c15fb0..c3a7f4a4b77 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> I40E_GL_MDET_TX_VF_NUM_SHIFT; - u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >> + u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; @@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; - u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >> + u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index a49c9d11d8a..49290a40590 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1026,6 +1026,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, pr_cont("\n"); } } + synchronize_irq(eq->irq); mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ed53291468f..a278238a2db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -420,6 +420,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); + synchronize_irq(table->msix_arr[eq->irqn].vector); mlx5_buf_free(dev, &eq->buf); return err; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index ee84a90e371..aaf2987512b 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -343,8 +343,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) unsigned short dma_flags; int i = 0; - EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); - if (skb_shinfo(skb)->gso_size) return efx_enqueue_skb_tso(tx_queue, skb); @@ -1258,8 +1256,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Find the packet protocol and sanity-check it */ state.protocol = efx_tso_check_protocol(skb); - EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); - rc = tso_start(&state, efx, skb); if (rc) goto mem_err; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9e17d1a91e7..78ec33f5100 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -550,6 +550,7 @@ do_lso: do_send: /* Start filling in the page 
buffers with the rndis hdr */ rndis_msg->msg_len += rndis_msg_size; + packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, &packet->page_buf[0]); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 29b3bb41078..bfb0b6ec8c5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -272,7 +272,7 @@ static void macvlan_process_broadcast(struct work_struct *w) struct sk_buff *skb; struct sk_buff_head list; - skb_queue_head_init(&list); + __skb_queue_head_init(&list); spin_lock_bh(&port->bc_queue.lock); skb_queue_splice_tail_init(&port->bc_queue, &list); @@ -1082,9 +1082,15 @@ static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); - cancel_work_sync(&port->bc_work); dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); + + /* After this point, no packet can schedule bc_work anymore, + * but we need to cancel it and purge left skbs if any. + */ + cancel_work_sync(&port->bc_work); + __skb_queue_purge(&port->bc_queue); + kfree_rcu(port, rcu); } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index d4eb8d2e9cb..083ecc93fe5 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -176,10 +176,11 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */ struct xen_netif_rx_back_ring rx; struct sk_buff_head rx_queue; - RING_IDX rx_last_skb_slots; - unsigned long status; - struct timer_list rx_stalled; + unsigned int rx_queue_max; + unsigned int rx_queue_len; + unsigned long last_rx_time; + bool stalled; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; @@ -199,18 +200,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct xenvif_stats stats; }; +/* Maximum number of Rx slots a to-guest packet may use, including the + * slot needed for GSO meta-data. + */ +#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1) + enum state_bit_shift { /* This bit marks that the vif is connected */ VIF_STATUS_CONNECTED, - /* This bit signals the RX thread that queuing was stopped (in - * start_xmit), and either the timer fired or an RX interrupt came - */ - QUEUE_STATUS_RX_PURGE_EVENT, - /* This bit tells the interrupt handler that this queue was the reason - * for the carrier off, so it should kick the thread. Only queues which - * brought it down can turn on the carrier. - */ - QUEUE_STATUS_RX_STALLED }; struct xenvif { @@ -228,9 +225,6 @@ struct xenvif { u8 ip_csum:1; u8 ipv6_csum:1; - /* Internal feature information. */ - u8 can_queue:1; /* can queue packets for receiver? */ - /* Is this interface disabled? True when backend discovers * frontend is rogue. 
*/ @@ -240,6 +234,9 @@ struct xenvif { /* Queues */ struct xenvif_queue *queues; unsigned int num_queues; /* active queues, resource allocated */ + unsigned int stalled_queues; + + spinlock_t lock; #ifdef CONFIG_DEBUG_FS struct dentry *xenvif_dbg_root; @@ -249,6 +246,14 @@ struct xenvif { struct net_device *dev; }; +struct xenvif_rx_cb { + unsigned long expires; + int meta_slots_used; + bool full_coalesce; +}; + +#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) + static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) { return to_xenbus_device(vif->dev->dev.parent); @@ -272,8 +277,6 @@ void xenvif_xenbus_fini(void); int xenvif_schedulable(struct xenvif *vif); -int xenvif_must_stop_queue(struct xenvif_queue *queue); - int xenvif_queue_stopped(struct xenvif_queue *queue); void xenvif_wake_queue(struct xenvif_queue *queue); @@ -296,6 +299,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + /* Determine whether the needed number of slots (req) are available, * and set req_event if not. */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f379689dde3..895fe84011e 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -43,6 +43,9 @@ #define XENVIF_QUEUE_LENGTH 32 #define XENVIF_NAPI_WEIGHT 64 +/* Number of bytes allowed on the internal guest Rx queue. */ +#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) + /* This function is used to set SKBTX_DEV_ZEROCOPY as well as * increasing the inflight counter. We need to increase the inflight * counter because core driver calls into xenvif_zerocopy_callback @@ -60,20 +63,11 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) atomic_dec(&queue->inflight_packets); } -static inline void xenvif_stop_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - - if (!queue->vif->can_queue) - return; - - netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); -} - int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && - test_bit(VIF_STATUS_CONNECTED, &vif->status); + test_bit(VIF_STATUS_CONNECTED, &vif->status) && + !vif->disabled; } static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) @@ -114,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget) static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; - struct netdev_queue *net_queue = - netdev_get_tx_queue(queue->vif->dev, queue->id); - /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR - * the carrier went down and this queue was previously blocked - */ - if (unlikely(netif_tx_queue_stopped(net_queue) || - (!netif_carrier_ok(queue->vif->dev) && - test_bit(QUEUE_STATUS_RX_STALLED, &queue->status)))) - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); xenvif_kick_thread(queue); return IRQ_HANDLED; @@ -151,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); } -/* Callback to wake the queue's thread and turn the carrier off on timeout */ -static void xenvif_rx_stalled(unsigned long data) -{ - struct xenvif_queue *queue = (struct xenvif_queue *)data; - - if (xenvif_queue_stopped(queue)) { - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); - xenvif_kick_thread(queue); - } -} - static int xenvif_start_xmit(struct sk_buff *skb, struct 
net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; unsigned int num_queues = vif->num_queues; u16 index; - int min_slots_needed; + struct xenvif_rx_cb *cb; BUG_ON(skb->dev != dev); @@ -191,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) !xenvif_schedulable(vif)) goto drop; - /* At best we'll need one slot for the header and one for each - * frag. - */ - min_slots_needed = 1 + skb_shinfo(skb)->nr_frags; - - /* If the skb is GSO then we'll also need an extra slot for the - * metadata. - */ - if (skb_is_gso(skb)) - min_slots_needed++; + cb = XENVIF_RX_CB(skb); + cb->expires = jiffies + rx_drain_timeout_jiffies; - /* If the skb can't possibly fit in the remaining slots - * then turn off the queue to give the ring a chance to - * drain. - */ - if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { - queue->rx_stalled.function = xenvif_rx_stalled; - queue->rx_stalled.data = (unsigned long)queue; - xenvif_stop_queue(queue); - mod_timer(&queue->rx_stalled, - jiffies + rx_drain_timeout_jiffies); - } - - skb_queue_tail(&queue->rx_queue, skb); + xenvif_rx_queue_tail(queue, skb); xenvif_kick_thread(queue); return NETDEV_TX_OK; @@ -465,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->queues = NULL; vif->num_queues = 0; + spin_lock_init(&vif->lock); + dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -508,6 +464,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) init_timer(&queue->credit_timeout); queue->credit_window_start = get_jiffies_64(); + queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; + skb_queue_head_init(&queue->rx_queue); skb_queue_head_init(&queue->tx_queue); @@ -539,8 +497,6 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; } - init_timer(&queue->rx_stalled); - return 0; } @@ -551,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif) dev_set_mtu(vif->dev, ETH_DATA_LEN); netdev_update_features(vif->dev); set_bit(VIF_STATUS_CONNECTED, &vif->status); - netif_carrier_on(vif->dev); if (netif_running(vif->dev)) xenvif_up(vif); rtnl_unlock(); @@ -611,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, disable_irq(queue->rx_irq); } + queue->stalled = true; + task = kthread_create(xenvif_kthread_guest_rx, (void *)queue, "%s-guest-rx", queue->name); if (IS_ERR(task)) { @@ -674,7 +631,6 @@ void xenvif_disconnect(struct xenvif *vif) netif_napi_del(&queue->napi); if (queue->task) { - del_timer_sync(&queue->rx_stalled); kthread_stop(queue->task); queue->task = NULL; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 08f65996534..6563f0713fc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -55,13 +55,20 @@ bool separate_tx_rx_irq = 1; module_param(separate_tx_rx_irq, bool, 0644); -/* When guest ring is filled up, qdisc queues the packets for us, but we have - * to timeout them, otherwise other guests' packets can get stuck there +/* The time that packets can stay on the guest Rx internal queue + * before they are dropped. */ unsigned int rx_drain_timeout_msecs = 10000; module_param(rx_drain_timeout_msecs, uint, 0444); unsigned int rx_drain_timeout_jiffies; +/* The length of time before the frontend is considered unresponsive + * because it isn't providing Rx slots. 
+ */ +static unsigned int rx_stall_timeout_msecs = 60000; +module_param(rx_stall_timeout_msecs, uint, 0444); +static unsigned int rx_stall_timeout_jiffies; + unsigned int xenvif_max_queues; module_param_named(max_queues, xenvif_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, @@ -83,7 +90,6 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static inline int tx_work_todo(struct xenvif_queue *queue); -static inline int rx_work_todo(struct xenvif_queue *queue); static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, u16 id, @@ -163,6 +169,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed) return false; } +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; + if (queue->rx_queue_len > queue->rx_queue_max) + netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +} + +static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + spin_lock_irq(&queue->rx_queue.lock); + + skb = __skb_dequeue(&queue->rx_queue); + if (skb) + queue->rx_queue_len -= skb->len; + + spin_unlock_irq(&queue->rx_queue.lock); + + return skb; +} + +static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) +{ + spin_lock_irq(&queue->rx_queue.lock); + + if (queue->rx_queue_len < queue->rx_queue_max) + netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irq(&queue->rx_queue.lock); +} + + +static void xenvif_rx_queue_purge(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + while ((skb = xenvif_rx_dequeue(queue)) != NULL) + kfree_skb(skb); +} + +static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + for(;;) { + skb = skb_peek(&queue->rx_queue); + if (!skb) + break; + if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) + break; + xenvif_rx_dequeue(queue); + kfree_skb(skb); + } +} + /* * Returns true if we should start a new receive buffer instead of * adding 'size' bytes to a buffer which currently contains 'offset' @@ -237,13 +306,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue, return meta; } -struct xenvif_rx_cb { - int meta_slots_used; - bool full_coalesce; -}; - -#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) - /* * Set up the grant operations for this fragment. If it's a flipping * interface, we also set up the unmap request from here. @@ -587,12 +649,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_queue_head_init(&rxq); - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) { + while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX) + && (skb = xenvif_rx_dequeue(queue)) != NULL) { RING_IDX max_slots_needed; RING_IDX old_req_cons; RING_IDX ring_slots_used; int i; + queue->last_rx_time = jiffies; + /* We need a cheap worse case estimate for the number of * slots we'll use. 
*/ @@ -634,15 +699,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; - /* If the skb may not fit then bail out now */ - if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) { - skb_queue_head(&queue->rx_queue, skb); - need_to_notify = true; - queue->rx_last_skb_slots = max_slots_needed; - break; - } else - queue->rx_last_skb_slots = 0; - old_req_cons = queue->rx.req_cons; XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue); ring_slots_used = queue->rx.req_cons - old_req_cons; @@ -1869,12 +1925,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) } } -static inline int rx_work_todo(struct xenvif_queue *queue) -{ - return (!skb_queue_empty(&queue->rx_queue) && - xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)); -} - static inline int tx_work_todo(struct xenvif_queue *queue) { if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))) @@ -1931,92 +1981,121 @@ err: return err; } -static void xenvif_start_queue(struct xenvif_queue *queue) +static void xenvif_queue_carrier_off(struct xenvif_queue *queue) { - if (xenvif_schedulable(queue->vif)) - xenvif_wake_queue(queue); + struct xenvif *vif = queue->vif; + + queue->stalled = true; + + /* At least one queue has stalled? Disable the carrier. */ + spin_lock(&vif->lock); + if (vif->stalled_queues++ == 0) { + netdev_info(vif->dev, "Guest Rx stalled"); + netif_carrier_off(vif->dev); + } + spin_unlock(&vif->lock); } -/* Only called from the queue's thread, it handles the situation when the guest - * doesn't post enough requests on the receiving ring. - * First xenvif_start_xmit disables QDisc and start a timer, and then either the - * timer fires, or the guest send an interrupt after posting new request. If it - * is the timer, the carrier is turned off here. - * */ -static void xenvif_rx_purge_event(struct xenvif_queue *queue) +static void xenvif_queue_carrier_on(struct xenvif_queue *queue) { - /* Either the last unsuccesful skb or at least 1 slot should fit */ - int needed = queue->rx_last_skb_slots ? - queue->rx_last_skb_slots : 1; + struct xenvif *vif = queue->vif; - /* It is assumed that if the guest post new slots after this, the RX - * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up - * the thread again - */ - set_bit(QUEUE_STATUS_RX_STALLED, &queue->status); - if (!xenvif_rx_ring_slots_available(queue, needed)) { - rtnl_lock(); - if (netif_carrier_ok(queue->vif->dev)) { - /* Timer fired and there are still no slots. Turn off - * everything except the interrupts - */ - netif_carrier_off(queue->vif->dev); - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id); - } else { - /* Probably an another queue already turned the carrier - * off, make sure nothing is stucked in the internal - * queue of this queue - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } - rtnl_unlock(); - } else if (!netif_carrier_ok(queue->vif->dev)) { - unsigned int num_queues = queue->vif->num_queues; - unsigned int i; - /* The carrier was down, but an interrupt kicked - * the thread again after new requests were - * posted - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - rtnl_lock(); - netif_carrier_on(queue->vif->dev); - netif_tx_wake_all_queues(queue->vif->dev); - rtnl_unlock(); + queue->last_rx_time = jiffies; /* Reset Rx stall detection. 
*/ + queue->stalled = false; - for (i = 0; i < num_queues; i++) { - struct xenvif_queue *temp = &queue->vif->queues[i]; + /* All queues are ready? Enable the carrier. */ + spin_lock(&vif->lock); + if (--vif->stalled_queues == 0) { + netdev_info(vif->dev, "Guest Rx ready"); + netif_carrier_on(vif->dev); + } + spin_unlock(&vif->lock); +} - xenvif_napi_schedule_or_enable_events(temp); - } - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier on again\n"); - } else { - /* Queuing were stopped, but the guest posted - * new requests and sent an interrupt - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - del_timer_sync(&queue->rx_stalled); - xenvif_start_queue(queue); +static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return !queue->stalled + && prod - cons < XEN_NETBK_RX_SLOTS_MAX + && time_after(jiffies, + queue->last_rx_time + rx_stall_timeout_jiffies); +} + +static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return queue->stalled + && prod - cons >= XEN_NETBK_RX_SLOTS_MAX; +} + +static bool xenvif_have_rx_work(struct xenvif_queue *queue) +{ + return (!skb_queue_empty(&queue->rx_queue) + && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) + || xenvif_rx_queue_stalled(queue) + || xenvif_rx_queue_ready(queue) + || kthread_should_stop() + || queue->vif->disabled; +} + +static long xenvif_rx_queue_timeout(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + long timeout; + + skb = skb_peek(&queue->rx_queue); + if (!skb) + return MAX_SCHEDULE_TIMEOUT; + + timeout = XENVIF_RX_CB(skb)->expires - jiffies; + return timeout < 0 ? 0 : timeout; +} + +/* Wait until the guest Rx thread has work. + * + * The timeout needs to be adjusted based on the current head of the + * queue (and not just the head at the beginning). In particular, if + * the queue is initially empty an infinite timeout is used and this + * needs to be reduced when a skb is queued. + * + * This cannot be done with wait_event_timeout() because it only + * calculates the timeout once. + */ +static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) +{ + DEFINE_WAIT(wait); + + if (xenvif_have_rx_work(queue)) + return; + + for (;;) { + long ret; + + prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); + if (xenvif_have_rx_work(queue)) + break; + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); + if (!ret) + break; } + finish_wait(&queue->wq, &wait); } int xenvif_kthread_guest_rx(void *data) { struct xenvif_queue *queue = data; - struct sk_buff *skb; + struct xenvif *vif = queue->vif; - while (!kthread_should_stop()) { - wait_event_interruptible(queue->wq, - rx_work_todo(queue) || - queue->vif->disabled || - test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) || - kthread_should_stop()); + for (;;) { + xenvif_wait_for_rx_work(queue); if (kthread_should_stop()) break; @@ -2028,35 +2107,38 @@ int xenvif_kthread_guest_rx(void *data) * context so we defer it here, if this thread is * associated with queue 0. 
*/ - if (unlikely(queue->vif->disabled && queue->id == 0)) { - xenvif_carrier_off(queue->vif); - } else if (unlikely(queue->vif->disabled)) { - /* kthread_stop() would be called upon this thread soon, - * be a bit proactive - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, - &queue->status))) { - xenvif_rx_purge_event(queue); - } else if (!netif_carrier_ok(queue->vif->dev)) { - /* Another queue stalled and turned the carrier off, so - * purge the internal queue of queues which were not - * blocked - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; + if (unlikely(vif->disabled && queue->id == 0)) { + xenvif_carrier_off(vif); + xenvif_rx_queue_purge(queue); + continue; } if (!skb_queue_empty(&queue->rx_queue)) xenvif_rx_action(queue); + /* If the guest hasn't provided any Rx slots for a + * while it's probably not responsive, drop the + * carrier so packets are dropped earlier. + */ + if (xenvif_rx_queue_stalled(queue)) + xenvif_queue_carrier_off(queue); + else if (xenvif_rx_queue_ready(queue)) + xenvif_queue_carrier_on(queue); + + /* Queued packets may have foreign pages from other + * domains. These cannot be queued indefinitely as + * this would starve guests of grant refs and transmit + * slots. + */ + xenvif_rx_queue_drop_expired(queue); + + xenvif_rx_queue_maybe_wake(queue); + cond_resched(); } /* Bin any remaining skbs */ - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) - dev_kfree_skb(skb); + xenvif_rx_queue_purge(queue); return 0; } @@ -2113,6 +2195,7 @@ static int __init netback_init(void) goto failed_init; rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); + rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs); #ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 8079c31ac5e..4e56a27f968 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) struct xenvif_queue *queue = m->private; struct xen_netif_tx_back_ring *tx_ring = &queue->tx; struct xen_netif_rx_back_ring *rx_ring = &queue->rx; + struct netdev_queue *dev_queue; if (tx_ring->sring) { struct xen_netif_tx_sring *sring = tx_ring->sring; @@ -112,6 +113,13 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) queue->credit_timeout.expires, jiffies); + dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id); + + seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n", + queue->rx_queue_len, queue->rx_queue_max, + skb_queue_len(&queue->rx_queue), + netif_tx_queue_stopped(dev_queue) ? "stopped" : "running"); + return 0; } @@ -703,6 +711,7 @@ static void connect(struct backend_info *be) be->vif->queues = vzalloc(requested_num_queues * sizeof(struct xenvif_queue)); be->vif->num_queues = requested_num_queues; + be->vif->stalled_queues = requested_num_queues; for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) { queue = &be->vif->queues[queue_index]; @@ -873,15 +882,10 @@ static int read_xenbus_vif_flags(struct backend_info *be) if (!rx_copy) return -EOPNOTSUPP; - if (vif->dev->tx_queue_len != 0) { - if (xenbus_scanf(XBT_NIL, dev->otherend, - "feature-rx-notify", "%d", &val) < 0) - val = 0; - if (val) - vif->can_queue = 1; - else - /* Must be non-zero for pfifo_fast to work. 
*/ - vif->dev->tx_queue_len = 1; + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-rx-notify", "%d", &val) < 0 || val == 0) { + xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory"); + return -EINVAL; } if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", diff --git a/net/core/dev.c b/net/core/dev.c index b793e3521a3..945bbd00135 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); diff --git a/net/core/tso.c b/net/core/tso.c index 8c3203c585b..630b30b4fb5 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,6 +1,7 @@ #include <linux/export.h> #include <net/ip.h> #include <net/tso.h> +#include <asm/unaligned.h> /* Calculate expected number of TX descriptors */ int tso_count_descs(struct sk_buff *skb) @@ -23,7 +24,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, iph->id = htons(tso->ip_id); iph->tot_len = htons(size + hdr_len - mac_hdr_len); tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - tcph->seq = htonl(tso->tcp_seq); + put_unaligned_be32(tso->tcp_seq, &tcph->seq); tso->ip_id++; if (!is_last) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1bec4e76d88..39ec0c37954 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. 
See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return raw_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94d1a7757ff..9c7d7621466 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; - inet_set_txhash(sk); - inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; + inet_set_txhash(sk); + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 831495529b8..ace29b60813 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -200,8 +200,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; - ip6_set_txhash(sk); - /* * TCP over IPv4 */ @@ -297,6 +295,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; + ip6_set_txhash(sk); + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ac49f84fe2c..5f983644373 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (!onlyproto && (nh + offset + 4 < skb->data || pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports = (__be16 *)exthdr; + __be16 *ports; + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); fl6->fl6_sport = ports[!!reverse]; fl6->fl6_dport = ports[!reverse]; } @@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { - u8 *icmp = (u8 *)exthdr; + u8 *icmp; + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); fl6->fl6_icmp_type = icmp[0]; fl6->fl6_icmp_code = icmp[1]; } @@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; - mh = (struct ip6_mh *)exthdr; + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); fl6->fl6_mh_type = mh->ip6mh_type; } fl6->flowi6_proto = 
nexthdr;
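Two recurring fixes in the hunks above are worth a standalone illustration: the i40e change decodes a register field as (reg & MASK) >> SHIFT instead of masking with the shift constant, and the xfrm6_policy changes re-derive their header pointers from skb_network_header() after pskb_may_pull(), because the pull may reallocate the packet header and leave the old exthdr pointer dangling. The sketch below is a userspace analogue only: EVENT_SHIFT/EVENT_MASK are invented values (not the real I40E_GL_MDET_* layout) and grow_buf() is a made-up stand-in for pskb_may_pull(), not a kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Field extraction: mask first, then shift.  The broken i40e code used the
 * shift constant as the mask, which extracts garbage.  These constants are
 * invented for the example. */
#define EVENT_SHIFT 26
#define EVENT_MASK  (0x7u << EVENT_SHIFT)

static uint8_t decode_event(uint32_t reg)
{
	return (reg & EVENT_MASK) >> EVENT_SHIFT;
}

/* grow_buf() stands in for pskb_may_pull(): it may move the underlying
 * storage, so any pointer computed before the call must be recomputed from
 * the (possibly new) base afterwards, as the xfrm6 hunks do with
 * skb_network_header(skb) + offset. */
static char *grow_buf(char *buf, size_t *len, size_t need)
{
	if (*len >= need)
		return buf;
	char *nbuf = realloc(buf, need);	/* may return a different address */
	if (!nbuf)
		return NULL;
	memset(nbuf + *len, 0, need - *len);
	*len = need;
	return nbuf;
}

int main(void)
{
	printf("event=%u\n", (unsigned)decode_event(3u << EVENT_SHIFT)); /* 3 */

	size_t len = 8, offset = 4;
	char *buf = calloc(1, len);
	if (!buf)
		return 1;

	char *field = buf + offset;		/* like exthdr before the pull */
	char *nbuf = grow_buf(buf, &len, 4096);
	if (!nbuf) {
		free(buf);
		return 1;
	}
	buf = nbuf;
	field = buf + offset;			/* recompute from the new base */
	field[0] = 1;
	printf("field=%d len=%zu\n", field[0], len);
	free(buf);
	return 0;
}

Any C99 compiler builds this; the point is only the two idioms, not the surrounding scaffolding.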