From 7e5a90c25f89128c096dbdb0e5451962438b1e05 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Mon, 4 Feb 2013 15:29:10 +0000 Subject: IPoIB: Fix crash due to skb double destruct After commit b13912bbb4a2 ("IPoIB: Call skb_dst_drop() once skb is enqueued for sending"), using connected mode and running multithreaded iperf for long time, ie iperf -c -P 16 -t 3600 results in a crash. After the above-mentioned patch, the driver is calling skb_orphan() and skb_dst_drop() after calling post_send() in ipoib_cm.c::ipoib_cm_send() (also in ipoib_ib.c::ipoib_send()) The problem with this is, as is written in a comment in both routines, "it's entirely possible that the completion handler will run before we execute anything after the post_send()." This leads to running the skb cleanup routines simultaneously in two different contexts. The solution is to always perform the skb_orphan() and skb_dst_drop() before queueing the send work request. If an error occurs, then it will be no different than the regular case where dev_free_skb_any() in the completion path, which is assumed to be after these two routines. Signed-off-by: Shlomo Pongratz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 03103d2bd64..67b0c1d2367 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -741,6 +741,9 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ tx_req->mapping = addr; + skb_orphan(skb); + skb_dst_drop(skb); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), addr, skb->len); if (unlikely(rc)) { @@ -752,9 +755,6 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ dev->trans_start = jiffies; ++tx->tx_head; - skb_orphan(skb); - skb_dst_drop(skb); - if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index a1bca70e20a..2cfa76f5d99 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -600,6 +600,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, netif_stop_queue(dev); } + skb_orphan(skb); + skb_dst_drop(skb); + rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address->ah, qpn, tx_req, phead, hlen); if (unlikely(rc)) { @@ -615,9 +618,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; - - skb_orphan(skb); - skb_dst_drop(skb); } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) -- cgit v1.2.3-70-g09d2 From f97b4b5d46d586782b40c8ced81bf9763c18e7cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 10 Jan 2013 15:18:35 +0000 Subject: mlx4_core: Fix advertisement of wrong PF context behaviour Commit 08ff32352d6f ("mlx4: 64-byte CQE/EQE support") introduced a regression where older guest VF drivers failed to load even when 64-byte EQEs/CQEs are disabled, since the PF wrongly advertises the new context behaviour anyway. The failure looks like: mlx4_core 0000:00:07.0: Unknown pf context behaviour mlx4_core 0000:00:07.0: Failed to obtain slave caps mlx4_core: probe of 0000:00:07.0 failed with error -38 Fix this by basing this advertisement on dev->caps.flags, which is the operational capabilities used by the QUERY_FUNC_CAP command wrapper (dev_cap->flags holds the firmware capabilities). Reported-by: Alex Williamson Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e1bafffbc3b..9a84c75e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -380,7 +380,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } - if ((dev_cap->flags & + if ((dev->caps.flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; -- cgit v1.2.3-70-g09d2 From d359f35430c4ea4dd4e24d809aeaeff1fa54c314 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 24 Jan 2013 18:59:34 +0000 Subject: IB/qib: Fix for broken sparse warning fix Commit 1fb9fed6d489 ("IB/qib: Fix QP RCU sparse warning") broke QP hash list deletion in qp_remove() badly. This patch restores the former for loop behavior, while still fixing the sparse warnings. Cc: Reviewed-by: Gary Leshner Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_qp.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4850d03870c..35275099caf 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -263,20 +263,15 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) struct qib_qp __rcu **qpp; qpp = &dev->qp_table[n]; - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); - for (; q; qpp = &q->next) { + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock))) != NULL; + qpp = &q->next) if (q == qp) { atomic_dec(&qp->refcount); *qpp = qp->next; rcu_assign_pointer(qp->next, NULL); - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); break; } - q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->qpt_lock)); - } } spin_unlock_irqrestore(&dev->qpt_lock, flags); -- cgit v1.2.3-70-g09d2