From df7fba66471c6bbbaebb55e1bb3658eb7ce00a9b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:26:45 +0000 Subject: IB/mlx4: Fix possible deadlock on sm_lock spinlock The sm_lock spinlock is taken in the process context by mlx4_ib_modify_device, and in the interrupt context by update_sm_ah, so we need to take that spinlock with irqsave, and release it with irqrestore. Lockdeps reports this as follows: [ INFO: inconsistent lock state ] 3.5.0+ #20 Not tainted inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&ibdev->sm_lock)->rlock){?.+...}, at: [] update_sm_ah+0xad/0x100 [mlx4_ib] {HARDIRQ-ON-W} state was registered at: [] mark_irqflags+0x120/0x190 [] __lock_acquire+0x307/0x4c0 [] lock_acquire+0xb1/0x150 [] _raw_spin_lock+0x41/0x50 [] mlx4_ib_modify_device+0x63/0x240 [mlx4_ib] [] ib_modify_device+0x1c/0x20 [ib_core] [] set_node_desc+0x83/0xc0 [ib_core] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xe6/0x170 [] vfs_write+0xc8/0x190 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b ... *** DEADLOCK *** 1 lock held by swapper/0/0: stack backtrace: Pid: 0, comm: swapper/0 Not tainted 3.5.0+ #20 Call Trace: [] print_usage_bug+0x18a/0x190 [] ? print_irq_inversion_bug+0x210/0x210 [] mark_lock_irq+0xf2/0x280 [] mark_lock+0x150/0x240 [] mark_irqflags+0x16f/0x190 [] __lock_acquire+0x307/0x4c0 [] ? update_sm_ah+0xad/0x100 [mlx4_ib] [] lock_acquire+0xb1/0x150 [] ? update_sm_ah+0xad/0x100 [mlx4_ib] [] _raw_spin_lock+0x41/0x50 [] ? update_sm_ah+0xad/0x100 [mlx4_ib] [] ? ib_create_ah+0x1a/0x40 [ib_core] [] update_sm_ah+0xad/0x100 [mlx4_ib] [] ? is_module_address+0x23/0x30 [] handle_port_mgmt_change_event+0xeb/0x150 [mlx4_ib] [] mlx4_ib_event+0x117/0x160 [mlx4_ib] [] ? _raw_spin_lock_irqsave+0x61/0x70 [] mlx4_dispatch_event+0x6c/0x90 [mlx4_core] [] mlx4_eq_int+0x500/0x950 [mlx4_core] Reported by: Or Gerlitz Tested-by: Bart Van Assche Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 16 ++++++++++------ drivers/infiniband/hw/mlx4/main.c | 5 +++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c27141fef1a..9c2ae7efd00 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -125,6 +125,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) { struct ib_ah *new_ah; struct ib_ah_attr ah_attr; + unsigned long flags; if (!dev->send_agent[port_num - 1][0]) return; @@ -139,11 +140,11 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) if (IS_ERR(new_ah)) return; - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) ib_destroy_ah(dev->sm_ah[port_num - 1]); dev->sm_ah[port_num - 1] = new_ah; - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); } /* @@ -197,13 +198,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, static void node_desc_override(struct ib_device *dev, struct ib_mad *mad) { + unsigned long flags; + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { - spin_lock(&to_mdev(dev)->sm_lock); + spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); - spin_unlock(&to_mdev(dev)->sm_lock); + spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } @@ -213,6 +216,7 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; + unsigned long flags; if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, @@ -225,13 +229,13 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma * wrong following the IB spec strictly, but we know * it's OK for our devices). */ - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); memcpy(send_buf->mad, mad, sizeof *mad); if ((send_buf->ah = dev->sm_ah[port_num - 1])) ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); if (ret) ib_free_send_mad(send_buf); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index fe2088cfa6e..cc05579ebce 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -423,6 +423,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { struct mlx4_cmd_mailbox *mailbox; + unsigned long flags; if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; @@ -430,9 +431,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; - spin_lock(&to_mdev(ibdev)->sm_lock); + spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); + spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* * If possible, pass node desc to FW, so it can generate -- cgit v1.2.3-70-g09d2 From d549f55f2e132e3d1f1288ce4231f45f12988bbf Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 10 Aug 2012 16:52:13 -0700 Subject: RDMA/ocrdma: Don't call vlan_dev_real_dev() for non-VLAN netdevs If CONFIG_VLAN_8021Q is not set, then vlan_dev_real_dev() just goes BUG(), so we shouldn't call it unless we're actually dealing with a VLAN netdev. Reported-by: Fengguang Wu Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5a044526e4f..c4e0131f1b5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -161,7 +161,7 @@ static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) ocrdma_get_guid(dev, &sgid->raw[8]); } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev) { struct net_device *netdev, *tmp; @@ -202,14 +202,13 @@ static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev) return 0; } -#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_VLAN_8021Q) +#if IS_ENABLED(CONFIG_IPV6) static int ocrdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; - struct net_device *event_netdev = ifa->idev->dev; - struct net_device *netdev = NULL; + struct net_device *netdev = ifa->idev->dev; struct ib_event gid_event; struct ocrdma_dev *dev; bool found = false; @@ -217,11 +216,12 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier, bool is_vlan = false; u16 vid = 0; - netdev = vlan_dev_real_dev(event_netdev); - if (netdev != event_netdev) { - is_vlan = true; - vid = vlan_dev_vlan_id(event_netdev); + is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN; + if (is_vlan) { + vid = vlan_dev_vlan_id(netdev); + netdev = vlan_dev_real_dev(netdev); } + rcu_read_lock(); list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) { if (dev->nic_info.netdev == netdev) { -- cgit v1.2.3-70-g09d2 From 418edaaba96e58112b15c82b4907084e2a9caf42 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 3 Aug 2012 23:59:41 +0000 Subject: RDMA/ucma.c: Fix for events with wrong context on iWARP It is possible for asynchronous RDMA_CM_EVENT_ESTABLISHED events to be generated with ctx->uid == 0, because ucma_set_event_context() copies ctx->uid to the event structure outside of ctx->file->mut. This leads to a crash in the userspace library, since it gets a bogus event. Fix this by taking the mutex a bit earlier in ucma_event_handler. Signed-off-by: Tatyana Nikolova Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/ucma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6bf85042289..055ed59838d 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -267,6 +267,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, if (!uevent) return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + mutex_lock(&ctx->file->mut); uevent->cm_id = cm_id; ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; @@ -277,7 +278,6 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); - mutex_lock(&ctx->file->mut); if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; -- cgit v1.2.3-70-g09d2 From fa16ebed31f336e41970f3f0ea9e8279f6be2d27 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Mon, 13 Aug 2012 14:39:49 +0000 Subject: IB/ipoib: Add missing locking when CM object is deleted Commit b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") introduced a bug where in ipoib_cm_destroy_tx() a CM object is moved between lists without any supported locking. Under a stress test, this eventually leads to list corruption and a crash. Previously when this routine was called, callers were taking the device priv lock. Currently this function is called from the RCU callback associated with neighbour deletion. Fix the race by taking the same lock we used to before. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 95ecf4eadf5..24683fda8e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1271,12 +1271,15 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + unsigned long flags; if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + spin_lock_irqsave(&priv->lock, flags); list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); ipoib_dbg(priv, "Reap connection for gid %pI6\n", tx->neigh->daddr + 4); tx->neigh = NULL; + spin_unlock_irqrestore(&priv->lock, flags); } } -- cgit v1.2.3-70-g09d2 From 6c723a68c661008adf415ee90efe5f737e928ce0 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Mon, 13 Aug 2012 14:39:50 +0000 Subject: IB/ipoib: Fix RCU pointer dereference of wrong object Commit b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") introduced a bug where in ipoib_neigh_free() (which is called from a few errors flows in the driver), rcu_dereference() is invoked with the wrong pointer object, which results in a crash. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 97920b77a5d..3e2085a3ee4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1052,7 +1052,7 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) for (n = rcu_dereference_protected(*np, lockdep_is_held(&ntbl->rwlock)); n != NULL; - n = rcu_dereference_protected(neigh->hnext, + n = rcu_dereference_protected(*np, lockdep_is_held(&ntbl->rwlock))) { if (n == neigh) { /* found */ -- cgit v1.2.3-70-g09d2 From 142ad5db2b29a1c392e1b14934fae5d161d6c6e7 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 10 Aug 2012 00:07:58 +0000 Subject: IB: Fix typos in infiniband drivers Correct spelling typos in comments in drivers/infiniband. Signed-off-by: Masanari Iida Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_rnic.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2 +- drivers/infiniband/hw/qib/qib_sd7220.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 8c81992fa6d..e4a73158fc7 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -439,7 +439,7 @@ static int c2_rnic_close(struct c2_dev *c2dev) /* * Called by c2_probe to initialize the RNIC. This principally - * involves initalizing the various limits and resouce pools that + * involves initializing the various limits and resource pools that * comprise the RNIC instance. */ int __devinit c2_rnic_init(struct c2_dev *c2dev) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 77b6b182778..aaf88ef9409 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1680,7 +1680,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * T3A does 3 things when a TERM is received: * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcde cqe into the associated CQ. + * 3) post a TERMINATE opcode cqe into the associated CQ. * * For (1), we save the message in the qp for later consumer consumption. * For (2), we move the QP into TERMINATE, post a QP event and disconnect. diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index a322d5171a2..50a8a0d4fe6 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -372,7 +372,7 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, /* Read CTRL reg for each channel to check TRIMDONE */ if (baduns & (1 << chn)) { qib_dev_err(dd, - "Reseting TRIMDONE on chn %d (%s)\n", + "Resetting TRIMDONE on chn %d (%s)\n", chn, where); ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7a0ce8d4288..9e1449f8c6a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1469,7 +1469,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, * * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping * the data that has been transferred via IB RDMA had to be postponed until the - * check_stop_free() callback. None of this is nessecary anymore and needs to + * check_stop_free() callback. None of this is necessary anymore and needs to * be cleaned up. */ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, -- cgit v1.2.3-70-g09d2 From 51fa3ca37e3bebb291dbe50faa3cb259af35e978 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 14 Aug 2012 12:58:35 +0000 Subject: IB/qib: Fix error return code in qib_init_7322_variables() Convert a 0 error return code to a negative one, as returned elsewhere in the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e,e1,e2,e3,e4,x; @@ ( if (\(ret != 0\|ret < 0\) || ...) { ... return ...; } | ret = 0 ) ... when != ret = e1 *x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...); ... when != x = e2 when != ret = e3 *if (x == NULL || ...) { ... when != ret = e4 * return ret; } // Signed-off-by: Julia Lawall Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba7322.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 0d7280af99b..3f6b21e9dc1 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6346,8 +6346,10 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->piobcnt4k * dd->align4k; dd->piovl15base = ioremap_nocache(vl15off, NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) + if (!dd->piovl15base) { + ret = -ENOMEM; goto bail; + } } qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ -- cgit v1.2.3-70-g09d2 From 220329916c72ee3d54ae7262b215a050f04a18fc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Aug 2012 13:18:53 +0000 Subject: IB/srp: Fix a race condition Avoid a crash caused by the scmnd->scsi_done(scmnd) call in srp_process_rsp() being invoked with scsi_done == NULL. This can happen if a reply is received during or after a command abort. Reported-by: Joseph Glanville Reference: http://marc.info/?l=linux-rdma&m=134314367801595 Cc: Acked-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 87 +++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bcbf22ee0aa..1b5b0c73005 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -586,24 +586,62 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, scmnd->sc_data_direction); } -static void srp_remove_req(struct srp_target_port *target, - struct srp_request *req, s32 req_lim_delta) +/** + * srp_claim_req - Take ownership of the scmnd associated with a request. + * @target: SRP target port. + * @req: SRP request. + * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take + * ownership of @req->scmnd if it equals @scmnd. + * + * Return value: + * Either NULL or a pointer to the SCSI command the caller became owner of. + */ +static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, + struct srp_request *req, + struct scsi_cmnd *scmnd) +{ + unsigned long flags; + + spin_lock_irqsave(&target->lock, flags); + if (!scmnd) { + scmnd = req->scmnd; + req->scmnd = NULL; + } else if (req->scmnd == scmnd) { + req->scmnd = NULL; + } else { + scmnd = NULL; + } + spin_unlock_irqrestore(&target->lock, flags); + + return scmnd; +} + +/** + * srp_free_req() - Unmap data and add request to the free request list. + */ +static void srp_free_req(struct srp_target_port *target, + struct srp_request *req, struct scsi_cmnd *scmnd, + s32 req_lim_delta) { unsigned long flags; - srp_unmap_data(req->scmnd, target, req); + srp_unmap_data(scmnd, target, req); + spin_lock_irqsave(&target->lock, flags); target->req_lim += req_lim_delta; - req->scmnd = NULL; list_add_tail(&req->list, &target->free_reqs); spin_unlock_irqrestore(&target->lock, flags); } static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) { - req->scmnd->result = DID_RESET << 16; - req->scmnd->scsi_done(req->scmnd); - srp_remove_req(target, req, 0); + struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); + + if (scmnd) { + scmnd->result = DID_RESET << 16; + scmnd->scsi_done(scmnd); + srp_free_req(target, req, scmnd, 0); + } } static int srp_reconnect_target(struct srp_target_port *target) @@ -1073,11 +1111,18 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) complete(&target->tsk_mgmt_done); } else { req = &target->req_ring[rsp->tag]; - scmnd = req->scmnd; - if (!scmnd) + scmnd = srp_claim_req(target, req, NULL); + if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, "Null scmnd for RSP w/tag %016llx\n", (unsigned long long) rsp->tag); + + spin_lock_irqsave(&target->lock, flags); + target->req_lim += be32_to_cpu(rsp->req_lim_delta); + spin_unlock_irqrestore(&target->lock, flags); + + return; + } scmnd->result = rsp->status; if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { @@ -1092,7 +1137,9 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); - srp_remove_req(target, req, be32_to_cpu(rsp->req_lim_delta)); + srp_free_req(target, req, scmnd, + be32_to_cpu(rsp->req_lim_delta)); + scmnd->host_scribble = NULL; scmnd->scsi_done(scmnd); } @@ -1631,25 +1678,17 @@ static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req = (struct srp_request *) scmnd->host_scribble; - int ret = SUCCESS; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || target->qp_in_error) + if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) return FAILED; - if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, - SRP_TSK_ABORT_TASK)) - return FAILED; - - if (req->scmnd) { - if (!target->tsk_mgmt_status) { - srp_remove_req(target, req, 0); - scmnd->result = DID_ABORT << 16; - } else - ret = FAILED; - } + srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, + SRP_TSK_ABORT_TASK); + srp_free_req(target, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; - return ret; + return SUCCESS; } static int srp_reset_device(struct scsi_cmnd *scmnd) -- cgit v1.2.3-70-g09d2 From 89dd86db78e08b51bab29e168fd41b2fd943e6b6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 13 Aug 2012 08:15:06 +0000 Subject: mlx4_core: Allow large mlx4_buddy bitmaps mlx4_buddy_init uses kmalloc() to allocate bitmaps, which fails when the required size is beyond the max supported value (or when memory is too fragmented to handle a huge allocation). Extend this to use use vmalloc() if kmalloc() fails, and take that into account when freeing the bitmaps as well. This fixes a driver load failure when log num mtt is 26 or higher, and is a step in the direction of allowing to register huge amounts of memory on large memory systems. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mr.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index af55b7ce534..d6a3a392a37 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -130,8 +131,11 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); - if (!buddy->bits[i]) - goto err_out_free; + if (!buddy->bits[i]) { + buddy->bits[i] = vmalloc(s * sizeof(long)); + if (!buddy->bits[i]) + goto err_out_free; + } bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); } @@ -142,7 +146,10 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) err_out_free: for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + if (buddy->bits[i] && is_vmalloc_addr(buddy->bits[i])) + vfree(buddy->bits[i]); + else + kfree(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -156,7 +163,10 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + if (is_vmalloc_addr(buddy->bits[i])) + vfree(buddy->bits[i]); + else + kfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); -- cgit v1.2.3-70-g09d2 From 3de819e6b642fdd51904f5f4d2716d7466a2f7f5 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 13 Aug 2012 08:15:07 +0000 Subject: mlx4_core: Fix integer overflow issues around MTT table Fix some issues around int variables used in data structures related to memory registration. Handle int overflow in mlx4_init_icm_table by using a u64 intermediate variable and changing struct mlx4_icm_table num_obj field to be u32. Change some more fields/variables to use u32 instead of int to prevent a case where the variable becomes negative when bit 31 is set. Also subtract log_mtts_per_seg from the exponent when computing num_mtt, since its added later on in that very same code area. This and the previous commit fixes some issues which actually prevent commit db5a7a65c058 ("mlx4_core: Scale size of MTT table with system RAM") from working. Now, when the number of MTTs is scaled with the size of the RAM we can map up to 8TB. Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/icm.c | 9 ++++++--- drivers/net/ethernet/mellanox/mlx4/icm.h | 2 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mr.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/profile.c | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 88b7b3e75ab..daf41792366 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -358,13 +358,14 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, } int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, - u64 virt, int obj_size, int nobj, int reserved, + u64 virt, int obj_size, u32 nobj, int reserved, int use_lowmem, int use_coherent) { int obj_per_chunk; int num_icm; unsigned chunk_size; int i; + u64 size; obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; @@ -380,10 +381,12 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, table->coherent = use_coherent; mutex_init(&table->mutex); + size = (u64) nobj * obj_size; for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { chunk_size = MLX4_TABLE_CHUNK_SIZE; - if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size) - chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE); + if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size) + chunk_size = PAGE_ALIGN(size - + i * MLX4_TABLE_CHUNK_SIZE); table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index 19e4efc0b34..a67744f5350 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -78,7 +78,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, int start, int end); int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, - u64 virt, int obj_size, int nobj, int reserved, + u64 virt, int obj_size, u32 nobj, int reserved, int use_lowmem, int use_coherent); void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 59ebc033963..4d9df8f2a12 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -249,7 +249,7 @@ struct mlx4_bitmap { struct mlx4_buddy { unsigned long **bits; unsigned int *num_free; - int max_order; + u32 max_order; spinlock_t lock; }; @@ -258,7 +258,7 @@ struct mlx4_icm; struct mlx4_icm_table { u64 virt; int num_icm; - int num_obj; + u32 num_obj; int obj_size; int lowmem; int coherent; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index d6a3a392a37..44b8e1ea1cd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -678,7 +678,7 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) return err; err = mlx4_buddy_init(&mr_table->mtt_buddy, - ilog2(dev->caps.num_mtts / + ilog2((u32)dev->caps.num_mtts / (1 << log_mtts_per_seg))); if (err) goto err_buddy; @@ -688,7 +688,7 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)); if (priv->reserved_mtts < 0) { - mlx4_warn(dev, "MTT table of order %d is too small.\n", + mlx4_warn(dev, "MTT table of order %u is too small.\n", mr_table->mtt_buddy.max_order); err = -ENOMEM; goto err_reserve_mtts; diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index 9ee4725363d..8e0c3cc2a1e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -76,7 +76,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, u64 size; u64 start; int type; - int num; + u32 num; int log_num; }; @@ -105,7 +105,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, si_meminfo(&si); request->num_mtt = roundup_pow_of_two(max_t(unsigned, request->num_mtt, - min(1UL << 31, + min(1UL << (31 - log_mtts_per_seg), si.totalram >> (log_mtts_per_seg - 1)))); profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz; -- cgit v1.2.3-70-g09d2 From 96f17d590092ae2511adf599a252e8ed1901b7a4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 14 Aug 2012 15:17:10 -0700 Subject: mlx4_core: Clean up buddy bitmap allocation - Use kcalloc() / vzalloc() instead of an extra bitmap_zero(). - Add __GFP_NOWARN to kcalloc() since we'll try vzalloc() if it fails. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 44b8e1ea1cd..c202d3ad2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -121,7 +121,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *), GFP_KERNEL); buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, GFP_KERNEL); @@ -130,13 +130,12 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); + buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN); if (!buddy->bits[i]) { - buddy->bits[i] = vmalloc(s * sizeof(long)); + buddy->bits[i] = vzalloc(s * sizeof(long)); if (!buddy->bits[i]) goto err_out_free; } - bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); } set_bit(0, buddy->bits[buddy->max_order]); -- cgit v1.2.3-70-g09d2 From a0675a386a3a68f71e831bd064082e6717b45fdc Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Fri, 10 Aug 2012 18:25:34 +0000 Subject: IB/mlx4: Check iboe netdev pointer before dereferencing it Unlike other parts of the mlx4_ib code, the function build_mlx_header() doesn't check if the iboe netdev of the given port is valid before dereferencing it, which can cause a crash if the ethernet interface has already been taken down. Fix this by checking for a valid netdev pointer before using it to get the port MAC address. Signed-off-by: Kleber Sacilotto de Souza Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a6d8ea060ea..f585eddef4b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1407,6 +1407,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct net_device *ndev; union ib_gid sgid; u16 pkey; int send_size; @@ -1483,7 +1484,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? */ - smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; + ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; + if (!ndev) + return -ENODEV; + smac = ndev->dev_addr; memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); -- cgit v1.2.3-70-g09d2