From 3bea57a5fc1762a72fb9ac88b9aa9e48dcbea8bc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 30 Apr 2012 10:27:26 -0700 Subject: IB/uverbs: Make lockdep output more readable Add names for our lockdep classes, so instead of having to decipher lockdep output with mysterious names: Chain exists of: key#14 --> key#11 --> key#13 lockdep will give us something nicer: Chain exists of: SRQ-uobj --> PD-uobj --> CQ-uobj Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 39 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4d27e4c3fe3..85231e2bdee 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -41,13 +41,18 @@ #include "uverbs.h" -static struct lock_class_key pd_lock_key; -static struct lock_class_key mr_lock_key; -static struct lock_class_key cq_lock_key; -static struct lock_class_key qp_lock_key; -static struct lock_class_key ah_lock_key; -static struct lock_class_key srq_lock_key; -static struct lock_class_key xrcd_lock_key; +struct uverbs_lock_class { + struct lock_class_key key; + char name[16]; +}; + +static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -83,13 +88,13 @@ static struct lock_class_key xrcd_lock_key; */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct lock_class_key *key) + struct ib_ucontext *context, struct uverbs_lock_class *c) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); - lockdep_set_class(&uobj->mutex, key); + lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); uobj->live = 0; } @@ -522,7 +527,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &pd_lock_key); + init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -750,7 +755,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); down_write(&obj->uobject.mutex); @@ -947,7 +952,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &mr_lock_key); + init_uobj(uobj, 0, file->ucontext, &mr_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1115,7 +1120,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); if (cmd.comp_channel >= 0) { @@ -1407,7 +1412,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd.qp_type == IB_QPT_XRC_TGT) { @@ -1585,7 +1590,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); @@ -2272,7 +2277,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -2476,7 +2481,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); down_write(&obj->uevent.uobject.mutex); pd = idr_read_pd(cmd->pd_handle, file->ucontext); -- cgit v1.2.3-70-g09d2 From 5909ce545db415ae8c26e849df862e8cc1acf571 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 30 Apr 2012 12:51:50 -0700 Subject: IB/uverbs: Lock SRQ / CQ / PD objects in a consistent order Since XRC support was added, the uverbs code has locked SRQ, CQ and PD objects needed during QP and SRQ creation in different orders depending on the the code path. This leads to the (at least theoretical) possibility of deadlock, and triggers the lockdep splat below. Fix this by making sure we always lock the SRQ first, then CQs and finally the PD. ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.0-rc5+ #34 Not tainted ------------------------------------------------------- ibv_srq_pingpon/2484 is trying to acquire lock: (SRQ-uobj){+++++.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] but task is already holding lock: (CQ-uobj){+++++.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (CQ-uobj){+++++.}: [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x180/0x684 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b -> #1 (PD-uobj){++++++}: [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] __uverbs_create_xsrq+0x96/0x386 [ib_uverbs] [] ib_uverbs_detach_mcast+0x1cd/0x1e6 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b -> #0 (SRQ-uobj){+++++.}: [] __lock_acquire+0xa29/0xd06 [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b other info that might help us debug this: Chain exists of: SRQ-uobj --> PD-uobj --> CQ-uobj Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(CQ-uobj); lock(PD-uobj); lock(CQ-uobj); lock(SRQ-uobj); *** DEADLOCK *** 3 locks held by ibv_srq_pingpon/2484: #0: (QP-uobj){+.+...}, at: [] ib_uverbs_create_qp+0xe9/0x684 [ib_uverbs] #1: (PD-uobj){++++++}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] #2: (CQ-uobj){+++++.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] stack backtrace: Pid: 2484, comm: ibv_srq_pingpon Not tainted 3.4.0-rc5+ #34 Call Trace: [] print_circular_bug+0x1f8/0x209 [] __lock_acquire+0xa29/0xd06 [] ? __idr_get_uobj+0x20/0x5e [ib_uverbs] [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] lock_acquire+0xbf/0xfe [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] ? lock_release+0x166/0x189 [] down_read+0x34/0x43 [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [] ? lock_acquire+0xdb/0xfe [] ? lock_release_non_nested+0x94/0x213 [] ? might_fault+0x40/0x90 [] ? might_fault+0x40/0x90 [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] ? fget_light+0x3b/0x99 [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b Reported-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 66 +++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 85231e2bdee..ad750f3c6ca 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1423,13 +1423,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } device = xrcd->device; } else { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - if (!pd || !scq) { - ret = -EINVAL; - goto err_put; - } - if (cmd.qp_type == IB_QPT_XRC_INI) { cmd.max_recv_wr = cmd.max_recv_sge = 0; } else { @@ -1440,13 +1433,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_put; } } - rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); - if (!rcq) { - ret = -EINVAL; - goto err_put; + + if (cmd.recv_cq_handle != cmd.send_cq_handle) { + rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } + + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + rcq = rcq ?: scq; + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } + device = pd->device; } @@ -2484,27 +2488,27 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err; - } - if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); - if (!attr.ext.xrc.cq) { - ret = -EINVAL; - goto err_put_pd; - } - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); if (!attr.ext.xrc.xrcd) { ret = -EINVAL; - goto err_put_cq; + goto err; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_xrcd; + } + } + + pd = idr_read_pd(cmd->pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; @@ -2581,17 +2585,17 @@ err_destroy: ib_destroy_srq(srq); err_put: - if (cmd->srq_type == IB_SRQT_XRC) { - atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); - } + put_pd_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) put_cq_read(attr.ext.xrc.cq); -err_put_pd: - put_pd_read(pd); +err_put_xrcd: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } err: put_uobj_write(&obj->uevent.uobject); -- cgit v1.2.3-70-g09d2 From b6cec8aa4a799d1e146095f0ba52454710f5ede4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 25 Apr 2012 17:42:35 +0000 Subject: RDMA/cma: Fix lockdep false positive recursive locking The following lockdep problem was reported by Or Gerlitz : [ INFO: possible recursive locking detected ] 3.3.0-32035-g1b2649e-dirty #4 Not tainted --------------------------------------------- kworker/5:1/418 is trying to acquire lock: (&id_priv->handler_mutex){+.+.+.}, at: [] rdma_destroy_i d+0x33/0x1f0 [rdma_cm] but task is already holding lock: (&id_priv->handler_mutex){+.+.+.}, at: [] cma_disable_ca llback+0x24/0x45 [rdma_cm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&id_priv->handler_mutex); lock(&id_priv->handler_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/5:1/418: #0: (ib_cm){.+.+.+}, at: [] process_one_work+0x210/0x4a 6 #1: ((&(&work->work)->work)){+.+.+.}, at: [] process_on e_work+0x210/0x4a6 #2: (&id_priv->handler_mutex){+.+.+.}, at: [] cma_disab le_callback+0x24/0x45 [rdma_cm] stack backtrace: Pid: 418, comm: kworker/5:1 Not tainted 3.3.0-32035-g1b2649e-dirty #4 Call Trace: [] ? console_unlock+0x1f4/0x204 [] __lock_acquire+0x16b5/0x174e [] ? save_trace+0x3f/0xb3 [] lock_acquire+0xf0/0x116 [] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] mutex_lock_nested+0x64/0x2ce [] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] ? trace_hardirqs_on_caller+0x11e/0x155 [] ? trace_hardirqs_on+0xd/0xf [] rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] cma_req_handler+0x418/0x644 [rdma_cm] [] cm_process_work+0x32/0x119 [ib_cm] [] cm_req_handler+0x928/0x982 [ib_cm] [] ? cm_req_handler+0x982/0x982 [ib_cm] [] cm_work_handler+0x33/0xfe5 [ib_cm] [] ? trace_hardirqs_on_caller+0x11e/0x155 [] ? cm_req_handler+0x982/0x982 [ib_cm] [] process_one_work+0x2bd/0x4a6 [] ? process_one_work+0x210/0x4a6 [] ? _raw_spin_unlock_irq+0x2b/0x40 [] worker_thread+0x1d6/0x350 [] ? rescuer_thread+0x241/0x241 [] kthread+0x84/0x8c [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x56/0x56 [] ? gs_change+0xb/0xb The actual locking is fine, since we're dealing with different locks, but from the same lock class. cma_disable_callback() acquires the listening id mutex, whereas rdma_destroy_id() acquires the mutex for the new connection id. To fix this, delay the call to rdma_destroy_id() until we've released the listening id mutex. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e3e470fecaa..79c7eebb970 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1218,13 +1218,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } if (!conn_id) { ret = -ENOMEM; - goto out; + goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_acquire_dev(conn_id); if (ret) - goto release_conn_id; + goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; @@ -1236,31 +1236,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) { - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. - */ - mutex_lock(&lock); - if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); - mutex_unlock(&lock); - mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); - goto out; - } + if (ret) + goto err3; + + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. + */ + mutex_lock(&lock); + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); + mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); + return 0; +err3: + cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; - -release_conn_id: +err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(&conn_id->id); - -out: +err1: mutex_unlock(&listen_id->handler_mutex); + if (conn_id) + rdma_destroy_id(&conn_id->id); return ret; } -- cgit v1.2.3-70-g09d2 From c938a616aadb621b8e26b0ac09ac13d053c7ed1c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 1 Mar 2012 12:17:51 +0200 Subject: IB/core: Add raw packet QP type IB_QPT_RAW_PACKET allows applications to build a complete packet, including L2 headers, when sending; on the receive side, the HW will not strip any headers. This QP type is designed for userspace direct access to Ethernet; for example by applications that do TCP/IP themselves. Only processes with the NET_RAW capability are allowed to create raw packet QPs (the name "raw packet QP" is supposed to suggest an analogy to AF_PACKET / SOL_RAW sockets). Signed-off-by: Or Gerlitz Reviewed-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 3 +++ drivers/infiniband/core/verbs.c | 1 + include/rdma/ib_verbs.h | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4d27e4c3fe3..7d801e6252f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1399,6 +1399,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + return -EPERM; + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 575b78045aa..140f9a5d492 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -479,6 +479,7 @@ static const struct { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_PORT, [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c3cca5a4dac..a3fa3232b8a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -605,7 +605,7 @@ enum ib_qp_type { IB_QPT_UD, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, - /* Save 8 for RAW_PACKET */ + IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, IB_QPT_MAX -- cgit v1.2.3-70-g09d2