summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig10
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/uverbs.h132
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c1006
-rw-r--r--drivers/infiniband/core/uverbs_main.c698
-rw-r--r--drivers/infiniband/core/uverbs_mem.c221
-rw-r--r--drivers/infiniband/core/verbs.c32
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c76
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c141
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h14
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c330
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h16
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c215
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h81
-rw-r--r--drivers/infiniband/include/ib_user_verbs.h389
-rw-r--r--drivers/infiniband/include/ib_verbs.h124
19 files changed, 3348 insertions, 174 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 3cc3ff0cccb..79c8e2dd9c3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -7,6 +7,16 @@ config INFINIBAND
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
+config INFINIBAND_USER_VERBS
+ tristate "InfiniBand userspace verbs support"
+ depends on INFINIBAND
+ ---help---
+ Userspace InfiniBand verbs support. This is the kernel side
+ of userspace verbs, which allows userspace processes to
+ directly access InfiniBand hardware for fast-path
+ operations. You will also need libibverbs and a hardware
+ driver library from <http://www.openib.org>.
+
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d2dbfb52c0a..e1a7cf3e863 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,6 +1,7 @@
EXTRA_CFLAGS += -Idrivers/infiniband/include
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
@@ -10,3 +11,5 @@ ib_mad-y := mad.o smi.o agent.o
ib_sa-y := sa_query.o
ib_umad-y := user_mad.o
+
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
new file mode 100644
index 00000000000..57347f1e82c
--- /dev/null
+++ b/drivers/infiniband/core/uverbs.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
+ */
+
+#ifndef UVERBS_H
+#define UVERBS_H
+
+/* Include device.h and fs.h until cdev.h is self-sufficient */
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include <ib_verbs.h>
+#include <ib_user_verbs.h>
+
+struct ib_uverbs_device {
+ int devnum;
+ struct cdev dev;
+ struct class_device class_dev;
+ struct ib_device *ib_dev;
+ int num_comp;
+};
+
+struct ib_uverbs_event_file {
+ struct kref ref;
+ struct ib_uverbs_file *uverbs_file;
+ spinlock_t lock;
+ int fd;
+ int is_async;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+};
+
+struct ib_uverbs_file {
+ struct kref ref;
+ struct ib_uverbs_device *device;
+ struct ib_ucontext *ucontext;
+ struct ib_event_handler event_handler;
+ struct ib_uverbs_event_file async_file;
+ struct ib_uverbs_event_file comp_file[1];
+};
+
+struct ib_uverbs_async_event {
+ struct ib_uverbs_async_event_desc desc;
+ struct list_head list;
+};
+
+struct ib_uverbs_comp_event {
+ struct ib_uverbs_comp_event_desc desc;
+ struct list_head list;
+};
+
+struct ib_uobject_mr {
+ struct ib_uobject uobj;
+ struct page *page_list;
+ struct scatterlist *sg_list;
+};
+
+extern struct semaphore ib_uverbs_idr_mutex;
+extern struct idr ib_uverbs_pd_idr;
+extern struct idr ib_uverbs_mr_idr;
+extern struct idr ib_uverbs_mw_idr;
+extern struct idr ib_uverbs_ah_idr;
+extern struct idr ib_uverbs_cq_idr;
+extern struct idr ib_uverbs_qp_idr;
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
+
+int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
+ void *addr, size_t size, int write);
+void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
+void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
+
+#define IB_UVERBS_DECLARE_CMD(name) \
+ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
+ const char __user *buf, int in_len, \
+ int out_len)
+
+IB_UVERBS_DECLARE_CMD(query_params);
+IB_UVERBS_DECLARE_CMD(get_context);
+IB_UVERBS_DECLARE_CMD(query_device);
+IB_UVERBS_DECLARE_CMD(query_port);
+IB_UVERBS_DECLARE_CMD(query_gid);
+IB_UVERBS_DECLARE_CMD(query_pkey);
+IB_UVERBS_DECLARE_CMD(alloc_pd);
+IB_UVERBS_DECLARE_CMD(dealloc_pd);
+IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(destroy_cq);
+IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(modify_qp);
+IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(attach_mcast);
+IB_UVERBS_DECLARE_CMD(detach_mcast);
+
+#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
new file mode 100644
index 00000000000..5f2bbcda4c7
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
+ */
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_params cmd;
+ struct ib_uverbs_query_params_resp resp;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.num_cq_events = file->device->num_comp;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_get_context cmd;
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_udata udata;
+ struct ib_device *ibdev = file->device->ib_dev;
+ int i;
+ int ret = in_len;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
+ if (IS_ERR(file->ucontext)) {
+ ret = PTR_ERR(file->ucontext);
+ file->ucontext = NULL;
+ return ret;
+ }
+
+ file->ucontext->device = ibdev;
+ INIT_LIST_HEAD(&file->ucontext->pd_list);
+ INIT_LIST_HEAD(&file->ucontext->mr_list);
+ INIT_LIST_HEAD(&file->ucontext->mw_list);
+ INIT_LIST_HEAD(&file->ucontext->cq_list);
+ INIT_LIST_HEAD(&file->ucontext->qp_list);
+ INIT_LIST_HEAD(&file->ucontext->srq_list);
+ INIT_LIST_HEAD(&file->ucontext->ah_list);
+ spin_lock_init(&file->ucontext->lock);
+
+ resp.async_fd = file->async_file.fd;
+ for (i = 0; i < file->device->num_comp; ++i)
+ if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
+ i * sizeof (__u32),
+ &file->comp_file[i].fd, sizeof (__u32)))
+ goto err;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ goto err;
+
+ return in_len;
+
+err:
+ ibdev->dealloc_ucontext(file->ucontext);
+ file->ucontext = NULL;
+
+ return -EFAULT;
+}
+
+ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_device cmd;
+ struct ib_uverbs_query_device_resp resp;
+ struct ib_device_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_device(file->device->ib_dev, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.fw_ver = attr.fw_ver;
+ resp.node_guid = attr.node_guid;
+ resp.sys_image_guid = attr.sys_image_guid;
+ resp.max_mr_size = attr.max_mr_size;
+ resp.page_size_cap = attr.page_size_cap;
+ resp.vendor_id = attr.vendor_id;
+ resp.vendor_part_id = attr.vendor_part_id;
+ resp.hw_ver = attr.hw_ver;
+ resp.max_qp = attr.max_qp;
+ resp.max_qp_wr = attr.max_qp_wr;
+ resp.device_cap_flags = attr.device_cap_flags;
+ resp.max_sge = attr.max_sge;
+ resp.max_sge_rd = attr.max_sge_rd;
+ resp.max_cq = attr.max_cq;
+ resp.max_cqe = attr.max_cqe;
+ resp.max_mr = attr.max_mr;
+ resp.max_pd = attr.max_pd;
+ resp.max_qp_rd_atom = attr.max_qp_rd_atom;
+ resp.max_ee_rd_atom = attr.max_ee_rd_atom;
+ resp.max_res_rd_atom = attr.max_res_rd_atom;
+ resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
+ resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
+ resp.atomic_cap = attr.atomic_cap;
+ resp.max_ee = attr.max_ee;
+ resp.max_rdd = attr.max_rdd;
+ resp.max_mw = attr.max_mw;
+ resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
+ resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
+ resp.max_mcast_grp = attr.max_mcast_grp;
+ resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
+ resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
+ resp.max_ah = attr.max_ah;
+ resp.max_fmr = attr.max_fmr;
+ resp.max_map_per_fmr = attr.max_map_per_fmr;
+ resp.max_srq = attr.max_srq;
+ resp.max_srq_wr = attr.max_srq_wr;
+ resp.max_srq_sge = attr.max_srq_sge;
+ resp.max_pkeys = attr.max_pkeys;
+ resp.local_ca_ack_delay = attr.local_ca_ack_delay;
+ resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_port cmd;
+ struct ib_uverbs_query_port_resp resp;
+ struct ib_port_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
+ if (ret)
+ return ret;
+
+ memset(&resp, 0, sizeof resp);
+
+ resp.state = attr.state;
+ resp.max_mtu = attr.max_mtu;
+ resp.active_mtu = attr.active_mtu;
+ resp.gid_tbl_len = attr.gid_tbl_len;
+ resp.port_cap_flags = attr.port_cap_flags;
+ resp.max_msg_sz = attr.max_msg_sz;
+ resp.bad_pkey_cntr = attr.bad_pkey_cntr;
+ resp.qkey_viol_cntr = attr.qkey_viol_cntr;
+ resp.pkey_tbl_len = attr.pkey_tbl_len;
+ resp.lid = attr.lid;
+ resp.sm_lid = attr.sm_lid;
+ resp.lmc = attr.lmc;
+ resp.max_vl_num = attr.max_vl_num;
+ resp.sm_sl = attr.sm_sl;
+ resp.subnet_timeout = attr.subnet_timeout;
+ resp.init_type_reply = attr.init_type_reply;
+ resp.active_width = attr.active_width;
+ resp.active_speed = attr.active_speed;
+ resp.phys_state = attr.phys_state;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_gid cmd;
+ struct ib_uverbs_query_gid_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
+ (union ib_gid *) resp.gid);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_query_pkey cmd;
+ struct ib_uverbs_query_pkey_resp resp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ memset(&resp, 0, sizeof resp);
+
+ ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
+ &resp.pkey);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ return -EFAULT;
+
+ return in_len;
+}
+
+ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_alloc_pd cmd;
+ struct ib_uverbs_alloc_pd_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->context = file->ucontext;
+
+ pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err;
+ }
+
+ pd->device = file->device->ib_dev;
+ pd->uobject = uobj;
+ atomic_set(&pd->usecnt, 0);
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_pd;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_pd;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp);
+ resp.pd_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_pd:
+ ib_dealloc_pd(pd);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_dealloc_pd cmd;
+ struct ib_pd *pd;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = pd->uobject;
+
+ ret = ib_dealloc_pd(pd);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_mr cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_umem_object *obj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->uobject.context = file->ucontext;
+
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ ret = ib_umem_get(file->device->ib_dev, &obj->umem,
+ (void *) (unsigned long) cmd.start, cmd.length,
+ !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
+ if (ret)
+ goto err_free;
+
+ obj->umem.virt_base = cmd.hca_va;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ if (!pd->device->reg_user_mr) {
+ ret = -ENOSYS;
+ goto err_up;
+ }
+
+ mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_up;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = &obj->uobject;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+
+ memset(&resp, 0, sizeof resp);
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_unreg;
+ }
+
+ ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_unreg;
+
+ resp.mr_handle = obj->uobject.id;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&obj->uobject.list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+err_unreg:
+ ib_dereg_mr(mr);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ ib_umem_release(file->device->ib_dev, &obj->umem);
+
+err_free:
+ kfree(obj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mr cmd;
+ struct ib_mr *mr;
+ struct ib_umem_object *memobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle);
+ if (!mr || mr->uobject->context != file->ucontext)
+ goto out;
+
+ memobj = container_of(mr->uobject, struct ib_umem_object, uobject);
+
+ ret = ib_dereg_mr(mr);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&memobj->uobject.list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ ib_umem_release(file->device->ib_dev, &memobj->umem);
+ kfree(memobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_cq *cq;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ if (cmd.event_handler >= file->device->num_comp)
+ return -EINVAL;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
+ file->ucontext, &udata);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err;
+ }
+
+ cq->device = file->device->ib_dev;
+ cq->uobject = uobj;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = file;
+ atomic_set(&cq->usecnt, 0);
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_cq;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_cq;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->cq_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ memset(&resp, 0, sizeof resp);
+ resp.cq_handle = uobj->id;
+ resp.cqe = cq->cqe;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ down(&ib_uverbs_idr_mutex);
+ idr_remove(&ib_uverbs_cq_idr, uobj->id);
+ up(&ib_uverbs_idr_mutex);
+
+err_cq:
+ ib_destroy_cq(cq);
+
+err:
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_cq cmd;
+ struct ib_cq *cq;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = cq->uobject;
+
+ ret = ib_destroy_cq(cq);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_qp cmd;
+ struct ib_uverbs_create_qp_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_cq *scq, *rcq;
+ struct ib_qp *qp;
+ struct ib_qp_init_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle);
+ rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle);
+
+ if (!pd || pd->uobject->context != file->ucontext ||
+ !scq || scq->uobject->context != file->ucontext ||
+ !rcq || rcq->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.send_cq = scq;
+ attr.recv_cq = rcq;
+ attr.srq = NULL;
+ attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd.qp_type;
+
+ attr.cap.max_send_wr = cmd.max_send_wr;
+ attr.cap.max_recv_wr = cmd.max_recv_wr;
+ attr.cap.max_send_sge = cmd.max_send_sge;
+ attr.cap.max_recv_sge = cmd.max_recv_sge;
+ attr.cap.max_inline_data = cmd.max_inline_data;
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ qp = pd->device->create_qp(pd, &attr, &udata);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_up;
+ }
+
+ qp->device = pd->device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->uobject = uobj;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp->qp_num;
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_destroy;
+ }
+
+ ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_destroy;
+
+ resp.qp_handle = uobj->id;
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_add_tail(&uobj->list, &file->ucontext->qp_list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_list;
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_list:
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+err_destroy:
+ ib_destroy_qp(qp);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_qp cmd;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ attr->qp_state = cmd.qp_state;
+ attr->cur_qp_state = cmd.cur_qp_state;
+ attr->path_mtu = cmd.path_mtu;
+ attr->path_mig_state = cmd.path_mig_state;
+ attr->qkey = cmd.qkey;
+ attr->rq_psn = cmd.rq_psn;
+ attr->sq_psn = cmd.sq_psn;
+ attr->dest_qp_num = cmd.dest_qp_num;
+ attr->qp_access_flags = cmd.qp_access_flags;
+ attr->pkey_index = cmd.pkey_index;
+ attr->alt_pkey_index = cmd.pkey_index;
+ attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd.min_rnr_timer;
+ attr->port_num = cmd.port_num;
+ attr->timeout = cmd.timeout;
+ attr->retry_cnt = cmd.retry_cnt;
+ attr->rnr_retry = cmd.rnr_retry;
+ attr->alt_port_num = cmd.alt_port_num;
+ attr->alt_timeout = cmd.alt_timeout;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+ attr->ah_attr.dlid = cmd.dest.dlid;
+ attr->ah_attr.sl = cmd.dest.sl;
+ attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+ ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+ if (ret)
+ goto out;
+
+ ret = in_len;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+ kfree(attr);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_destroy_qp cmd;
+ struct ib_qp *qp;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = qp->uobject;
+
+ ret = ib_destroy_qp(qp);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
+
+ spin_lock_irq(&file->ucontext->lock);
+ list_del(&uobj->list);
+ spin_unlock_irq(&file->ucontext->lock);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_attach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext)
+ ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_detach_mcast cmd;
+ struct ib_qp *qp;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (qp && qp->uobject->context == file->ucontext)
+ ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
new file mode 100644
index 00000000000..fbbe03d8c90
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -0,0 +1,698 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+
+#include <asm/uaccess.h>
+
+#include "uverbs.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace verbs access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
+
+enum {
+ IB_UVERBS_MAJOR = 231,
+ IB_UVERBS_BASE_MINOR = 192,
+ IB_UVERBS_MAX_DEVICES = 32
+};
+
+#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+
+DECLARE_MUTEX(ib_uverbs_idr_mutex);
+DEFINE_IDR(ib_uverbs_pd_idr);
+DEFINE_IDR(ib_uverbs_mr_idr);
+DEFINE_IDR(ib_uverbs_mw_idr);
+DEFINE_IDR(ib_uverbs_ah_idr);
+DEFINE_IDR(ib_uverbs_cq_idr);
+DEFINE_IDR(ib_uverbs_qp_idr);
+
+static spinlock_t map_lock;
+static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
+
+static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len) = {
+ [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
+ [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+};
+
+static struct vfsmount *uverbs_event_mnt;
+
+static void ib_uverbs_add_one(struct ib_device *device);
+static void ib_uverbs_remove_one(struct ib_device *device);
+
+static int ib_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj, *tmp;
+
+ if (!context)
+ return 0;
+
+ down(&ib_uverbs_idr_mutex);
+
+ /* XXX Free AHs */
+
+ list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
+ struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+ idr_remove(&ib_uverbs_qp_idr, uobj->id);
+ ib_destroy_qp(qp);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
+ struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+ idr_remove(&ib_uverbs_cq_idr, uobj->id);
+ ib_destroy_cq(cq);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ /* XXX Free SRQs */
+ /* XXX Free MWs */
+
+ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
+ struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id);
+ struct ib_umem_object *memobj;
+
+ idr_remove(&ib_uverbs_mr_idr, uobj->id);
+ ib_dereg_mr(mr);
+
+ memobj = container_of(uobj, struct ib_umem_object, uobject);
+ ib_umem_release_on_close(mr->device, &memobj->umem);
+
+ list_del(&uobj->list);
+ kfree(memobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
+ struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id);
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+ ib_dealloc_pd(pd);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
+
+ up(&ib_uverbs_idr_mutex);
+
+ return context->device->dealloc_ucontext(context);
+}
+
+static void ib_uverbs_release_file(struct kref *ref)
+{
+ struct ib_uverbs_file *file =
+ container_of(ref, struct ib_uverbs_file, ref);
+
+ module_put(file->device->ib_dev->owner);
+ kfree(file);
+}
+
+static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+ void *event;
+ int eventsz;
+ int ret = 0;
+
+ spin_lock_irq(&file->lock);
+
+ while (list_empty(&file->event_list) && file->fd >= 0) {
+ spin_unlock_irq(&file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list) ||
+ file->fd < 0))
+ return -ERESTARTSYS;
+
+ spin_lock_irq(&file->lock);
+ }
+
+ if (file->fd < 0) {
+ spin_unlock_irq(&file->lock);
+ return -ENODEV;
+ }
+
+ if (file->is_async) {
+ event = list_entry(file->event_list.next,
+ struct ib_uverbs_async_event, list);
+ eventsz = sizeof (struct ib_uverbs_async_event_desc);
+ } else {
+ event = list_entry(file->event_list.next,
+ struct ib_uverbs_comp_event, list);
+ eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ }
+
+ if (eventsz > count) {
+ ret = -EINVAL;
+ event = NULL;
+ } else
+ list_del(file->event_list.next);
+
+ spin_unlock_irq(&file->lock);
+
+ if (event) {
+ if (copy_to_user(buf, event, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+ }
+
+ kfree(event);
+
+ return ret;
+}
+
+static unsigned int ib_uverbs_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ unsigned int pollflags = 0;
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ poll_wait(filp, &file->poll_wait, wait);
+
+ spin_lock_irq(&file->lock);
+ if (file->fd < 0)
+ pollflags = POLLERR;
+ else if (!list_empty(&file->event_list))
+ pollflags = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&file->lock);
+
+ return pollflags;
+}
+
+static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+{
+ struct list_head *entry, *tmp;
+
+ spin_lock_irq(&file->lock);
+ if (file->fd != -1) {
+ file->fd = -1;
+ list_for_each_safe(entry, tmp, &file->event_list)
+ if (file->is_async)
+ kfree(list_entry(entry, struct ib_uverbs_async_event, list));
+ else
+ kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
+ }
+ spin_unlock_irq(&file->lock);
+}
+
+static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_event_file *file = filp->private_data;
+
+ ib_uverbs_event_release(file);
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+static struct file_operations uverbs_event_fops = {
+ /*
+ * No .owner field since we artificially create event files,
+ * so there is no increment to the module reference count in
+ * the open path. All event files come from a uverbs command
+ * file, which already takes a module reference, so this is OK.
+ */
+ .read = ib_uverbs_event_read,
+ .poll = ib_uverbs_event_poll,
+ .release = ib_uverbs_event_close
+};
+
+void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct ib_uverbs_file *file = cq_context;
+ struct ib_uverbs_comp_event *entry;
+ unsigned long flags;
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry)
+ return;
+
+ entry->desc.cq_handle = cq->uobject->user_handle;
+
+ spin_lock_irqsave(&file->comp_file[0].lock, flags);
+ list_add_tail(&entry->list, &file->comp_file[0].event_list);
+ spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+
+ wake_up_interruptible(&file->comp_file[0].poll_wait);
+}
+
+static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
+ __u64 element, __u64 event)
+{
+ struct ib_uverbs_async_event *entry;
+ unsigned long flags;
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry)
+ return;
+
+ entry->desc.element = element;
+ entry->desc.event_type = event;
+
+ spin_lock_irqsave(&file->async_file.lock, flags);
+ list_add_tail(&entry->list, &file->async_file.event_list);
+ spin_unlock_irqrestore(&file->async_file.lock, flags);
+
+ wake_up_interruptible(&file->async_file.poll_wait);
+}
+
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr,
+ event->element.cq->uobject->user_handle,
+ event->event);
+}
+
+void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr,
+ event->element.qp->uobject->user_handle,
+ event->event);
+}
+
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct ib_uverbs_file *file =
+ container_of(handler, struct ib_uverbs_file, event_handler);
+
+ ib_uverbs_async_handler(file, event->element.port_num, event->event);
+}
+
+static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
+ struct ib_uverbs_file *uverbs_file)
+{
+ struct file *filp;
+
+ spin_lock_init(&file->lock);
+ INIT_LIST_HEAD(&file->event_list);
+ init_waitqueue_head(&file->poll_wait);
+ file->uverbs_file = uverbs_file;
+
+ file->fd = get_unused_fd();
+ if (file->fd < 0)
+ return file->fd;
+
+ filp = get_empty_filp();
+ if (!filp) {
+ put_unused_fd(file->fd);
+ return -ENFILE;
+ }
+
+ filp->f_op = &uverbs_event_fops;
+ filp->f_vfsmnt = mntget(uverbs_event_mnt);
+ filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
+ filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+ filp->f_flags = O_RDONLY;
+ filp->f_mode = FMODE_READ;
+ filp->private_data = file;
+
+ fd_install(file->fd, filp);
+
+ return 0;
+}
+
+static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_cmd_hdr hdr;
+
+ if (count < sizeof hdr)
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, sizeof hdr))
+ return -EFAULT;
+
+ if (hdr.in_words * 4 != count)
+ return -EINVAL;
+
+ if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
+ return -EINVAL;
+
+ if (!file->ucontext &&
+ hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+ hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ return -EINVAL;
+
+ return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
+ hdr.in_words * 4, hdr.out_words * 4);
+}
+
+static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ if (!file->ucontext)
+ return -ENODEV;
+ else
+ return file->device->ib_dev->mmap(file->ucontext, vma);
+}
+
+static int ib_uverbs_open(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_device *dev =
+ container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+ struct ib_uverbs_file *file;
+ int i = 0;
+ int ret;
+
+ if (!try_module_get(dev->ib_dev->owner))
+ return -ENODEV;
+
+ file = kmalloc(sizeof *file +
+ (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
+ GFP_KERNEL);
+ if (!file)
+ return -ENOMEM;
+
+ file->device = dev;
+ kref_init(&file->ref);
+
+ file->ucontext = NULL;
+
+ ret = ib_uverbs_event_init(&file->async_file, file);
+ if (ret)
+ goto err;
+
+ file->async_file.is_async = 1;
+
+ kref_get(&file->ref);
+
+ for (i = 0; i < dev->num_comp; ++i) {
+ ret = ib_uverbs_event_init(&file->comp_file[i], file);
+ if (ret)
+ goto err_async;
+ kref_get(&file->ref);
+ file->comp_file[i].is_async = 0;
+ }
+
+
+ filp->private_data = file;
+
+ INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
+ ib_uverbs_event_handler);
+ if (ib_register_event_handler(&file->event_handler))
+ goto err_async;
+
+ return 0;
+
+err_async:
+ while (i--)
+ ib_uverbs_event_release(&file->comp_file[i]);
+
+ ib_uverbs_event_release(&file->async_file);
+
+err:
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return ret;
+}
+
+static int ib_uverbs_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ int i;
+
+ ib_unregister_event_handler(&file->event_handler);
+ ib_uverbs_event_release(&file->async_file);
+ ib_dealloc_ucontext(file->ucontext);
+
+ for (i = 0; i < file->device->num_comp; ++i)
+ ib_uverbs_event_release(&file->comp_file[i]);
+
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ return 0;
+}
+
+static struct file_operations uverbs_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+static struct file_operations uverbs_mmap_fops = {
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .mmap = ib_uverbs_mmap,
+ .open = ib_uverbs_open,
+ .release = ib_uverbs_close
+};
+
+static struct ib_client uverbs_client = {
+ .name = "uverbs",
+ .add = ib_uverbs_add_one,
+ .remove = ib_uverbs_remove_one
+};
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+ struct ib_uverbs_device *dev =
+ container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+ return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+{
+ struct ib_uverbs_device *dev =
+ container_of(class_dev, struct ib_uverbs_device, class_dev);
+
+ cdev_del(&dev->dev);
+ clear_bit(dev->devnum, dev_map);
+ kfree(dev);
+}
+
+static struct class uverbs_class = {
+ .name = "infiniband_verbs",
+ .release = ib_uverbs_release_class_dev
+};
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static void ib_uverbs_add_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev;
+
+ if (!device->alloc_ucontext)
+ return;
+
+ uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ if (!uverbs_dev)
+ return;
+
+ memset(uverbs_dev, 0, sizeof *uverbs_dev);
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ spin_unlock(&map_lock);
+ goto err;
+ }
+ set_bit(uverbs_dev->devnum, dev_map);
+ spin_unlock(&map_lock);
+
+ uverbs_dev->ib_dev = device;
+ uverbs_dev->num_comp = 1;
+
+ if (device->mmap)
+ cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
+ else
+ cdev_init(&uverbs_dev->dev, &uverbs_fops);
+ uverbs_dev->dev.owner = THIS_MODULE;
+ kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ goto err;
+
+ uverbs_dev->class_dev.class = &uverbs_class;
+ uverbs_dev->class_dev.dev = device->dma_device;
+ uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
+ snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
+ if (class_device_register(&uverbs_dev->class_dev))
+ goto err_cdev;
+
+ if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+
+ ib_set_client_data(device, &uverbs_client, uverbs_dev);
+
+ return;
+
+err_class:
+ class_device_unregister(&uverbs_dev->class_dev);
+
+err_cdev:
+ cdev_del(&uverbs_dev->dev);
+ clear_bit(uverbs_dev->devnum, dev_map);
+
+err:
+ kfree(uverbs_dev);
+ return;
+}
+
+static void ib_uverbs_remove_one(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+
+ if (!uverbs_dev)
+ return;
+
+ class_device_unregister(&uverbs_dev->class_dev);
+}
+
+static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
+ INFINIBANDEVENTFS_MAGIC);
+}
+
+static struct file_system_type uverbs_event_fs = {
+ /* No owner field so module can be unloaded */
+ .name = "infinibandeventfs",
+ .get_sb = uverbs_event_get_sb,
+ .kill_sb = kill_litter_super
+};
+
+static int __init ib_uverbs_init(void)
+{
+ int ret;
+
+ spin_lock_init(&map_lock);
+
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register device number\n");
+ goto out;
+ }
+
+ ret = class_register(&uverbs_class);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+ ret = register_filesystem(&uverbs_event_fs);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
+ goto out_class;
+ }
+
+ uverbs_event_mnt = kern_mount(&uverbs_event_fs);
+ if (IS_ERR(uverbs_event_mnt)) {
+ ret = PTR_ERR(uverbs_event_mnt);
+ printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
+ goto out_fs;
+ }
+
+ ret = ib_register_client(&uverbs_client);
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register client\n");
+ goto out_mnt;
+ }
+
+ return 0;
+
+out_mnt:
+ mntput(uverbs_event_mnt);
+
+out_fs:
+ unregister_filesystem(&uverbs_event_fs);
+
+out_class:
+ class_unregister(&uverbs_class);
+
+out_chrdev:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+
+out:
+ return ret;
+}
+
+static void __exit ib_uverbs_cleanup(void)
+{
+ ib_unregister_client(&uverbs_client);
+ mntput(uverbs_event_mnt);
+ unregister_filesystem(&uverbs_event_fs);
+ class_unregister(&uverbs_class);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+}
+
+module_init(ib_uverbs_init);
+module_exit(ib_uverbs_cleanup);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
new file mode 100644
index 00000000000..ed550f6595b
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_mem.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+
+struct ib_umem_account_work {
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
+{
+ struct ib_umem_chunk *chunk, *tmp;
+ int i;
+
+ list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+ dma_unmap_sg(dev->dma_device, chunk->page_list,
+ chunk->nents, DMA_BIDIRECTIONAL);
+ for (i = 0; i < chunk->nents; ++i) {
+ if (umem->writable && dirty)
+ set_page_dirty_lock(chunk->page_list[i].page);
+ put_page(chunk->page_list[i].page);
+ }
+
+ kfree(chunk);
+ }
+}
+
+int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
+ void *addr, size_t size, int write)
+{
+ struct page **page_list;
+ struct ib_umem_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret = 0;
+ int off;
+ int i;
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ mem->user_base = (unsigned long) addr;
+ mem->length = size;
+ mem->offset = (unsigned long) addr & ~PAGE_MASK;
+ mem->page_size = PAGE_SIZE;
+ mem->writable = write;
+
+ INIT_LIST_HEAD(&mem->chunk_list);
+
+ npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cur_base = (unsigned long) addr & PAGE_MASK;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(int, npages,
+ PAGE_SIZE / sizeof (struct page *)),
+ 1, !write, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
+ min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ for (i = 0; i < chunk->nents; ++i) {
+ chunk->page_list[i].page = page_list[i + off];
+ chunk->page_list[i].offset = 0;
+ chunk->page_list[i].length = PAGE_SIZE;
+ }
+
+ chunk->nmap = dma_map_sg(dev->dma_device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL);
+ if (chunk->nmap <= 0) {
+ for (i = 0; i < chunk->nents; ++i)
+ put_page(chunk->page_list[i].page);
+ kfree(chunk);
+
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, &mem->chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0)
+ __ib_umem_release(dev, mem, 0);
+ else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+
+ return ret;
+}
+
+void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
+{
+ __ib_umem_release(dev, umem, 1);
+
+ down_write(&current->mm->mmap_sem);
+ current->mm->locked_vm -=
+ PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+ up_write(&current->mm->mmap_sem);
+}
+
+static void ib_umem_account(void *work_ptr)
+{
+ struct ib_umem_account_work *work = work_ptr;
+
+ down_write(&work->mm->mmap_sem);
+ work->mm->locked_vm -= work->diff;
+ up_write(&work->mm->mmap_sem);
+ mmput(work->mm);
+ kfree(work);
+}
+
+void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
+{
+ struct ib_umem_account_work *work;
+ struct mm_struct *mm;
+
+ __ib_umem_release(dev, umem, 1);
+
+ mm = get_task_mm(current);
+ if (!mm)
+ return;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. Therefore we
+ * defer the vm_locked accounting to the system workqueue.
+ */
+
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->work, ib_umem_account, work);
+ work->mm = mm;
+ work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+
+ schedule_work(&work->work);
+}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 7c08ed0cd7d..2516f964651 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -4,6 +4,7 @@
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,10 +48,11 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
- pd = device->alloc_pd(device);
+ pd = device->alloc_pd(device, NULL, NULL);
if (!IS_ERR(pd)) {
- pd->device = device;
+ pd->device = device;
+ pd->uobject = NULL;
atomic_set(&pd->usecnt, 0);
}
@@ -76,8 +78,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah = pd->device->create_ah(pd, ah_attr);
if (!IS_ERR(ah)) {
- ah->device = pd->device;
- ah->pd = pd;
+ ah->device = pd->device;
+ ah->pd = pd;
+ ah->uobject = NULL;
atomic_inc(&pd->usecnt);
}
@@ -122,7 +125,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
{
struct ib_qp *qp;
- qp = pd->device->create_qp(pd, qp_init_attr);
+ qp = pd->device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
qp->device = pd->device;
@@ -130,6 +133,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->send_cq = qp_init_attr->send_cq;
qp->recv_cq = qp_init_attr->recv_cq;
qp->srq = qp_init_attr->srq;
+ qp->uobject = NULL;
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
@@ -197,10 +201,11 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
{
struct ib_cq *cq;
- cq = device->create_cq(device, cqe);
+ cq = device->create_cq(device, cqe, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
+ cq->uobject = NULL;
cq->comp_handler = comp_handler;
cq->event_handler = event_handler;
cq->cq_context = cq_context;
@@ -245,8 +250,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
mr = pd->device->get_dma_mr(pd, mr_access_flags);
if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
@@ -267,8 +273,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
mr_access_flags, iova_start);
if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
@@ -344,8 +351,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
mw = pd->device->alloc_mw(pd);
if (!IS_ERR(mw)) {
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = NULL;
atomic_inc(&pd->usecnt);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 766e9031ec4..b5aea7b869f 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -742,6 +743,7 @@ err_out:
}
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
int size = nent * MTHCA_CQ_ENTRY_SIZE;
@@ -753,30 +755,33 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
might_sleep();
- cq->ibcq.cqe = nent - 1;
+ cq->ibcq.cqe = nent - 1;
+ cq->is_kernel = !ctx;
cq->cqn = mthca_alloc(&dev->cq_table.alloc);
if (cq->cqn == -1)
return -ENOMEM;
if (mthca_is_memfree(dev)) {
- cq->arm_sn = 1;
-
err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
if (err)
goto err_out;
- err = -ENOMEM;
+ if (cq->is_kernel) {
+ cq->arm_sn = 1;
+
+ err = -ENOMEM;
- cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
- cq->cqn, &cq->set_ci_db);
- if (cq->set_ci_db_index < 0)
- goto err_out_icm;
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
- cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
- cq->cqn, &cq->arm_db);
- if (cq->arm_db_index < 0)
- goto err_out_ci;
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -785,12 +790,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context = mailbox->buf;
- err = mthca_alloc_cq_buf(dev, size, cq);
- if (err)
- goto err_out_mailbox;
+ if (cq->is_kernel) {
+ err = mthca_alloc_cq_buf(dev, size, cq);
+ if (err)
+ goto err_out_mailbox;
- for (i = 0; i < nent; ++i)
- set_cqe_hw(get_cqe(cq, i));
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe(cq, i));
+ }
spin_lock_init(&cq->lock);
atomic_set(&cq->refcount, 1);
@@ -801,11 +808,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
MTHCA_CQ_STATE_DISARMED |
MTHCA_CQ_FLAG_TR);
cq_context->start = cpu_to_be64(0);
- cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
- dev->driver_uar.index);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (ctx)
+ cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+ else
+ cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
- cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
+ cq_context->pd = cpu_to_be32(pdn);
cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
@@ -843,18 +853,20 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
return 0;
err_out_free_mr:
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ }
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_arm:
- if (mthca_is_memfree(dev))
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
err_out_ci:
- if (mthca_is_memfree(dev))
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
err_out_icm:
@@ -892,7 +904,8 @@ void mthca_free_cq(struct mthca_dev *dev,
int j;
printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
- cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+ cq->cqn, cq->cons_index,
+ cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
@@ -910,12 +923,13 @@ void mthca_free_cq(struct mthca_dev *dev,
atomic_dec(&cq->refcount);
wait_event(cq->wait, !atomic_read(&cq->refcount));
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
-
- if (mthca_is_memfree(dev)) {
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ }
}
mthca_table_put(dev, dev->cq_table.table, cq->cqn);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 4127f09dc5e..5ecdd2eeeb0 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -378,7 +379,7 @@ void mthca_unregister_device(struct mthca_dev *dev);
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
@@ -413,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
@@ -438,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 09519b604c0..2ef916859e1 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -665,7 +665,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev)
goto err_pd_table_free;
}
- err = mthca_pd_alloc(dev, &dev->driver_pd);
+ err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
if (err) {
mthca_err(dev, "Failed to create driver PD, "
"aborting.\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 6d3b05dd9e3..2a864615035 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +48,15 @@ enum {
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
+struct mthca_user_db_table {
+ struct semaphore mutex;
+ struct {
+ u64 uvirt;
+ struct scatterlist mem;
+ int refcount;
+ } page[0];
+};
+
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
{
struct mthca_icm_chunk *chunk, *tmp;
@@ -344,13 +354,133 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
kfree(table);
}
-static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
return dev->uar_table.uarc_base +
- dev->driver_uar.index * dev->uar_table.uarc_size +
+ uar->index * dev->uar_table.uarc_size +
page * 4096;
}
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+ int ret = 0;
+ u8 status;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ if (index < 0 || index > dev->uar_table.uarc_size / 8)
+ return -EINVAL;
+
+ down(&db_tab->mutex);
+
+ i = index / MTHCA_DB_REC_PER_PAGE;
+
+ if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
+ (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+ (uaddr & 4095)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (db_tab->page[i].refcount) {
+ ++db_tab->page[i].refcount;
+ goto out;
+ }
+
+ ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+ &db_tab->page[i].mem.page, NULL);
+ if (ret < 0)
+ goto out;
+
+ db_tab->page[i].mem.length = 4096;
+ db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
+
+ ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ if (ret < 0) {
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+ mthca_uarc_virt(dev, uar, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ db_tab->page[i].uvirt = uaddr;
+ db_tab->page[i].refcount = 1;
+
+out:
+ up(&db_tab->mutex);
+ return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index)
+{
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * To make our bookkeeping simpler, we don't unmap DB
+ * pages until we clean up the whole db table.
+ */
+
+ down(&db_tab->mutex);
+
+ --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+ up(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+ struct mthca_user_db_table *db_tab;
+ int npages;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return NULL;
+
+ npages = dev->uar_table.uarc_size / 4096;
+ db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ if (!db_tab)
+ return ERR_PTR(-ENOMEM);
+
+ init_MUTEX(&db_tab->mutex);
+ for (i = 0; i < npages; ++i) {
+ db_tab->page[i].refcount = 0;
+ db_tab->page[i].uvirt = 0;
+ }
+
+ return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+ if (db_tab->page[i].uvirt) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ }
+ }
+}
+
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
{
int group;
@@ -407,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
}
memset(page->db_rec, 0, 4096);
- ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
+ ret = mthca_MAP_ICM_page(dev, page->mapping,
+ mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
@@ -461,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, 4096,
page->db_rec, page->mapping);
@@ -530,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
dma_free_coherent(&dev->pdev->dev, 4096,
dev->db_tab->page[i].db_rec,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index fe7be2a6bc4..4761d844cb5 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -148,7 +149,7 @@ struct mthca_db_table {
struct semaphore mutex;
};
-enum {
+enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,6 +159,17 @@ enum {
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};
+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab);
+
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index ea66847e4ea..c2c899844e9 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -37,23 +38,27 @@
#include "mthca_dev.h"
-int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
{
- int err;
+ int err = 0;
might_sleep();
+ pd->privileged = privileged;
+
atomic_set(&pd->sqp_count, 0);
pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
if (pd->pd_num == -1)
return -ENOMEM;
- err = mthca_mr_alloc_notrans(dev, pd->pd_num,
- MTHCA_MPT_FLAG_LOCAL_READ |
- MTHCA_MPT_FLAG_LOCAL_WRITE,
- &pd->ntmr);
- if (err)
- mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ if (privileged) {
+ err = mthca_mr_alloc_notrans(dev, pd->pd_num,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ MTHCA_MPT_FLAG_LOCAL_WRITE,
+ &pd->ntmr);
+ if (err)
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ }
return err;
}
@@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd)
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
{
might_sleep();
- mthca_free_mr(dev, &pd->ntmr);
+ if (pd->privileged)
+ mthca_free_mr(dev, &pd->ntmr);
mthca_free(&dev->pd_table.alloc, pd->pd_num);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0b5adfd9159..7a58ce90e17 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,9 +35,12 @@
*/
#include <ib_smi.h>
+#include <linux/mm.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"
static int mthca_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
@@ -284,7 +288,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
return err;
}
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mthca_alloc_ucontext_resp uresp;
+ struct mthca_ucontext *context;
+ int err;
+
+ memset(&uresp, 0, sizeof uresp);
+
+ uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+ if (mthca_is_memfree(to_mdev(ibdev)))
+ uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+ else
+ uresp.uarc_size = 0;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+ if (IS_ERR(context->db_tab)) {
+ err = PTR_ERR(context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+{
+ mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab);
+ mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+ kfree(to_mucontext(context));
+
+ return 0;
+}
+
+static int mthca_mmap_uar(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
struct mthca_pd *pd;
int err;
@@ -293,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
if (!pd)
return ERR_PTR(-ENOMEM);
- err = mthca_pd_alloc(to_mdev(ibdev), pd);
+ err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
+ mthca_pd_free(to_mdev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
return &pd->ibpd;
}
@@ -338,8 +421,10 @@ static int mthca_ah_destroy(struct ib_ah *ah)
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;
@@ -348,41 +433,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_UC:
case IB_QPT_UD:
{
+ struct mthca_ucontext *context;
+
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index, ucmd.sq_db_page);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index, ucmd.rq_db_page);
+ if (err) {
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->mr.ibmr.lkey = ucmd.lkey;
+ qp->sq.db_index = ucmd.sq_db_index;
+ qp->rq.db_index = ucmd.rq_db_index;
+ }
err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- qp);
+ &init_attr->cap, qp);
+
+ if (err && pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index);
+ }
+
qp->ibqp.qp_num = qp->qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ /* Don't allow userspace to create special QPs */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->sq.max = init_attr->cap.max_send_wr;
- qp->rq.max = init_attr->cap.max_recv_wr;
- qp->sq.max_gs = init_attr->cap.max_send_sge;
- qp->rq.max_gs = init_attr->cap.max_recv_sge;
-
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
- init_attr->sq_sig_type,
+ init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
to_msqp(qp));
break;
@@ -397,42 +523,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
return ERR_PTR(err);
}
- init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_send_wr = qp->sq.max;
+ init_attr->cap.max_recv_wr = qp->rq.max;
+ init_attr->cap.max_send_sge = qp->sq.max_gs;
+ init_attr->cap.max_recv_sge = qp->rq.max_gs;
return &qp->ibqp;
}
static int mthca_destroy_qp(struct ib_qp *qp)
{
+ if (qp->uobject) {
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->sq.db_index);
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->rq.db_index);
+ }
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
kfree(qp);
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
{
+ struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.set_db_index, ucmd.set_db_page);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.arm_db_index, ucmd.arm_db_page);
+ if (err)
+ goto err_unmap_set;
+ }
+
cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_unmap_arm;
+ }
+
+ if (context) {
+ cq->mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
+ }
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
- err = mthca_init_cq(to_mdev(ibdev), nent, cq);
- if (err) {
- kfree(cq);
- cq = ERR_PTR(err);
+ err = mthca_init_cq(to_mdev(ibdev), nent,
+ context ? to_mucontext(context) : NULL,
+ context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ cq);
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ mthca_free_cq(to_mdev(ibdev), cq);
+ goto err_free;
}
return &cq->ibcq;
+
+err_free:
+ kfree(cq);
+
+err_unmap_arm:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.arm_db_index);
+
+err_unmap_set:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.set_db_index);
+
+ return ERR_PTR(err);
}
static int mthca_destroy_cq(struct ib_cq *cq)
{
+ if (cq->uobject) {
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->arm_db_index);
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->set_ci_db_index);
+ }
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
kfree(cq);
@@ -568,6 +767,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
return &mr->ibmr;
}
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+ int acc, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(pd->device);
+ struct ib_umem_chunk *chunk;
+ struct mthca_mr *mr;
+ u64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+
+ shift = ffs(region->page_size) - 1;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ n = 0;
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ n += chunk->nents;
+
+ mr->mtt = mthca_alloc_mtt(dev, n);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
+ goto err;
+ }
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(&chunk->page_list[j]) +
+ region->page_size * k;
+ /*
+ * Be friendly to WRITE_MTT command
+ * and leave two empty slots for the
+ * index and reserved fields of the
+ * mailbox.
+ */
+ if (i == PAGE_SIZE / sizeof (u64) - 2) {
+ err = mthca_write_mtt(dev, mr->mtt,
+ n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+mtt_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_mtt;
+
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
+ region->length, convert_access(acc), mr);
+
+ if (err)
+ goto err_mtt;
+
+ return &mr->ibmr;
+
+err_mtt:
+ mthca_free_mtt(dev, mr->mtt);
+
+err:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
@@ -692,6 +972,8 @@ int mthca_register_device(struct mthca_dev *dev)
int i;
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.owner = THIS_MODULE;
+
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -701,6 +983,9 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
+ dev->ib_dev.mmap = mthca_mmap_uar;
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
@@ -713,6 +998,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 4d976cccb1a..1d032791cc8 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -54,6 +55,14 @@ struct mthca_uar {
int index;
};
+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mthca_uar uar;
+ struct mthca_user_db_table *db_tab;
+};
+
struct mthca_mtt;
struct mthca_mr {
@@ -83,6 +92,7 @@ struct mthca_pd {
u32 pd_num;
atomic_t sqp_count;
struct mthca_mr ntmr;
+ int privileged;
};
struct mthca_eq {
@@ -167,6 +177,7 @@ struct mthca_cq {
int cqn;
u32 cons_index;
int is_direct;
+ int is_kernel;
/* Next fields are Arbel only */
int set_ci_db_index;
@@ -236,6 +247,11 @@ struct mthca_sqp {
dma_addr_t header_dma;
};
+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 163a8ef4186..f7126b14d5a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -46,7 +47,9 @@ enum {
MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
MTHCA_ACK_REQ_FREQ = 10,
MTHCA_FLIGHT_LIMIT = 9,
- MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */
+ MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
+ MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
+ MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
};
enum {
@@ -689,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
/* leave arbel_sched_queue as 0 */
- qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ if (qp->ibqp.uobject)
+ qp_context->usr_page =
+ cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ else
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
if (attr_mask & IB_QP_DEST_QPN) {
qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -954,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1 << qp->sq.wqe_shift);
+
+ /*
+ * If this is a userspace QP, we don't actually have to
+ * allocate anything. All we need is to calculate the WQE
+ * sizes and the send_wqe_offset, so we're done now.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
size = PAGE_ALIGN(qp->send_wqe_offset +
(qp->sq.max << qp->sq.wqe_shift));
@@ -1053,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
return err;
}
-static int mthca_alloc_memfree(struct mthca_dev *dev,
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- int ret = 0;
+ int i;
+ int size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ if (qp->is_direct) {
+ dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
+ } else {
+ for (i = 0; i < size / PAGE_SIZE; ++i) {
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
+ }
+ }
+
+ kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret;
if (mthca_is_memfree(dev)) {
ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1067,35 +1105,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev,
if (ret)
goto err_qpc;
- ret = mthca_table_get(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- if (ret)
- goto err_eqpc;
-
- qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
- qp->qpn, &qp->rq.db);
- if (qp->rq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rdb;
- }
+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;
- qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
- qp->qpn, &qp->sq.db);
- if (qp->sq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rq_db;
- }
}
return 0;
-err_rq_db:
- mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-
-err_rdb:
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
-
err_eqpc:
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
@@ -1105,6 +1123,35 @@ err_qpc:
return ret;
}
+static void mthca_unmap_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret = 0;
+
+ if (mthca_is_memfree(dev)) {
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0)
+ return ret;
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0)
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ }
+
+ return ret;
+}
+
static void mthca_free_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
@@ -1112,11 +1159,6 @@ static void mthca_free_memfree(struct mthca_dev *dev,
mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
}
-
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
- mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}
static void mthca_wq_init(struct mthca_wq* wq)
@@ -1147,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
mthca_wq_init(&qp->sq);
mthca_wq_init(&qp->rq);
- ret = mthca_alloc_memfree(dev, qp);
+ ret = mthca_map_memfree(dev, qp);
if (ret)
return ret;
ret = mthca_alloc_wqe_buf(dev, pd, qp);
if (ret) {
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ /*
+ * If this is a userspace QP, we're done now. The doorbells
+ * will be allocated and buffers will be initialized in
+ * userspace.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret) {
+ mthca_free_wqe_buf(dev, qp);
+ mthca_unmap_memfree(dev, qp);
return ret;
}
@@ -1186,22 +1243,39 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
return 0;
}
-static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp)
+static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
+ struct mthca_qp *qp)
{
- int i;
-
- if (!mthca_is_memfree(dev))
- return;
+ /* Sanity check QP size before proceeding */
+ if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
+ cap->max_send_sge > 64 || cap->max_recv_sge > 64)
+ return -EINVAL;
- for (i = 0; 1 << i < qp->rq.max; ++i)
- ; /* nothing */
+ if (mthca_is_memfree(dev)) {
+ qp->rq.max = cap->max_recv_wr ?
+ roundup_pow_of_two(cap->max_recv_wr) : 0;
+ qp->sq.max = cap->max_send_wr ?
+ roundup_pow_of_two(cap->max_send_wr) : 0;
+ } else {
+ qp->rq.max = cap->max_recv_wr;
+ qp->sq.max = cap->max_send_wr;
+ }
- qp->rq.max = 1 << i;
+ qp->rq.max_gs = cap->max_recv_sge;
+ qp->sq.max_gs = max_t(int, cap->max_send_sge,
+ ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
+ MTHCA_INLINE_CHUNK_SIZE) /
+ sizeof (struct mthca_data_seg));
- for (i = 0; 1 << i < qp->sq.max; ++i)
- ; /* nothing */
+ /*
+ * For MLX transport we need 2 extra S/G entries:
+ * one for the header and one for the checksum at the end
+ */
+ if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
+ qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
+ return -EINVAL;
- qp->sq.max = 1 << i;
+ return 0;
}
int mthca_alloc_qp(struct mthca_dev *dev,
@@ -1210,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev,
struct mthca_cq *recv_cq,
enum ib_qp_type type,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
struct mthca_qp *qp)
{
int err;
- mthca_align_qp_size(dev, qp);
+ err = mthca_set_qp_size(dev, cap, qp);
+ if (err)
+ return err;
switch (type) {
case IB_QPT_RC: qp->transport = RC; break;
@@ -1247,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
int qpn,
int port,
struct mthca_sqp *sqp)
{
- int err = 0;
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
+ int err;
- mthca_align_qp_size(dev, &sqp->qp);
+ err = mthca_set_qp_size(dev, cap, &sqp->qp);
+ if (err)
+ return err;
sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1313,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev,
struct mthca_qp *qp)
{
u8 status;
- int size;
- int i;
struct mthca_cq *send_cq;
struct mthca_cq *recv_cq;
@@ -1344,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev,
if (qp->state != IB_QPS_RESET)
mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
- mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
- if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
- mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
-
- mthca_free_mr(dev, &qp->mr);
-
- size = PAGE_ALIGN(qp->send_wqe_offset +
- (qp->sq.max << qp->sq.wqe_shift));
+ /*
+ * If this is a userspace QP, the buffers, MR, CQs and so on
+ * will be cleaned up in userspace, so all we have to do is
+ * unref the mem-free tables and free the QPN in our table.
+ */
+ if (!qp->ibqp.uobject) {
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
- if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
- } else {
- for (i = 0; i < size / PAGE_SIZE; ++i) {
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
- }
+ mthca_free_mr(dev, &qp->mr);
+ mthca_free_memfree(dev, qp);
+ mthca_free_wqe_buf(dev, qp);
}
- kfree(qp->wrid);
-
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 00000000000..3024c1b4547
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mthca_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct mthca_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mthca_create_cq {
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mthca_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/include/ib_user_verbs.h b/drivers/infiniband/include/ib_user_verbs.h
new file mode 100644
index 00000000000..7c613706af7
--- /dev/null
+++ b/drivers/infiniband/include/ib_user_verbs.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
+ */
+
+#ifndef IB_USER_VERBS_H
+#define IB_USER_VERBS_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IB_USER_VERBS_ABI_VERSION 1
+
+enum {
+ IB_USER_VERBS_CMD_QUERY_PARAMS,
+ IB_USER_VERBS_CMD_GET_CONTEXT,
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ IB_USER_VERBS_CMD_QUERY_GID,
+ IB_USER_VERBS_CMD_QUERY_PKEY,
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ IB_USER_VERBS_CMD_CREATE_AH,
+ IB_USER_VERBS_CMD_MODIFY_AH,
+ IB_USER_VERBS_CMD_QUERY_AH,
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ IB_USER_VERBS_CMD_REG_MR,
+ IB_USER_VERBS_CMD_REG_SMR,
+ IB_USER_VERBS_CMD_REREG_MR,
+ IB_USER_VERBS_CMD_QUERY_MR,
+ IB_USER_VERBS_CMD_DEREG_MR,
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ IB_USER_VERBS_CMD_BIND_MW,
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_CMD_RESIZE_CQ,
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ IB_USER_VERBS_CMD_POLL_CQ,
+ IB_USER_VERBS_CMD_PEEK_CQ,
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_CMD_QUERY_QP,
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ IB_USER_VERBS_CMD_POST_SEND,
+ IB_USER_VERBS_CMD_POST_RECV,
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ IB_USER_VERBS_CMD_DETACH_MCAST
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct ib_uverbs_async_event_desc {
+ __u64 element;
+ __u32 event_type; /* enum ib_event_type */
+ __u32 reserved;
+};
+
+struct ib_uverbs_comp_event_desc {
+ __u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these value.
+ */
+
+struct ib_uverbs_cmd_hdr {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+};
+
+/*
+ * No driver_data for "query params" command, since this is intended
+ * to be a core function with no possible device dependence.
+ */
+struct ib_uverbs_query_params {
+ __u64 response;
+};
+
+struct ib_uverbs_query_params_resp {
+ __u32 num_cq_events;
+};
+
+struct ib_uverbs_get_context {
+ __u64 response;
+ __u64 cq_fd_tab;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_get_context_resp {
+ __u32 async_fd;
+ __u32 reserved;
+};
+
+struct ib_uverbs_query_device {
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_device_resp {
+ __u64 fw_ver;
+ __u64 node_guid;
+ __u64 sys_image_guid;
+ __u64 max_mr_size;
+ __u64 page_size_cap;
+ __u32 vendor_id;
+ __u32 vendor_part_id;
+ __u32 hw_ver;
+ __u32 max_qp;
+ __u32 max_qp_wr;
+ __u32 device_cap_flags;
+ __u32 max_sge;
+ __u32 max_sge_rd;
+ __u32 max_cq;
+ __u32 max_cqe;
+ __u32 max_mr;
+ __u32 max_pd;
+ __u32 max_qp_rd_atom;
+ __u32 max_ee_rd_atom;
+ __u32 max_res_rd_atom;
+ __u32 max_qp_init_rd_atom;
+ __u32 max_ee_init_rd_atom;
+ __u32 atomic_cap;
+ __u32 max_ee;
+ __u32 max_rdd;
+ __u32 max_mw;
+ __u32 max_raw_ipv6_qp;
+ __u32 max_raw_ethy_qp;
+ __u32 max_mcast_grp;
+ __u32 max_mcast_qp_attach;
+ __u32 max_total_mcast_qp_attach;
+ __u32 max_ah;
+ __u32 max_fmr;
+ __u32 max_map_per_fmr;
+ __u32 max_srq;
+ __u32 max_srq_wr;
+ __u32 max_srq_sge;
+ __u16 max_pkeys;
+ __u8 local_ca_ack_delay;
+ __u8 phys_port_cnt;
+ __u8 reserved[4];
+};
+
+struct ib_uverbs_query_port {
+ __u64 response;
+ __u8 port_num;
+ __u8 reserved[7];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_port_resp {
+ __u32 port_cap_flags;
+ __u32 max_msg_sz;
+ __u32 bad_pkey_cntr;
+ __u32 qkey_viol_cntr;
+ __u32 gid_tbl_len;
+ __u16 pkey_tbl_len;
+ __u16 lid;
+ __u16 sm_lid;
+ __u8 state;
+ __u8 max_mtu;
+ __u8 active_mtu;
+ __u8 lmc;
+ __u8 max_vl_num;
+ __u8 sm_sl;
+ __u8 subnet_timeout;
+ __u8 init_type_reply;
+ __u8 active_width;
+ __u8 active_speed;
+ __u8 phys_state;
+ __u8 reserved[3];
+};
+
+struct ib_uverbs_query_gid {
+ __u64 response;
+ __u8 port_num;
+ __u8 index;
+ __u8 reserved[6];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_gid_resp {
+ __u8 gid[16];
+};
+
+struct ib_uverbs_query_pkey {
+ __u64 response;
+ __u8 port_num;
+ __u8 index;
+ __u8 reserved[6];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_pkey_resp {
+ __u16 pkey;
+ __u16 reserved;
+};
+
+struct ib_uverbs_alloc_pd {
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_alloc_pd_resp {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_dealloc_pd {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_reg_mr {
+ __u64 response;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_reg_mr_resp {
+ __u32 mr_handle;
+ __u32 lkey;
+ __u32 rkey;
+};
+
+struct ib_uverbs_dereg_mr {
+ __u32 mr_handle;
+};
+
+struct ib_uverbs_create_cq {
+ __u64 response;
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 event_handler;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_cq_resp {
+ __u32 cq_handle;
+ __u32 cqe;
+};
+
+struct ib_uverbs_destroy_cq {
+ __u32 cq_handle;
+};
+
+struct ib_uverbs_create_qp {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 send_cq_handle;
+ __u32 recv_cq_handle;
+ __u32 srq_handle;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u8 sq_sig_all;
+ __u8 qp_type;
+ __u8 is_srq;
+ __u8 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_qp_resp {
+ __u32 qp_handle;
+ __u32 qpn;
+};
+
+/*
+ * This struct needs to remain a multiple of 8 bytes to keep the
+ * alignment of the modify QP parameters.
+ */
+struct ib_uverbs_qp_dest {
+ __u8 dgid[16];
+ __u32 flow_label;
+ __u16 dlid;
+ __u16 reserved;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 sl;
+ __u8 src_path_bits;
+ __u8 static_rate;
+ __u8 is_global;
+ __u8 port_num;
+};
+
+struct ib_uverbs_modify_qp {
+ struct ib_uverbs_qp_dest dest;
+ struct ib_uverbs_qp_dest alt_dest;
+ __u32 qp_handle;
+ __u32 attr_mask;
+ __u32 qkey;
+ __u32 rq_psn;
+ __u32 sq_psn;
+ __u32 dest_qp_num;
+ __u32 qp_access_flags;
+ __u16 pkey_index;
+ __u16 alt_pkey_index;
+ __u8 qp_state;
+ __u8 cur_qp_state;
+ __u8 path_mtu;
+ __u8 path_mig_state;
+ __u8 en_sqd_async_notify;
+ __u8 max_rd_atomic;
+ __u8 max_dest_rd_atomic;
+ __u8 min_rnr_timer;
+ __u8 port_num;
+ __u8 timeout;
+ __u8 retry_cnt;
+ __u8 rnr_retry;
+ __u8 alt_port_num;
+ __u8 alt_timeout;
+ __u8 reserved[2];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_modify_qp_resp {
+};
+
+struct ib_uverbs_destroy_qp {
+ __u32 qp_handle;
+};
+
+struct ib_uverbs_attach_mcast {
+ __u8 gid[16];
+ __u32 qp_handle;
+ __u16 mlid;
+ __u16 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_detach_mcast {
+ __u8 gid[16];
+ __u32 qp_handle;
+ __u16 mlid;
+ __u16 reserved;
+ __u64 driver_data[0];
+};
+
+#endif /* IB_USER_VERBS_H */
diff --git a/drivers/infiniband/include/ib_verbs.h b/drivers/infiniband/include/ib_verbs.h
index cf01f044a22..e5bd9a10c20 100644
--- a/drivers/infiniband/include/ib_verbs.h
+++ b/drivers/infiniband/include/ib_verbs.h
@@ -4,6 +4,7 @@
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -41,7 +42,10 @@
#include <linux/types.h>
#include <linux/device.h>
+
#include <asm/atomic.h>
+#include <asm/scatterlist.h>
+#include <asm/uaccess.h>
union ib_gid {
u8 raw[16];
@@ -544,7 +548,7 @@ struct ib_send_wr {
int num_sge;
enum ib_wr_opcode opcode;
int send_flags;
- u32 imm_data;
+ __be32 imm_data;
union {
struct {
u64 remote_addr;
@@ -618,29 +622,86 @@ struct ib_fmr_attr {
u8 page_size;
};
+struct ib_ucontext {
+ struct ib_device *device;
+ struct list_head pd_list;
+ struct list_head mr_list;
+ struct list_head mw_list;
+ struct list_head cq_list;
+ struct list_head qp_list;
+ struct list_head srq_list;
+ struct list_head ah_list;
+ spinlock_t lock;
+};
+
+struct ib_uobject {
+ u64 user_handle; /* handle given to us by userspace */
+ struct ib_ucontext *context; /* associated user context */
+ struct list_head list; /* link to context's list */
+ u32 id; /* index into kernel idr */
+};
+
+struct ib_umem {
+ unsigned long user_base;
+ unsigned long virt_base;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+};
+
+struct ib_umem_chunk {
+ struct list_head list;
+ int nents;
+ int nmap;
+ struct scatterlist page_list[0];
+};
+
+struct ib_udata {
+ void __user *inbuf;
+ void __user *outbuf;
+ size_t inlen;
+ size_t outlen;
+};
+
+#define IB_UMEM_MAX_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
+ ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
+ (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
+
+struct ib_umem_object {
+ struct ib_uobject uobject;
+ struct ib_umem umem;
+};
+
struct ib_pd {
- struct ib_device *device;
- atomic_t usecnt; /* count all resources */
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ atomic_t usecnt; /* count all resources */
};
struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
+ struct ib_uobject *uobject;
};
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
struct ib_cq {
- struct ib_device *device;
- ib_comp_handler comp_handler;
- void (*event_handler)(struct ib_event *, void *);
- void * cq_context;
- int cqe;
- atomic_t usecnt; /* count number of work queues */
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ ib_comp_handler comp_handler;
+ void (*event_handler)(struct ib_event *, void *);
+ void * cq_context;
+ int cqe;
+ atomic_t usecnt; /* count number of work queues */
};
struct ib_srq {
struct ib_device *device;
+ struct ib_uobject *uobject;
struct ib_pd *pd;
void *srq_context;
atomic_t usecnt;
@@ -652,6 +713,7 @@ struct ib_qp {
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
@@ -659,16 +721,18 @@ struct ib_qp {
};
struct ib_mr {
- struct ib_device *device;
- struct ib_pd *pd;
- u32 lkey;
- u32 rkey;
- atomic_t usecnt; /* count number of MWs */
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+ u32 lkey;
+ u32 rkey;
+ atomic_t usecnt; /* count number of MWs */
};
struct ib_mw {
struct ib_device *device;
struct ib_pd *pd;
+ struct ib_uobject *uobject;
u32 rkey;
};
@@ -737,7 +801,14 @@ struct ib_device {
int (*modify_port)(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
- struct ib_pd * (*alloc_pd)(struct ib_device *device);
+ struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
+ struct ib_udata *udata);
+ int (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+ struct ib_pd * (*alloc_pd)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah * (*create_ah)(struct ib_pd *pd,
struct ib_ah_attr *ah_attr);
@@ -747,7 +818,8 @@ struct ib_device {
struct ib_ah_attr *ah_attr);
int (*destroy_ah)(struct ib_ah *ah);
struct ib_qp * (*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask);
@@ -762,8 +834,9 @@ struct ib_device {
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
- int cqe);
+ struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
int (*destroy_cq)(struct ib_cq *cq);
int (*resize_cq)(struct ib_cq *cq, int *cqe);
int (*poll_cq)(struct ib_cq *cq, int num_entries,
@@ -780,6 +853,10 @@ struct ib_device {
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
+ struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
+ struct ib_umem *region,
+ int mr_access_flags,
+ struct ib_udata *udata);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
@@ -817,6 +894,7 @@ struct ib_device {
struct ib_mad *in_mad,
struct ib_mad *out_mad);
+ struct module *owner;
struct class_device class_dev;
struct kobject ports_parent;
struct list_head port_list;
@@ -852,6 +930,16 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event);