summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /drivers/infiniband/core
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile12
-rw-r--r--drivers/infiniband/core/agent.c373
-rw-r--r--drivers/infiniband/core/agent.h55
-rw-r--r--drivers/infiniband/core/agent_priv.h63
-rw-r--r--drivers/infiniband/core/cache.c365
-rw-r--r--drivers/infiniband/core/core_priv.h52
-rw-r--r--drivers/infiniband/core/device.c614
-rw-r--r--drivers/infiniband/core/fmr_pool.c507
-rw-r--r--drivers/infiniband/core/mad.c2714
-rw-r--r--drivers/infiniband/core/mad_priv.h199
-rw-r--r--drivers/infiniband/core/packer.c201
-rw-r--r--drivers/infiniband/core/sa_query.c866
-rw-r--r--drivers/infiniband/core/smi.c234
-rw-r--r--drivers/infiniband/core/smi.h67
-rw-r--r--drivers/infiniband/core/sysfs.c762
-rw-r--r--drivers/infiniband/core/ud_header.c365
-rw-r--r--drivers/infiniband/core/user_mad.c840
-rw-r--r--drivers/infiniband/core/verbs.c434
18 files changed, 8723 insertions, 0 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
new file mode 100644
index 00000000000..d2dbfb52c0a
--- /dev/null
+++ b/drivers/infiniband/core/Makefile
@@ -0,0 +1,12 @@
+EXTRA_CFLAGS += -Idrivers/infiniband/include
+
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o
+
+ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
+ device.o fmr_pool.o cache.o
+
+ib_mad-y := mad.o smi.o agent.o
+
+ib_sa-y := sa_query.o
+
+ib_umad-y := user_mad.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
new file mode 100644
index 00000000000..7aee5ebf3f0
--- /dev/null
+++ b/drivers/infiniband/core/agent.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#include <linux/dma-mapping.h>
+
+#include <asm/bug.h>
+
+#include <ib_smi.h>
+
+#include "smi.h"
+#include "agent_priv.h"
+#include "mad_priv.h"
+#include "agent.h"
+
+spinlock_t ib_agent_port_list_lock;
+static LIST_HEAD(ib_agent_port_list);
+
+/*
+ * Caller must hold ib_agent_port_list_lock
+ */
+static inline struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num,
+ struct ib_mad_agent *mad_agent)
+{
+ struct ib_agent_port_private *entry;
+
+ BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */
+
+ if (device) {
+ list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+ if (entry->smp_agent->device == device &&
+ entry->port_num == port_num)
+ return entry;
+ }
+ } else {
+ list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+ if ((entry->smp_agent == mad_agent) ||
+ (entry->perf_mgmt_agent == mad_agent))
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+static inline struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num,
+ struct ib_mad_agent *mad_agent)
+{
+ struct ib_agent_port_private *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ entry = __ib_get_agent_port(device, port_num, mad_agent);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ return entry;
+}
+
+int smi_check_local_dr_smp(struct ib_smp *smp,
+ struct ib_device *device,
+ int port_num)
+{
+ struct ib_agent_port_private *port_priv;
+
+ if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ return 1;
+ port_priv = ib_get_agent_port(device, port_num, NULL);
+ if (!port_priv) {
+ printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
+ "not open\n",
+ device->name, port_num);
+ return 1;
+ }
+
+ return smi_check_local_smp(port_priv->smp_agent, smp);
+}
+
+static int agent_mad_send(struct ib_mad_agent *mad_agent,
+ struct ib_agent_port_private *port_priv,
+ struct ib_mad_private *mad_priv,
+ struct ib_grh *grh,
+ struct ib_wc *wc)
+{
+ struct ib_agent_send_wr *agent_send_wr;
+ struct ib_sge gather_list;
+ struct ib_send_wr send_wr;
+ struct ib_send_wr *bad_send_wr;
+ struct ib_ah_attr ah_attr;
+ unsigned long flags;
+ int ret = 1;
+
+ agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
+ if (!agent_send_wr)
+ goto out;
+ agent_send_wr->mad = mad_priv;
+
+ /* PCI mapping */
+ gather_list.addr = dma_map_single(mad_agent->device->dma_device,
+ &mad_priv->mad,
+ sizeof(mad_priv->mad),
+ DMA_TO_DEVICE);
+ gather_list.length = sizeof(mad_priv->mad);
+ gather_list.lkey = (*port_priv->mr).lkey;
+
+ send_wr.next = NULL;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.sg_list = &gather_list;
+ send_wr.num_sge = 1;
+ send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
+ send_wr.wr.ud.timeout_ms = 0;
+ send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+
+ ah_attr.dlid = wc->slid;
+ ah_attr.port_num = mad_agent->port_num;
+ ah_attr.src_path_bits = wc->dlid_path_bits;
+ ah_attr.sl = wc->sl;
+ ah_attr.static_rate = 0;
+ ah_attr.ah_flags = 0; /* No GRH */
+ if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
+ if (wc->wc_flags & IB_WC_GRH) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ /* Should sgid be looked up ? */
+ ah_attr.grh.sgid_index = 0;
+ ah_attr.grh.hop_limit = grh->hop_limit;
+ ah_attr.grh.flow_label = be32_to_cpup(
+ &grh->version_tclass_flow) & 0xfffff;
+ ah_attr.grh.traffic_class = (be32_to_cpup(
+ &grh->version_tclass_flow) >> 20) & 0xff;
+ memcpy(ah_attr.grh.dgid.raw,
+ grh->sgid.raw,
+ sizeof(ah_attr.grh.dgid));
+ }
+ }
+
+ agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(agent_send_wr->ah)) {
+ printk(KERN_ERR SPFX "No memory for address handle\n");
+ kfree(agent_send_wr);
+ goto out;
+ }
+
+ send_wr.wr.ud.ah = agent_send_wr->ah;
+ if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
+ send_wr.wr.ud.pkey_index = wc->pkey_index;
+ send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
+ } else { /* for SMPs */
+ send_wr.wr.ud.pkey_index = 0;
+ send_wr.wr.ud.remote_qkey = 0;
+ }
+ send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
+ send_wr.wr_id = (unsigned long)agent_send_wr;
+
+ pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
+
+ /* Send */
+ spin_lock_irqsave(&port_priv->send_list_lock, flags);
+ if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
+ spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(agent_send_wr, mapping),
+ sizeof(mad_priv->mad),
+ DMA_TO_DEVICE);
+ ib_destroy_ah(agent_send_wr->ah);
+ kfree(agent_send_wr);
+ } else {
+ list_add_tail(&agent_send_wr->send_list,
+ &port_priv->send_posted_list);
+ spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+int agent_send(struct ib_mad_private *mad,
+ struct ib_grh *grh,
+ struct ib_wc *wc,
+ struct ib_device *device,
+ int port_num)
+{
+ struct ib_agent_port_private *port_priv;
+ struct ib_mad_agent *mad_agent;
+
+ port_priv = ib_get_agent_port(device, port_num, NULL);
+ if (!port_priv) {
+ printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
+ device->name, port_num);
+ return 1;
+ }
+
+ /* Get mad agent based on mgmt_class in MAD */
+ switch (mad->mad.mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ mad_agent = port_priv->smp_agent;
+ break;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ mad_agent = port_priv->perf_mgmt_agent;
+ break;
+ default:
+ return 1;
+ }
+
+ return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
+}
+
+static void agent_send_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_agent_port_private *port_priv;
+ struct ib_agent_send_wr *agent_send_wr;
+ unsigned long flags;
+
+ /* Find matching MAD agent */
+ port_priv = ib_get_agent_port(NULL, 0, mad_agent);
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
+ "agent %p\n", mad_agent);
+ return;
+ }
+
+ agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
+ spin_lock_irqsave(&port_priv->send_list_lock, flags);
+ /* Remove completed send from posted send MAD list */
+ list_del(&agent_send_wr->send_list);
+ spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+
+ /* Unmap PCI */
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(agent_send_wr, mapping),
+ sizeof(agent_send_wr->mad->mad),
+ DMA_TO_DEVICE);
+
+ ib_destroy_ah(agent_send_wr->ah);
+
+ /* Release allocated memory */
+ kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
+ kfree(agent_send_wr);
+}
+
+int ib_agent_port_open(struct ib_device *device, int port_num)
+{
+ int ret;
+ struct ib_agent_port_private *port_priv;
+ unsigned long flags;
+
+ /* First, check if port already open for SMI */
+ port_priv = ib_get_agent_port(device, port_num, NULL);
+ if (port_priv) {
+ printk(KERN_DEBUG SPFX "%s port %d already open\n",
+ device->name, port_num);
+ return 0;
+ }
+
+ /* Create new device info */
+ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ memset(port_priv, 0, sizeof *port_priv);
+ port_priv->port_num = port_num;
+ spin_lock_init(&port_priv->send_list_lock);
+ INIT_LIST_HEAD(&port_priv->send_posted_list);
+
+ /* Obtain send only MAD agent for SM class (SMI QP) */
+ port_priv->smp_agent = ib_register_mad_agent(device, port_num,
+ IB_QPT_SMI,
+ NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+
+ if (IS_ERR(port_priv->smp_agent)) {
+ ret = PTR_ERR(port_priv->smp_agent);
+ goto error2;
+ }
+
+ /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
+ port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI,
+ NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->perf_mgmt_agent)) {
+ ret = PTR_ERR(port_priv->perf_mgmt_agent);
+ goto error3;
+ }
+
+ port_priv->mr = ib_get_dma_mr(port_priv->smp_agent->qp->pd,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(port_priv->mr)) {
+ printk(KERN_ERR SPFX "Couldn't get DMA MR\n");
+ ret = PTR_ERR(port_priv->mr);
+ goto error4;
+ }
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ list_add_tail(&port_priv->port_list, &ib_agent_port_list);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ return 0;
+
+error4:
+ ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
+error3:
+ ib_unregister_mad_agent(port_priv->smp_agent);
+error2:
+ kfree(port_priv);
+error1:
+ return ret;
+}
+
+int ib_agent_port_close(struct ib_device *device, int port_num)
+{
+ struct ib_agent_port_private *port_priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_agent_port_list_lock, flags);
+ port_priv = __ib_get_agent_port(device, port_num, NULL);
+ if (port_priv == NULL) {
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+ printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ return -ENODEV;
+ }
+ list_del(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
+
+ ib_dereg_mr(port_priv->mr);
+
+ ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
+ ib_unregister_mad_agent(port_priv->smp_agent);
+ kfree(port_priv);
+
+ return 0;
+}
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
new file mode 100644
index 00000000000..d9426842254
--- /dev/null
+++ b/drivers/infiniband/core/agent.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#ifndef __AGENT_H_
+#define __AGENT_H_
+
+extern spinlock_t ib_agent_port_list_lock;
+
+extern int ib_agent_port_open(struct ib_device *device,
+ int port_num);
+
+extern int ib_agent_port_close(struct ib_device *device, int port_num);
+
+extern int agent_send(struct ib_mad_private *mad,
+ struct ib_grh *grh,
+ struct ib_wc *wc,
+ struct ib_device *device,
+ int port_num);
+
+#endif /* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
new file mode 100644
index 00000000000..17a0cce5813
--- /dev/null
+++ b/drivers/infiniband/core/agent_priv.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: agent_priv.h 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#ifndef __IB_AGENT_PRIV_H__
+#define __IB_AGENT_PRIV_H__
+
+#include <linux/pci.h>
+
+#define SPFX "ib_agent: "
+
+struct ib_agent_send_wr {
+ struct list_head send_list;
+ struct ib_ah *ah;
+ struct ib_mad_private *mad;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+struct ib_agent_port_private {
+ struct list_head port_list;
+ struct list_head send_posted_list;
+ spinlock_t send_list_lock;
+ int port_num;
+ struct ib_mad_agent *smp_agent; /* SM class */
+ struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
+ struct ib_mr *mr;
+};
+
+#endif /* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
new file mode 100644
index 00000000000..3042360c97e
--- /dev/null
+++ b/drivers/infiniband/core/cache.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+#include <ib_cache.h>
+
+#include "core_priv.h"
+
+struct ib_pkey_cache {
+ int table_len;
+ u16 table[0];
+};
+
+struct ib_gid_cache {
+ int table_len;
+ union ib_gid table[0];
+};
+
+struct ib_update_work {
+ struct work_struct work;
+ struct ib_device *device;
+ u8 port_num;
+};
+
+static inline int start_port(struct ib_device *device)
+{
+ return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+}
+
+static inline int end_port(struct ib_device *device)
+{
+ return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+}
+
+int ib_get_cached_gid(struct ib_device *device,
+ u8 port_num,
+ int index,
+ union ib_gid *gid)
+{
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.gid_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *gid = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_gid);
+
+int ib_find_cached_gid(struct ib_device *device,
+ union ib_gid *gid,
+ u8 *port_num,
+ u16 *index)
+{
+ struct ib_gid_cache *cache;
+ unsigned long flags;
+ int p, i;
+ int ret = -ENOENT;
+
+ *port_num = -1;
+ if (index)
+ *index = -1;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ cache = device->cache.gid_cache[p];
+ for (i = 0; i < cache->table_len; ++i) {
+ if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
+ *port_num = p + start_port(device);
+ if (index)
+ *index = i;
+ ret = 0;
+ goto found;
+ }
+ }
+ }
+found:
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_gid);
+
+int ib_get_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ int index,
+ u16 *pkey)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ if (index < 0 || index >= cache->table_len)
+ ret = -EINVAL;
+ else
+ *pkey = cache->table[index];
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_pkey);
+
+int ib_find_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int i;
+ int ret = -ENOENT;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+
+ *index = -1;
+
+ for (i = 0; i < cache->table_len; ++i)
+ if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ *index = i;
+ ret = 0;
+ break;
+ }
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_pkey);
+
+static void ib_cache_update(struct ib_device *device,
+ u8 port)
+{
+ struct ib_port_attr *tprops = NULL;
+ struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
+ struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
+ int i;
+ int ret;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ return;
+
+ ret = ib_query_port(device, port, tprops);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
+ ret, device->name);
+ goto err;
+ }
+
+ pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
+ sizeof *pkey_cache->table, GFP_KERNEL);
+ if (!pkey_cache)
+ goto err;
+
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
+ sizeof *gid_cache->table, GFP_KERNEL);
+ if (!gid_cache)
+ goto err;
+
+ gid_cache->table_len = tprops->gid_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ }
+
+ for (i = 0; i < gid_cache->table_len; ++i) {
+ ret = ib_query_gid(device, port, i, gid_cache->table + i);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ }
+
+ write_lock_irq(&device->cache.lock);
+
+ old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
+ old_gid_cache = device->cache.gid_cache [port - start_port(device)];
+
+ device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
+ device->cache.gid_cache [port - start_port(device)] = gid_cache;
+
+ write_unlock_irq(&device->cache.lock);
+
+ kfree(old_pkey_cache);
+ kfree(old_gid_cache);
+ kfree(tprops);
+ return;
+
+err:
+ kfree(pkey_cache);
+ kfree(gid_cache);
+ kfree(tprops);
+}
+
+static void ib_cache_task(void *work_ptr)
+{
+ struct ib_update_work *work = work_ptr;
+
+ ib_cache_update(work->device, work->port_num);
+ kfree(work);
+}
+
+static void ib_cache_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct ib_update_work *work;
+
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE) {
+ work = kmalloc(sizeof *work, GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(&work->work, ib_cache_task, work);
+ work->device = event->device;
+ work->port_num = event->element.port_num;
+ schedule_work(&work->work);
+ }
+ }
+}
+
+static void ib_cache_setup_one(struct ib_device *device)
+{
+ int p;
+
+ rwlock_init(&device->cache.lock);
+
+ device->cache.pkey_cache =
+ kmalloc(sizeof *device->cache.pkey_cache *
+ (end_port(device) - start_port(device) + 1), GFP_KERNEL);
+ device->cache.gid_cache =
+ kmalloc(sizeof *device->cache.pkey_cache *
+ (end_port(device) - start_port(device) + 1), GFP_KERNEL);
+
+ if (!device->cache.pkey_cache || !device->cache.gid_cache) {
+ printk(KERN_WARNING "Couldn't allocate cache "
+ "for %s\n", device->name);
+ goto err;
+ }
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ device->cache.pkey_cache[p] = NULL;
+ device->cache.gid_cache [p] = NULL;
+ ib_cache_update(device, p + start_port(device));
+ }
+
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+ device, ib_cache_event);
+ if (ib_register_event_handler(&device->cache.event_handler))
+ goto err_cache;
+
+ return;
+
+err_cache:
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ kfree(device->cache.pkey_cache[p]);
+ kfree(device->cache.gid_cache[p]);
+ }
+
+err:
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.gid_cache);
+}
+
+static void ib_cache_cleanup_one(struct ib_device *device)
+{
+ int p;
+
+ ib_unregister_event_handler(&device->cache.event_handler);
+ flush_scheduled_work();
+
+ for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+ kfree(device->cache.pkey_cache[p]);
+ kfree(device->cache.gid_cache[p]);
+ }
+
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.gid_cache);
+}
+
+static struct ib_client cache_client = {
+ .name = "cache",
+ .add = ib_cache_setup_one,
+ .remove = ib_cache_cleanup_one
+};
+
+int __init ib_cache_setup(void)
+{
+ return ib_register_client(&cache_client);
+}
+
+void __exit ib_cache_cleanup(void)
+{
+ ib_unregister_client(&cache_client);
+}
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
new file mode 100644
index 00000000000..797049626ff
--- /dev/null
+++ b/drivers/infiniband/core/core_priv.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#ifndef _CORE_PRIV_H
+#define _CORE_PRIV_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <ib_verbs.h>
+
+int ib_device_register_sysfs(struct ib_device *device);
+void ib_device_unregister_sysfs(struct ib_device *device);
+
+int ib_sysfs_setup(void);
+void ib_sysfs_cleanup(void);
+
+int ib_cache_setup(void);
+void ib_cache_cleanup(void);
+
+#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
new file mode 100644
index 00000000000..9197e92d708
--- /dev/null
+++ b/drivers/infiniband/core/device.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: device.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+#include "core_priv.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("core kernel InfiniBand API");
+MODULE_LICENSE("Dual BSD/GPL");
+
+struct ib_client_data {
+ struct list_head list;
+ struct ib_client *client;
+ void * data;
+};
+
+static LIST_HEAD(device_list);
+static LIST_HEAD(client_list);
+
+/*
+ * device_sem protects access to both device_list and client_list.
+ * There's no real point to using multiple locks or something fancier
+ * like an rwsem: we always access both lists, and we're always
+ * modifying one list or the other list. In any case this is not a
+ * hot path so there's no point in trying to optimize.
+ */
+static DECLARE_MUTEX(device_sem);
+
+static int ib_device_check_mandatory(struct ib_device *device)
+{
+#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
+ static const struct {
+ size_t offset;
+ char *name;
+ } mandatory_table[] = {
+ IB_MANDATORY_FUNC(query_device),
+ IB_MANDATORY_FUNC(query_port),
+ IB_MANDATORY_FUNC(query_pkey),
+ IB_MANDATORY_FUNC(query_gid),
+ IB_MANDATORY_FUNC(alloc_pd),
+ IB_MANDATORY_FUNC(dealloc_pd),
+ IB_MANDATORY_FUNC(create_ah),
+ IB_MANDATORY_FUNC(destroy_ah),
+ IB_MANDATORY_FUNC(create_qp),
+ IB_MANDATORY_FUNC(modify_qp),
+ IB_MANDATORY_FUNC(destroy_qp),
+ IB_MANDATORY_FUNC(post_send),
+ IB_MANDATORY_FUNC(post_recv),
+ IB_MANDATORY_FUNC(create_cq),
+ IB_MANDATORY_FUNC(destroy_cq),
+ IB_MANDATORY_FUNC(poll_cq),
+ IB_MANDATORY_FUNC(req_notify_cq),
+ IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(dereg_mr)
+ };
+ int i;
+
+ for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) {
+ if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
+ printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
+ device->name, mandatory_table[i].name);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static struct ib_device *__ib_device_get_by_name(const char *name)
+{
+ struct ib_device *device;
+
+ list_for_each_entry(device, &device_list, core_list)
+ if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+ return device;
+
+ return NULL;
+}
+
+
+static int alloc_name(char *name)
+{
+ long *inuse;
+ char buf[IB_DEVICE_NAME_MAX];
+ struct ib_device *device;
+ int i;
+
+ inuse = (long *) get_zeroed_page(GFP_KERNEL);
+ if (!inuse)
+ return -ENOMEM;
+
+ list_for_each_entry(device, &device_list, core_list) {
+ if (!sscanf(device->name, name, &i))
+ continue;
+ if (i < 0 || i >= PAGE_SIZE * 8)
+ continue;
+ snprintf(buf, sizeof buf, name, i);
+ if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
+ set_bit(i, inuse);
+ }
+
+ i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
+ free_page((unsigned long) inuse);
+ snprintf(buf, sizeof buf, name, i);
+
+ if (__ib_device_get_by_name(buf))
+ return -ENFILE;
+
+ strlcpy(name, buf, IB_DEVICE_NAME_MAX);
+ return 0;
+}
+
+/**
+ * ib_alloc_device - allocate an IB device struct
+ * @size:size of structure to allocate
+ *
+ * Low-level drivers should use ib_alloc_device() to allocate &struct
+ * ib_device. @size is the size of the structure to be allocated,
+ * including any private data used by the low-level driver.
+ * ib_dealloc_device() must be used to free structures allocated with
+ * ib_alloc_device().
+ */
+struct ib_device *ib_alloc_device(size_t size)
+{
+ void *dev;
+
+ BUG_ON(size < sizeof (struct ib_device));
+
+ dev = kmalloc(size, GFP_KERNEL);
+ if (!dev)
+ return NULL;
+
+ memset(dev, 0, size);
+
+ return dev;
+}
+EXPORT_SYMBOL(ib_alloc_device);
+
+/**
+ * ib_dealloc_device - free an IB device struct
+ * @device:structure to free
+ *
+ * Free a structure allocated with ib_alloc_device().
+ */
+void ib_dealloc_device(struct ib_device *device)
+{
+ if (device->reg_state == IB_DEV_UNINITIALIZED) {
+ kfree(device);
+ return;
+ }
+
+ BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
+
+ ib_device_unregister_sysfs(device);
+}
+EXPORT_SYMBOL(ib_dealloc_device);
+
+static int add_client_context(struct ib_device *device, struct ib_client *client)
+{
+ struct ib_client_data *context;
+ unsigned long flags;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context) {
+ printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
+ device->name, client->name);
+ return -ENOMEM;
+ }
+
+ context->client = client;
+ context->data = NULL;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_add(&context->list, &device->client_data_list);
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ return 0;
+}
+
+/**
+ * ib_register_device - Register an IB device with IB core
+ * @device:Device to register
+ *
+ * Low-level drivers use ib_register_device() to register their
+ * devices with the IB core. All registered clients will receive a
+ * callback for each device that is added. @device must be allocated
+ * with ib_alloc_device().
+ */
+int ib_register_device(struct ib_device *device)
+{
+ int ret;
+
+ down(&device_sem);
+
+ if (strchr(device->name, '%')) {
+ ret = alloc_name(device->name);
+ if (ret)
+ goto out;
+ }
+
+ if (ib_device_check_mandatory(device)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&device->event_handler_list);
+ INIT_LIST_HEAD(&device->client_data_list);
+ spin_lock_init(&device->event_handler_lock);
+ spin_lock_init(&device->client_data_lock);
+
+ ret = ib_device_register_sysfs(device);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't register device %s with driver model\n",
+ device->name);
+ goto out;
+ }
+
+ list_add_tail(&device->core_list, &device_list);
+
+ device->reg_state = IB_DEV_REGISTERED;
+
+ {
+ struct ib_client *client;
+
+ list_for_each_entry(client, &client_list, list)
+ if (client->add && !add_client_context(device, client))
+ client->add(device);
+ }
+
+ out:
+ up(&device_sem);
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_device);
+
+/**
+ * ib_unregister_device - Unregister an IB device
+ * @device:Device to unregister
+ *
+ * Unregister an IB device. All clients will receive a remove callback.
+ */
+void ib_unregister_device(struct ib_device *device)
+{
+ struct ib_client *client;
+ struct ib_client_data *context, *tmp;
+ unsigned long flags;
+
+ down(&device_sem);
+
+ list_for_each_entry_reverse(client, &client_list, list)
+ if (client->remove)
+ client->remove(device);
+
+ list_del(&device->core_list);
+
+ up(&device_sem);
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ kfree(context);
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ device->reg_state = IB_DEV_UNREGISTERED;
+}
+EXPORT_SYMBOL(ib_unregister_device);
+
+/**
+ * ib_register_client - Register an IB client
+ * @client:Client to register
+ *
+ * Upper level users of the IB drivers can use ib_register_client() to
+ * register callbacks for IB device addition and removal. When an IB
+ * device is added, each registered client's add method will be called
+ * (in the order the clients were registered), and when a device is
+ * removed, each client's remove method will be called (in the reverse
+ * order that clients were registered). In addition, when
+ * ib_register_client() is called, the client will receive an add
+ * callback for all devices already registered.
+ */
+int ib_register_client(struct ib_client *client)
+{
+ struct ib_device *device;
+
+ down(&device_sem);
+
+ list_add_tail(&client->list, &client_list);
+ list_for_each_entry(device, &device_list, core_list)
+ if (client->add && !add_client_context(device, client))
+ client->add(device);
+
+ up(&device_sem);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_register_client);
+
+/**
+ * ib_unregister_client - Unregister an IB client
+ * @client:Client to unregister
+ *
+ * Upper level users use ib_unregister_client() to remove their client
+ * registration. When ib_unregister_client() is called, the client
+ * will receive a remove callback for each IB device still registered.
+ */
+void ib_unregister_client(struct ib_client *client)
+{
+ struct ib_client_data *context, *tmp;
+ struct ib_device *device;
+ unsigned long flags;
+
+ down(&device_sem);
+
+ list_for_each_entry(device, &device_list, core_list) {
+ if (client->remove)
+ client->remove(device);
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
+ if (context->client == client) {
+ list_del(&context->list);
+ kfree(context);
+ }
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+ }
+ list_del(&client->list);
+
+ up(&device_sem);
+}
+EXPORT_SYMBOL(ib_unregister_client);
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns client context set with
+ * ib_set_client_data().
+ */
+void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
+{
+ struct ib_client_data *context;
+ void *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry(context, &device->client_data_list, list)
+ if (context->client == client) {
+ ret = context->data;
+ break;
+ }
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_client_data);
+
+/**
+ * ib_set_client_data - Get IB client context
+ * @device:Device to set context for
+ * @client:Client to set context for
+ * @data:Context to set
+ *
+ * ib_set_client_data() sets client context that can be retrieved with
+ * ib_get_client_data().
+ */
+void ib_set_client_data(struct ib_device *device, struct ib_client *client,
+ void *data)
+{
+ struct ib_client_data *context;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device->client_data_lock, flags);
+ list_for_each_entry(context, &device->client_data_list, list)
+ if (context->client == client) {
+ context->data = data;
+ goto out;
+ }
+
+ printk(KERN_WARNING "No client context found for %s/%s\n",
+ device->name, client->name);
+
+out:
+ spin_unlock_irqrestore(&device->client_data_lock, flags);
+}
+EXPORT_SYMBOL(ib_set_client_data);
+
+/**
+ * ib_register_event_handler - Register an IB event handler
+ * @event_handler:Handler to register
+ *
+ * ib_register_event_handler() registers an event handler that will be
+ * called back when asynchronous IB events occur (as defined in
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback may occur in interrupt context.
+ */
+int ib_register_event_handler (struct ib_event_handler *event_handler)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ list_add_tail(&event_handler->list,
+ &event_handler->device->event_handler_list);
+ spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_register_event_handler);
+
+/**
+ * ib_unregister_event_handler - Unregister an event handler
+ * @event_handler:Handler to unregister
+ *
+ * Unregister an event handler registered with
+ * ib_register_event_handler().
+ */
+int ib_unregister_event_handler(struct ib_event_handler *event_handler)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ list_del(&event_handler->list);
+ spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_unregister_event_handler);
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(struct ib_event *event)
+{
+ unsigned long flags;
+ struct ib_event_handler *handler;
+
+ spin_lock_irqsave(&event->device->event_handler_lock, flags);
+
+ list_for_each_entry(handler, &event->device->event_handler_list, list)
+ handler->handler(handler, event);
+
+ spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+}
+EXPORT_SYMBOL(ib_dispatch_event);
+
+/**
+ * ib_query_device - Query IB device attributes
+ * @device:Device to query
+ * @device_attr:Device attributes
+ *
+ * ib_query_device() returns the attributes of a device through the
+ * @device_attr pointer.
+ */
+int ib_query_device(struct ib_device *device,
+ struct ib_device_attr *device_attr)
+{
+ return device->query_device(device, device_attr);
+}
+EXPORT_SYMBOL(ib_query_device);
+
+/**
+ * ib_query_port - Query IB port attributes
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @port_attr:Port attributes
+ *
+ * ib_query_port() returns the attributes of a port through the
+ * @port_attr pointer.
+ */
+int ib_query_port(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ return device->query_port(device, port_num, port_attr);
+}
+EXPORT_SYMBOL(ib_query_port);
+
+/**
+ * ib_query_gid - Get GID table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:GID table index to query
+ * @gid:Returned GID
+ *
+ * ib_query_gid() fetches the specified GID table entry.
+ */
+int ib_query_gid(struct ib_device *device,
+ u8 port_num, int index, union ib_gid *gid)
+{
+ return device->query_gid(device, port_num, index, gid);
+}
+EXPORT_SYMBOL(ib_query_gid);
+
+/**
+ * ib_query_pkey - Get P_Key table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:P_Key table index to query
+ * @pkey:Returned P_Key
+ *
+ * ib_query_pkey() fetches the specified P_Key table entry.
+ */
+int ib_query_pkey(struct ib_device *device,
+ u8 port_num, u16 index, u16 *pkey)
+{
+ return device->query_pkey(device, port_num, index, pkey);
+}
+EXPORT_SYMBOL(ib_query_pkey);
+
+/**
+ * ib_modify_device - Change IB device attributes
+ * @device:Device to modify
+ * @device_modify_mask:Mask of attributes to change
+ * @device_modify:New attribute values
+ *
+ * ib_modify_device() changes a device's attributes as specified by
+ * the @device_modify_mask and @device_modify structure.
+ */
+int ib_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ return device->modify_device(device, device_modify_mask,
+ device_modify);
+}
+EXPORT_SYMBOL(ib_modify_device);
+
+/**
+ * ib_modify_port - Modifies the attributes for the specified port.
+ * @device: The device to modify.
+ * @port_num: The number of the port to modify.
+ * @port_modify_mask: Mask used to specify which attributes of the port
+ * to change.
+ * @port_modify: New attribute values for the port.
+ *
+ * ib_modify_port() changes a port's attributes as specified by the
+ * @port_modify_mask and @port_modify structure.
+ */
+int ib_modify_port(struct ib_device *device,
+ u8 port_num, int port_modify_mask,
+ struct ib_port_modify *port_modify)
+{
+ return device->modify_port(device, port_num, port_modify_mask,
+ port_modify);
+}
+EXPORT_SYMBOL(ib_modify_port);
+
+static int __init ib_core_init(void)
+{
+ int ret;
+
+ ret = ib_sysfs_setup();
+ if (ret)
+ printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+
+ ret = ib_cache_setup();
+ if (ret) {
+ printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+ ib_sysfs_cleanup();
+ }
+
+ return ret;
+}
+
+static void __exit ib_core_cleanup(void)
+{
+ ib_cache_cleanup();
+ ib_sysfs_cleanup();
+}
+
+module_init(ib_core_init);
+module_exit(ib_core_cleanup);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
new file mode 100644
index 00000000000..2e9469f1892
--- /dev/null
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: fmr_pool.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/jhash.h>
+#include <linux/kthread.h>
+
+#include <ib_fmr_pool.h>
+
+#include "core_priv.h"
+
+enum {
+ IB_FMR_MAX_REMAPS = 32,
+
+ IB_FMR_HASH_BITS = 8,
+ IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
+ IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
+};
+
+/*
+ * If an FMR is not in use, then the list member will point to either
+ * its pool's free_list (if the FMR can be mapped again; that is,
+ * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
+ * FMR needs to be unmapped before being remapped). In either of
+ * these cases it is a bug if the ref_count is not 0. In other words,
+ * if ref_count is > 0, then the list member must not be linked into
+ * either free_list or dirty_list.
+ *
+ * The cache_node member is used to link the FMR into a cache bucket
+ * (if caching is enabled). This is independent of the reference
+ * count of the FMR. When a valid FMR is released, its ref_count is
+ * decremented, and if ref_count reaches 0, the FMR is placed in
+ * either free_list or dirty_list as appropriate. However, it is not
+ * removed from the cache and may be "revived" if a call to
+ * ib_fmr_register_physical() occurs before the FMR is remapped. In
+ * this case we just increment the ref_count and remove the FMR from
+ * free_list/dirty_list.
+ *
+ * Before we remap an FMR from free_list, we remove it from the cache
+ * (to prevent another user from obtaining a stale FMR). When an FMR
+ * is released, we add it to the tail of the free list, so that our
+ * cache eviction policy is "least recently used."
+ *
+ * All manipulation of ref_count, list and cache_node is protected by
+ * pool_lock to maintain consistency.
+ */
+
+struct ib_fmr_pool {
+ spinlock_t pool_lock;
+
+ int pool_size;
+ int max_pages;
+ int dirty_watermark;
+ int dirty_len;
+ struct list_head free_list;
+ struct list_head dirty_list;
+ struct hlist_head *cache_bucket;
+
+ void (*flush_function)(struct ib_fmr_pool *pool,
+ void * arg);
+ void *flush_arg;
+
+ struct task_struct *thread;
+
+ atomic_t req_ser;
+ atomic_t flush_ser;
+
+ wait_queue_head_t force_wait;
+};
+
+static inline u32 ib_fmr_hash(u64 first_page)
+{
+ return jhash_2words((u32) first_page,
+ (u32) (first_page >> 32),
+ 0);
+}
+
+/* Caller must hold pool_lock */
+static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
+ u64 *page_list,
+ int page_list_len,
+ u64 io_virtual_address)
+{
+ struct hlist_head *bucket;
+ struct ib_pool_fmr *fmr;
+ struct hlist_node *pos;
+
+ if (!pool->cache_bucket)
+ return NULL;
+
+ bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
+
+ hlist_for_each_entry(fmr, pos, bucket, cache_node)
+ if (io_virtual_address == fmr->io_virtual_address &&
+ page_list_len == fmr->page_list_len &&
+ !memcmp(page_list, fmr->page_list,
+ page_list_len * sizeof *page_list))
+ return fmr;
+
+ return NULL;
+}
+
+static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
+{
+ int ret;
+ struct ib_pool_fmr *fmr;
+ LIST_HEAD(unmap_list);
+ LIST_HEAD(fmr_list);
+
+ spin_lock_irq(&pool->pool_lock);
+
+ list_for_each_entry(fmr, &pool->dirty_list, list) {
+ hlist_del_init(&fmr->cache_node);
+ fmr->remap_count = 0;
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+
+#ifdef DEBUG
+ if (fmr->ref_count !=0) {
+ printk(KERN_WARNING "Unmapping FMR 0x%08x with ref count %d",
+ fmr, fmr->ref_count);
+ }
+#endif
+ }
+
+ list_splice(&pool->dirty_list, &unmap_list);
+ INIT_LIST_HEAD(&pool->dirty_list);
+ pool->dirty_len = 0;
+
+ spin_unlock_irq(&pool->pool_lock);
+
+ if (list_empty(&unmap_list)) {
+ return;
+ }
+
+ ret = ib_unmap_fmr(&fmr_list);
+ if (ret)
+ printk(KERN_WARNING "ib_unmap_fmr returned %d", ret);
+
+ spin_lock_irq(&pool->pool_lock);
+ list_splice(&unmap_list, &pool->free_list);
+ spin_unlock_irq(&pool->pool_lock);
+}
+
+static int ib_fmr_cleanup_thread(void *pool_ptr)
+{
+ struct ib_fmr_pool *pool = pool_ptr;
+
+ do {
+ if (pool->dirty_len >= pool->dirty_watermark ||
+ atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
+ ib_fmr_batch_release(pool);
+
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
+
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
+ }
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (pool->dirty_len < pool->dirty_watermark &&
+ atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
+ !kthread_should_stop())
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+/**
+ * ib_create_fmr_pool - Create an FMR pool
+ * @pd:Protection domain for FMRs
+ * @params:FMR pool parameters
+ *
+ * Create a pool of FMRs. Return value is pointer to new pool or
+ * error code if creation failed.
+ */
+struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
+ struct ib_fmr_pool_param *params)
+{
+ struct ib_device *device;
+ struct ib_fmr_pool *pool;
+ int i;
+ int ret;
+
+ if (!params)
+ return ERR_PTR(-EINVAL);
+
+ device = pd->device;
+ if (!device->alloc_fmr || !device->dealloc_fmr ||
+ !device->map_phys_fmr || !device->unmap_fmr) {
+ printk(KERN_WARNING "Device %s does not support fast memory regions",
+ device->name);
+ return ERR_PTR(-ENOSYS);
+ }
+
+ pool = kmalloc(sizeof *pool, GFP_KERNEL);
+ if (!pool) {
+ printk(KERN_WARNING "couldn't allocate pool struct");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ pool->cache_bucket = NULL;
+
+ pool->flush_function = params->flush_function;
+ pool->flush_arg = params->flush_arg;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->dirty_list);
+
+ if (params->cache) {
+ pool->cache_bucket =
+ kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
+ GFP_KERNEL);
+ if (!pool->cache_bucket) {
+ printk(KERN_WARNING "Failed to allocate cache in pool");
+ ret = -ENOMEM;
+ goto out_free_pool;
+ }
+
+ for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
+ INIT_HLIST_HEAD(pool->cache_bucket + i);
+ }
+
+ pool->pool_size = 0;
+ pool->max_pages = params->max_pages_per_fmr;
+ pool->dirty_watermark = params->dirty_watermark;
+ pool->dirty_len = 0;
+ spin_lock_init(&pool->pool_lock);
+ atomic_set(&pool->req_ser, 0);
+ atomic_set(&pool->flush_ser, 0);
+ init_waitqueue_head(&pool->force_wait);
+
+ pool->thread = kthread_create(ib_fmr_cleanup_thread,
+ pool,
+ "ib_fmr(%s)",
+ device->name);
+ if (IS_ERR(pool->thread)) {
+ printk(KERN_WARNING "couldn't start cleanup thread");
+ ret = PTR_ERR(pool->thread);
+ goto out_free_pool;
+ }
+
+ {
+ struct ib_pool_fmr *fmr;
+ struct ib_fmr_attr attr = {
+ .max_pages = params->max_pages_per_fmr,
+ .max_maps = IB_FMR_MAX_REMAPS,
+ .page_size = PAGE_SHIFT
+ };
+
+ for (i = 0; i < params->pool_size; ++i) {
+ fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64),
+ GFP_KERNEL);
+ if (!fmr) {
+ printk(KERN_WARNING "failed to allocate fmr struct "
+ "for FMR %d", i);
+ goto out_fail;
+ }
+
+ fmr->pool = pool;
+ fmr->remap_count = 0;
+ fmr->ref_count = 0;
+ INIT_HLIST_NODE(&fmr->cache_node);
+
+ fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+ if (IS_ERR(fmr->fmr)) {
+ printk(KERN_WARNING "fmr_create failed for FMR %d", i);
+ kfree(fmr);
+ goto out_fail;
+ }
+
+ list_add_tail(&fmr->list, &pool->free_list);
+ ++pool->pool_size;
+ }
+ }
+
+ return pool;
+
+ out_free_pool:
+ kfree(pool->cache_bucket);
+ kfree(pool);
+
+ return ERR_PTR(ret);
+
+ out_fail:
+ ib_destroy_fmr_pool(pool);
+
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ib_create_fmr_pool);
+
+/**
+ * ib_destroy_fmr_pool - Free FMR pool
+ * @pool:FMR pool to free
+ *
+ * Destroy an FMR pool and free all associated resources.
+ */
+int ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
+{
+ struct ib_pool_fmr *fmr;
+ struct ib_pool_fmr *tmp;
+ int i;
+
+ kthread_stop(pool->thread);
+ ib_fmr_batch_release(pool);
+
+ i = 0;
+ list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
+ ib_dealloc_fmr(fmr->fmr);
+ list_del(&fmr->list);
+ kfree(fmr);
+ ++i;
+ }
+
+ if (i < pool->pool_size)
+ printk(KERN_WARNING "pool still has %d regions registered",
+ pool->pool_size - i);
+
+ kfree(pool->cache_bucket);
+ kfree(pool);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_destroy_fmr_pool);
+
+/**
+ * ib_flush_fmr_pool - Invalidate all unmapped FMRs
+ * @pool:FMR pool to flush
+ *
+ * Ensure that all unmapped FMRs are fully invalidated.
+ */
+int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
+{
+ int serial;
+
+ atomic_inc(&pool->req_ser);
+ /*
+ * It's OK if someone else bumps req_ser again here -- we'll
+ * just wait a little longer.
+ */
+ serial = atomic_read(&pool->req_ser);
+
+ wake_up_process(pool->thread);
+
+ if (wait_event_interruptible(pool->force_wait,
+ atomic_read(&pool->flush_ser) -
+ atomic_read(&pool->req_ser) >= 0))
+ return -EINTR;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_flush_fmr_pool);
+
+/**
+ * ib_fmr_pool_map_phys -
+ * @pool:FMR pool to allocate FMR from
+ * @page_list:List of pages to map
+ * @list_len:Number of pages in @page_list
+ * @io_virtual_address:I/O virtual address for new FMR
+ *
+ * Map an FMR from an FMR pool.
+ */
+struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
+ u64 *page_list,
+ int list_len,
+ u64 *io_virtual_address)
+{
+ struct ib_fmr_pool *pool = pool_handle;
+ struct ib_pool_fmr *fmr;
+ unsigned long flags;
+ int result;
+
+ if (list_len < 1 || list_len > pool->max_pages)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ fmr = ib_fmr_cache_lookup(pool,
+ page_list,
+ list_len,
+ *io_virtual_address);
+ if (fmr) {
+ /* found in cache */
+ ++fmr->ref_count;
+ if (fmr->ref_count == 1) {
+ list_del(&fmr->list);
+ }
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return fmr;
+ }
+
+ if (list_empty(&pool->free_list)) {
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
+ list_del(&fmr->list);
+ hlist_del_init(&fmr->cache_node);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
+ *io_virtual_address);
+
+ if (result) {
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ list_add(&fmr->list, &pool->free_list);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ printk(KERN_WARNING "fmr_map returns %d",
+ result);
+
+ return ERR_PTR(result);
+ }
+
+ ++fmr->remap_count;
+ fmr->ref_count = 1;
+
+ if (pool->cache_bucket) {
+ fmr->io_virtual_address = *io_virtual_address;
+ fmr->page_list_len = list_len;
+ memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ hlist_add_head(&fmr->cache_node,
+ pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ }
+
+ return fmr;
+}
+EXPORT_SYMBOL(ib_fmr_pool_map_phys);
+
+/**
+ * ib_fmr_pool_unmap - Unmap FMR
+ * @fmr:FMR to unmap
+ *
+ * Unmap an FMR. The FMR mapping may remain valid until the FMR is
+ * reused (or until ib_flush_fmr_pool() is called).
+ */
+int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+{
+ struct ib_fmr_pool *pool;
+ unsigned long flags;
+
+ pool = fmr->pool;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+
+ --fmr->ref_count;
+ if (!fmr->ref_count) {
+ if (fmr->remap_count < IB_FMR_MAX_REMAPS) {
+ list_add_tail(&fmr->list, &pool->free_list);
+ } else {
+ list_add_tail(&fmr->list, &pool->dirty_list);
+ ++pool->dirty_len;
+ wake_up_process(pool->thread);
+ }
+ }
+
+#ifdef DEBUG
+ if (fmr->ref_count < 0)
+ printk(KERN_WARNING "FMR %p has ref count %d < 0",
+ fmr, fmr->ref_count);
+#endif
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
new file mode 100644
index 00000000000..4ec7fff29b5
--- /dev/null
+++ b/drivers/infiniband/core/mad.c
@@ -0,0 +1,2714 @@
+/*
+ * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mad.c 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+
+#include <ib_mad.h>
+
+#include "mad_priv.h"
+#include "smi.h"
+#include "agent.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("kernel IB MAD API");
+MODULE_AUTHOR("Hal Rosenstock");
+MODULE_AUTHOR("Sean Hefty");
+
+
+kmem_cache_t *ib_mad_cache;
+static struct list_head ib_mad_port_list;
+static u32 ib_mad_client_id = 0;
+
+/* Port list lock */
+static spinlock_t ib_mad_port_list_lock;
+
+
+/* Forward declarations */
+static int method_in_use(struct ib_mad_mgmt_method_table **method,
+ struct ib_mad_reg_req *mad_reg_req);
+static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
+static struct ib_mad_agent_private *find_mad_agent(
+ struct ib_mad_port_private *port_priv,
+ struct ib_mad *mad, int solicited);
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_private *mad);
+static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
+static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc);
+static void timeout_sends(void *data);
+static void cancel_sends(void *data);
+static void local_completions(void *data);
+static int solicited_mad(struct ib_mad *mad);
+static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv,
+ u8 mgmt_class);
+static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv);
+
+/*
+ * Returns a ib_mad_port_private structure or NULL for a device/port
+ * Assumes ib_mad_port_list_lock is being held
+ */
+static inline struct ib_mad_port_private *
+__ib_get_mad_port(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *entry;
+
+ list_for_each_entry(entry, &ib_mad_port_list, port_list) {
+ if (entry->device == device && entry->port_num == port_num)
+ return entry;
+ }
+ return NULL;
+}
+
+/*
+ * Wrapper function to return a ib_mad_port_private structure or NULL
+ * for a device/port
+ */
+static inline struct ib_mad_port_private *
+ib_get_mad_port(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ entry = __ib_get_mad_port(device, port_num);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ return entry;
+}
+
+static inline u8 convert_mgmt_class(u8 mgmt_class)
+{
+ /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
+ return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
+ 0 : mgmt_class;
+}
+
+static int get_spl_qp_index(enum ib_qp_type qp_type)
+{
+ switch (qp_type)
+ {
+ case IB_QPT_SMI:
+ return 0;
+ case IB_QPT_GSI:
+ return 1;
+ default:
+ return -1;
+ }
+}
+
+static int vendor_class_index(u8 mgmt_class)
+{
+ return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
+}
+
+static int is_vendor_class(u8 mgmt_class)
+{
+ if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
+ (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
+ return 0;
+ return 1;
+}
+
+static int is_vendor_oui(char *oui)
+{
+ if (oui[0] || oui[1] || oui[2])
+ return 1;
+ return 0;
+}
+
+static int is_vendor_method_in_use(
+ struct ib_mad_mgmt_vendor_class *vendor_class,
+ struct ib_mad_reg_req *mad_reg_req)
+{
+ struct ib_mad_mgmt_method_table *method;
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
+ method = vendor_class->method_table[i];
+ if (method) {
+ if (method_in_use(&method, mad_reg_req))
+ return 1;
+ else
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * ib_register_mad_agent - Register to send/receive MADs
+ */
+struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ struct ib_mad_reg_req *mad_reg_req,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_reg_req *reg_req = NULL;
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ struct ib_mad_mgmt_method_table *method;
+ int ret2, qpn;
+ unsigned long flags;
+ u8 mgmt_class, vclass;
+
+ /* Validate parameters */
+ qpn = get_spl_qp_index(qp_type);
+ if (qpn == -1)
+ goto error1;
+
+ if (rmpp_version)
+ goto error1; /* XXX: until RMPP implemented */
+
+ /* Validate MAD registration request if supplied */
+ if (mad_reg_req) {
+ if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+ goto error1;
+ if (!recv_handler)
+ goto error1;
+ if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
+ /*
+ * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
+ * one in this range currently allowed
+ */
+ if (mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ goto error1;
+ } else if (mad_reg_req->mgmt_class == 0) {
+ /*
+ * Class 0 is reserved in IBA and is used for
+ * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ */
+ goto error1;
+ } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
+ /*
+ * If class is in "new" vendor range,
+ * ensure supplied OUI is not zero
+ */
+ if (!is_vendor_oui(mad_reg_req->oui))
+ goto error1;
+ }
+ /* Make sure class supplied is consistent with QP type */
+ if (qp_type == IB_QPT_SMI) {
+ if ((mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
+ (mad_reg_req->mgmt_class !=
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ goto error1;
+ } else {
+ if ((mad_reg_req->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
+ (mad_reg_req->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+ goto error1;
+ }
+ } else {
+ /* No registration request supplied */
+ if (!send_handler)
+ goto error1;
+ }
+
+ /* Validate device and port */
+ port_priv = ib_get_mad_port(device, port_num);
+ if (!port_priv) {
+ ret = ERR_PTR(-ENODEV);
+ goto error1;
+ }
+
+ /* Allocate structures */
+ mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL);
+ if (!mad_agent_priv) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error1;
+ }
+
+ if (mad_reg_req) {
+ reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
+ if (!reg_req) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error2;
+ }
+ /* Make a copy of the MAD registration request */
+ memcpy(reg_req, mad_reg_req, sizeof *reg_req);
+ }
+
+ /* Now, fill in the various structures */
+ memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
+ mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
+ mad_agent_priv->reg_req = reg_req;
+ mad_agent_priv->rmpp_version = rmpp_version;
+ mad_agent_priv->agent.device = device;
+ mad_agent_priv->agent.recv_handler = recv_handler;
+ mad_agent_priv->agent.send_handler = send_handler;
+ mad_agent_priv->agent.context = context;
+ mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
+ mad_agent_priv->agent.port_num = port_num;
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+ mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
+
+ /*
+ * Make sure MAD registration (if supplied)
+ * is non overlapping with any existing ones
+ */
+ if (mad_reg_req) {
+ mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
+ if (!is_vendor_class(mgmt_class)) {
+ class = port_priv->version[mad_reg_req->
+ mgmt_class_version].class;
+ if (class) {
+ method = class->method_table[mgmt_class];
+ if (method) {
+ if (method_in_use(&method,
+ mad_reg_req))
+ goto error3;
+ }
+ }
+ ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
+ mgmt_class);
+ } else {
+ /* "New" vendor class range */
+ vendor = port_priv->version[mad_reg_req->
+ mgmt_class_version].vendor;
+ if (vendor) {
+ vclass = vendor_class_index(mgmt_class);
+ vendor_class = vendor->vendor_class[vclass];
+ if (vendor_class) {
+ if (is_vendor_method_in_use(
+ vendor_class,
+ mad_reg_req))
+ goto error3;
+ }
+ }
+ ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
+ }
+ if (ret2) {
+ ret = ERR_PTR(ret2);
+ goto error3;
+ }
+ }
+
+ /* Add mad agent into port's agent list */
+ list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ spin_lock_init(&mad_agent_priv->lock);
+ INIT_LIST_HEAD(&mad_agent_priv->send_list);
+ INIT_LIST_HEAD(&mad_agent_priv->wait_list);
+ INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
+ INIT_LIST_HEAD(&mad_agent_priv->local_list);
+ INIT_WORK(&mad_agent_priv->local_work, local_completions,
+ mad_agent_priv);
+ INIT_LIST_HEAD(&mad_agent_priv->canceled_list);
+ INIT_WORK(&mad_agent_priv->canceled_work, cancel_sends, mad_agent_priv);
+ atomic_set(&mad_agent_priv->refcount, 1);
+ init_waitqueue_head(&mad_agent_priv->wait);
+
+ return &mad_agent_priv->agent;
+
+error3:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ kfree(reg_req);
+error2:
+ kfree(mad_agent_priv);
+error1:
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_mad_agent);
+
+static inline int is_snooping_sends(int mad_snoop_flags)
+{
+ return (mad_snoop_flags &
+ (/*IB_MAD_SNOOP_POSTED_SENDS |
+ IB_MAD_SNOOP_RMPP_SENDS |*/
+ IB_MAD_SNOOP_SEND_COMPLETIONS /*|
+ IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
+}
+
+static inline int is_snooping_recvs(int mad_snoop_flags)
+{
+ return (mad_snoop_flags &
+ (IB_MAD_SNOOP_RECVS /*|
+ IB_MAD_SNOOP_RMPP_RECVS*/));
+}
+
+static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_snoop_private *mad_snoop_priv)
+{
+ struct ib_mad_snoop_private **new_snoop_table;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ /* Check for empty slot in array. */
+ for (i = 0; i < qp_info->snoop_table_size; i++)
+ if (!qp_info->snoop_table[i])
+ break;
+
+ if (i == qp_info->snoop_table_size) {
+ /* Grow table. */
+ new_snoop_table = kmalloc(sizeof mad_snoop_priv *
+ qp_info->snoop_table_size + 1,
+ GFP_ATOMIC);
+ if (!new_snoop_table) {
+ i = -ENOMEM;
+ goto out;
+ }
+ if (qp_info->snoop_table) {
+ memcpy(new_snoop_table, qp_info->snoop_table,
+ sizeof mad_snoop_priv *
+ qp_info->snoop_table_size);
+ kfree(qp_info->snoop_table);
+ }
+ qp_info->snoop_table = new_snoop_table;
+ qp_info->snoop_table_size++;
+ }
+ qp_info->snoop_table[i] = mad_snoop_priv;
+ atomic_inc(&qp_info->snoop_count);
+out:
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ return i;
+}
+
+struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ int mad_snoop_flags,
+ ib_mad_snoop_handler snoop_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent *ret;
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ int qpn;
+
+ /* Validate parameters */
+ if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
+ (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
+ ret = ERR_PTR(-EINVAL);
+ goto error1;
+ }
+ qpn = get_spl_qp_index(qp_type);
+ if (qpn == -1) {
+ ret = ERR_PTR(-EINVAL);
+ goto error1;
+ }
+ port_priv = ib_get_mad_port(device, port_num);
+ if (!port_priv) {
+ ret = ERR_PTR(-ENODEV);
+ goto error1;
+ }
+ /* Allocate structures */
+ mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
+ if (!mad_snoop_priv) {
+ ret = ERR_PTR(-ENOMEM);
+ goto error1;
+ }
+
+ /* Now, fill in the various structures */
+ memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv);
+ mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
+ mad_snoop_priv->agent.device = device;
+ mad_snoop_priv->agent.recv_handler = recv_handler;
+ mad_snoop_priv->agent.snoop_handler = snoop_handler;
+ mad_snoop_priv->agent.context = context;
+ mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
+ mad_snoop_priv->agent.port_num = port_num;
+ mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
+ init_waitqueue_head(&mad_snoop_priv->wait);
+ mad_snoop_priv->snoop_index = register_snoop_agent(
+ &port_priv->qp_info[qpn],
+ mad_snoop_priv);
+ if (mad_snoop_priv->snoop_index < 0) {
+ ret = ERR_PTR(mad_snoop_priv->snoop_index);
+ goto error2;
+ }
+
+ atomic_set(&mad_snoop_priv->refcount, 1);
+ return &mad_snoop_priv->agent;
+
+error2:
+ kfree(mad_snoop_priv);
+error1:
+ return ret;
+}
+EXPORT_SYMBOL(ib_register_mad_snoop);
+
+static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+
+ /* Note that we could still be handling received MADs */
+
+ /*
+ * Canceling all sends results in dropping received response
+ * MADs, preventing us from queuing additional work
+ */
+ cancel_mads(mad_agent_priv);
+
+ port_priv = mad_agent_priv->qp_info->port_priv;
+
+ cancel_delayed_work(&mad_agent_priv->timed_work);
+ flush_workqueue(port_priv->wq);
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+ remove_mad_reg_req(mad_agent_priv);
+ list_del(&mad_agent_priv->agent_list);
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ /* XXX: Cleanup pending RMPP receives for this agent */
+
+ atomic_dec(&mad_agent_priv->refcount);
+ wait_event(mad_agent_priv->wait,
+ !atomic_read(&mad_agent_priv->refcount));
+
+ if (mad_agent_priv->reg_req)
+ kfree(mad_agent_priv->reg_req);
+ kfree(mad_agent_priv);
+}
+
+static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
+{
+ struct ib_mad_qp_info *qp_info;
+ unsigned long flags;
+
+ qp_info = mad_snoop_priv->qp_info;
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
+ atomic_dec(&qp_info->snoop_count);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+
+ atomic_dec(&mad_snoop_priv->refcount);
+ wait_event(mad_snoop_priv->wait,
+ !atomic_read(&mad_snoop_priv->refcount));
+
+ kfree(mad_snoop_priv);
+}
+
+/*
+ * ib_unregister_mad_agent - Unregisters a client from using MAD services
+ */
+int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_snoop_private *mad_snoop_priv;
+
+ /* If the TID is zero, the agent can only snoop. */
+ if (mad_agent->hi_tid) {
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
+ } else {
+ mad_snoop_priv = container_of(mad_agent,
+ struct ib_mad_snoop_private,
+ agent);
+ unregister_mad_snoop(mad_snoop_priv);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ib_unregister_mad_agent);
+
+static void dequeue_mad(struct ib_mad_list_head *mad_list)
+{
+ struct ib_mad_queue *mad_queue;
+ unsigned long flags;
+
+ BUG_ON(!mad_list->mad_queue);
+ mad_queue = mad_list->mad_queue;
+ spin_lock_irqsave(&mad_queue->lock, flags);
+ list_del(&mad_list->list);
+ mad_queue->count--;
+ spin_unlock_irqrestore(&mad_queue->lock, flags);
+}
+
+static void snoop_send(struct ib_mad_qp_info *qp_info,
+ struct ib_send_wr *send_wr,
+ struct ib_mad_send_wc *mad_send_wc,
+ int mad_snoop_flags)
+{
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ for (i = 0; i < qp_info->snoop_table_size; i++) {
+ mad_snoop_priv = qp_info->snoop_table[i];
+ if (!mad_snoop_priv ||
+ !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
+ continue;
+
+ atomic_inc(&mad_snoop_priv->refcount);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
+ send_wr, mad_send_wc);
+ if (atomic_dec_and_test(&mad_snoop_priv->refcount))
+ wake_up(&mad_snoop_priv->wait);
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+}
+
+static void snoop_recv(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ int mad_snoop_flags)
+{
+ struct ib_mad_snoop_private *mad_snoop_priv;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ for (i = 0; i < qp_info->snoop_table_size; i++) {
+ mad_snoop_priv = qp_info->snoop_table[i];
+ if (!mad_snoop_priv ||
+ !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
+ continue;
+
+ atomic_inc(&mad_snoop_priv->refcount);
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+ mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+ mad_recv_wc);
+ if (atomic_dec_and_test(&mad_snoop_priv->refcount))
+ wake_up(&mad_snoop_priv->wait);
+ spin_lock_irqsave(&qp_info->snoop_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
+}
+
+static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+ struct ib_wc *wc)
+{
+ memset(wc, 0, sizeof *wc);
+ wc->wr_id = wr_id;
+ wc->status = IB_WC_SUCCESS;
+ wc->opcode = IB_WC_RECV;
+ wc->pkey_index = pkey_index;
+ wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
+ wc->src_qp = IB_QP0;
+ wc->qp_num = IB_QP0;
+ wc->slid = slid;
+ wc->sl = 0;
+ wc->dlid_path_bits = 0;
+ wc->port_num = port_num;
+}
+
+/*
+ * Return 0 if SMP is to be sent
+ * Return 1 if SMP was consumed locally (whether or not solicited)
+ * Return < 0 if error
+ */
+static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_smp *smp,
+ struct ib_send_wr *send_wr)
+{
+ int ret, solicited;
+ unsigned long flags;
+ struct ib_mad_local_private *local;
+ struct ib_mad_private *mad_priv;
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_agent_private *recv_mad_agent = NULL;
+ struct ib_device *device = mad_agent_priv->agent.device;
+ u8 port_num = mad_agent_priv->agent.port_num;
+ struct ib_wc mad_wc;
+
+ if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
+ ret = -EINVAL;
+ printk(KERN_ERR PFX "Invalid directed route\n");
+ goto out;
+ }
+ /* Check to post send on QP or process locally */
+ ret = smi_check_local_dr_smp(smp, device, port_num);
+ if (!ret || !device->process_mad)
+ goto out;
+
+ local = kmalloc(sizeof *local, GFP_ATOMIC);
+ if (!local) {
+ ret = -ENOMEM;
+ printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ goto out;
+ }
+ local->mad_priv = NULL;
+ local->recv_mad_agent = NULL;
+ mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
+ if (!mad_priv) {
+ ret = -ENOMEM;
+ printk(KERN_ERR PFX "No memory for local response MAD\n");
+ kfree(local);
+ goto out;
+ }
+
+ build_smp_wc(send_wr->wr_id, smp->dr_slid, send_wr->wr.ud.pkey_index,
+ send_wr->wr.ud.port_num, &mad_wc);
+
+ /* No GRH for DR SMP */
+ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
+ (struct ib_mad *)smp,
+ (struct ib_mad *)&mad_priv->mad);
+ switch (ret)
+ {
+ case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
+ /*
+ * See if response is solicited and
+ * there is a recv handler
+ */
+ if (solicited_mad(&mad_priv->mad.mad) &&
+ mad_agent_priv->agent.recv_handler) {
+ local->mad_priv = mad_priv;
+ local->recv_mad_agent = mad_agent_priv;
+ /*
+ * Reference MAD agent until receive
+ * side of local completion handled
+ */
+ atomic_inc(&mad_agent_priv->refcount);
+ } else
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ break;
+ case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ break;
+ case IB_MAD_RESULT_SUCCESS:
+ /* Treat like an incoming receive MAD */
+ solicited = solicited_mad(&mad_priv->mad.mad);
+ port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
+ mad_agent_priv->agent.port_num);
+ if (port_priv) {
+ mad_priv->mad.mad.mad_hdr.tid =
+ ((struct ib_mad *)smp)->mad_hdr.tid;
+ recv_mad_agent = find_mad_agent(port_priv,
+ &mad_priv->mad.mad,
+ solicited);
+ }
+ if (!port_priv || !recv_mad_agent) {
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ kfree(local);
+ ret = 0;
+ goto out;
+ }
+ local->mad_priv = mad_priv;
+ local->recv_mad_agent = recv_mad_agent;
+ break;
+ default:
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ kfree(local);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ local->send_wr = *send_wr;
+ local->send_wr.sg_list = local->sg_list;
+ memcpy(local->sg_list, send_wr->sg_list,
+ sizeof *send_wr->sg_list * send_wr->num_sge);
+ local->send_wr.next = NULL;
+ local->tid = send_wr->wr.ud.mad_hdr->tid;
+ local->wr_id = send_wr->wr_id;
+ /* Reference MAD agent until send side of local completion handled */
+ atomic_inc(&mad_agent_priv->refcount);
+ /* Queue local completion to local list */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ queue_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->local_work);
+ ret = 1;
+out:
+ return ret;
+}
+
+static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_qp_info *qp_info;
+ struct ib_send_wr *bad_send_wr;
+ unsigned long flags;
+ int ret;
+
+ /* Replace user's WR ID with our own to find WR upon completion */
+ qp_info = mad_agent_priv->qp_info;
+ mad_send_wr->wr_id = mad_send_wr->send_wr.wr_id;
+ mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+ mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+
+ spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+ if (qp_info->send_queue.count++ < qp_info->send_queue.max_active) {
+ list_add_tail(&mad_send_wr->mad_list.list,
+ &qp_info->send_queue.list);
+ spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+ ret = ib_post_send(mad_agent_priv->agent.qp,
+ &mad_send_wr->send_wr, &bad_send_wr);
+ if (ret) {
+ printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ dequeue_mad(&mad_send_wr->mad_list);
+ }
+ } else {
+ list_add_tail(&mad_send_wr->mad_list.list,
+ &qp_info->overflow_list);
+ spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+ ret = 0;
+ }
+ return ret;
+}
+
+/*
+ * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
+ * with the registered client
+ */
+int ib_post_send_mad(struct ib_mad_agent *mad_agent,
+ struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr)
+{
+ int ret = -EINVAL;
+ struct ib_mad_agent_private *mad_agent_priv;
+
+ /* Validate supplied parameters */
+ if (!bad_send_wr)
+ goto error1;
+
+ if (!mad_agent || !send_wr)
+ goto error2;
+
+ if (!mad_agent->send_handler)
+ goto error2;
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+
+ /* Walk list of send WRs and post each on send list */
+ while (send_wr) {
+ unsigned long flags;
+ struct ib_send_wr *next_send_wr;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_smp *smp;
+
+ /* Validate more parameters */
+ if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
+ goto error2;
+
+ if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
+ goto error2;
+
+ if (!send_wr->wr.ud.mad_hdr) {
+ printk(KERN_ERR PFX "MAD header must be supplied "
+ "in WR %p\n", send_wr);
+ goto error2;
+ }
+
+ /*
+ * Save pointer to next work request to post in case the
+ * current one completes, and the user modifies the work
+ * request associated with the completion
+ */
+ next_send_wr = (struct ib_send_wr *)send_wr->next;
+
+ smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
+ send_wr);
+ if (ret < 0) /* error */
+ goto error2;
+ else if (ret == 1) /* locally consumed */
+ goto next;
+ }
+
+ /* Allocate MAD send WR tracking structure */
+ mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
+ if (!mad_send_wr) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_send_wr_private\n");
+ ret = -ENOMEM;
+ goto error2;
+ }
+
+ mad_send_wr->send_wr = *send_wr;
+ mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+ memcpy(mad_send_wr->sg_list, send_wr->sg_list,
+ sizeof *send_wr->sg_list * send_wr->num_sge);
+ mad_send_wr->send_wr.next = NULL;
+ mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
+ mad_send_wr->agent = mad_agent;
+ /* Timeout will be updated after send completes */
+ mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
+ ud.timeout_ms);
+ mad_send_wr->retry = 0;
+ /* One reference for each work request to QP + response */
+ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+ mad_send_wr->status = IB_WC_SUCCESS;
+
+ /* Reference MAD agent until send completes */
+ atomic_inc(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ ret = ib_send_mad(mad_agent_priv, mad_send_wr);
+ if (ret) {
+ /* Fail send request */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
+ goto error2;
+ }
+next:
+ send_wr = next_send_wr;
+ }
+ return 0;
+
+error2:
+ *bad_send_wr = send_wr;
+error1:
+ return ret;
+}
+EXPORT_SYMBOL(ib_post_send_mad);
+
+/*
+ * ib_free_recv_mad - Returns data buffers used to receive
+ * a MAD to the access layer
+ */
+void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_recv_buf *entry;
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *priv;
+
+ mad_priv_hdr = container_of(mad_recv_wc,
+ struct ib_mad_private_header,
+ recv_wc);
+ priv = container_of(mad_priv_hdr, struct ib_mad_private, header);
+
+ /*
+ * Walk receive buffer list associated with this WC
+ * No need to remove them from list of receive buffers
+ */
+ list_for_each_entry(entry, &mad_recv_wc->recv_buf.list, list) {
+ /* Free previous receive buffer */
+ kmem_cache_free(ib_mad_cache, priv);
+ mad_priv_hdr = container_of(mad_recv_wc,
+ struct ib_mad_private_header,
+ recv_wc);
+ priv = container_of(mad_priv_hdr, struct ib_mad_private,
+ header);
+ }
+
+ /* Free last buffer */
+ kmem_cache_free(ib_mad_cache, priv);
+}
+EXPORT_SYMBOL(ib_free_recv_mad);
+
+void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc,
+ void *buf)
+{
+ printk(KERN_ERR PFX "ib_coalesce_recv_mad() not implemented yet\n");
+}
+EXPORT_SYMBOL(ib_coalesce_recv_mad);
+
+struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context)
+{
+ return ERR_PTR(-EINVAL); /* XXX: for now */
+}
+EXPORT_SYMBOL(ib_redirect_mad_qp);
+
+int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
+ struct ib_wc *wc)
+{
+ printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ return 0;
+}
+EXPORT_SYMBOL(ib_process_mad_wc);
+
+static int method_in_use(struct ib_mad_mgmt_method_table **method,
+ struct ib_mad_reg_req *mad_reg_req)
+{
+ int i;
+
+ for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ if ((*method)->agent[i]) {
+ printk(KERN_ERR PFX "Method %d already in use\n", i);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
+{
+ /* Allocate management method table */
+ *method = kmalloc(sizeof **method, GFP_ATOMIC);
+ if (!*method) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_method_table\n");
+ return -ENOMEM;
+ }
+ /* Clear management method table */
+ memset(*method, 0, sizeof **method);
+
+ return 0;
+}
+
+/*
+ * Check to see if there are any methods still in use
+ */
+static int check_method_table(struct ib_mad_mgmt_method_table *method)
+{
+ int i;
+
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i])
+ return 1;
+ return 0;
+}
+
+/*
+ * Check to see if there are any method tables for this class still in use
+ */
+static int check_class_table(struct ib_mad_mgmt_class_table *class)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_CLASS; i++)
+ if (class->method_table[i])
+ return 1;
+ return 0;
+}
+
+static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++)
+ if (vendor_class->method_table[i])
+ return 1;
+ return 0;
+}
+
+static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
+ char *oui)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_OUI; i++)
+ /* Is there matching OUI for this vendor class ? */
+ if (!memcmp(vendor_class->oui[i], oui, 3))
+ return i;
+
+ return -1;
+}
+
+static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
+{
+ int i;
+
+ for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
+ if (vendor->vendor_class[i])
+ return 1;
+
+ return 0;
+}
+
+static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
+ struct ib_mad_agent_private *agent)
+{
+ int i;
+
+ /* Remove any methods for this mad agent */
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
+ if (method->agent[i] == agent) {
+ method->agent[i] = NULL;
+ }
+ }
+}
+
+static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv,
+ u8 mgmt_class)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_class_table **class;
+ struct ib_mad_mgmt_method_table **method;
+ int i, ret;
+
+ port_priv = agent_priv->qp_info->port_priv;
+ class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
+ if (!*class) {
+ /* Allocate management class table for "new" class version */
+ *class = kmalloc(sizeof **class, GFP_ATOMIC);
+ if (!*class) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_class_table\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+ /* Clear management class table */
+ memset(*class, 0, sizeof(**class));
+ /* Allocate method table for this management class */
+ method = &(*class)->method_table[mgmt_class];
+ if ((ret = allocate_method_table(method)))
+ goto error2;
+ } else {
+ method = &(*class)->method_table[mgmt_class];
+ if (!*method) {
+ /* Allocate method table for this management class */
+ if ((ret = allocate_method_table(method)))
+ goto error1;
+ }
+ }
+
+ /* Now, make sure methods are not already in use */
+ if (method_in_use(method, mad_reg_req))
+ goto error3;
+
+ /* Finally, add in methods being registered */
+ for (i = find_first_bit(mad_reg_req->method_mask,
+ IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ (*method)->agent[i] = agent_priv;
+ }
+ return 0;
+
+error3:
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(*method, agent_priv);
+ /* Now, check to see if there are any methods in use */
+ if (!check_method_table(*method)) {
+ /* If not, release management method table */
+ kfree(*method);
+ *method = NULL;
+ }
+ ret = -EINVAL;
+ goto error1;
+error2:
+ kfree(*class);
+ *class = NULL;
+error1:
+ return ret;
+}
+
+static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
+ struct ib_mad_agent_private *agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_vendor_class_table **vendor_table;
+ struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
+ struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
+ struct ib_mad_mgmt_method_table **method;
+ int i, ret = -ENOMEM;
+ u8 vclass;
+
+ /* "New" vendor (with OUI) class */
+ vclass = vendor_class_index(mad_reg_req->mgmt_class);
+ port_priv = agent_priv->qp_info->port_priv;
+ vendor_table = &port_priv->version[
+ mad_reg_req->mgmt_class_version].vendor;
+ if (!*vendor_table) {
+ /* Allocate mgmt vendor class table for "new" class version */
+ vendor = kmalloc(sizeof *vendor, GFP_ATOMIC);
+ if (!vendor) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_vendor_class_table\n");
+ goto error1;
+ }
+ /* Clear management vendor class table */
+ memset(vendor, 0, sizeof(*vendor));
+ *vendor_table = vendor;
+ }
+ if (!(*vendor_table)->vendor_class[vclass]) {
+ /* Allocate table for this management vendor class */
+ vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC);
+ if (!vendor_class) {
+ printk(KERN_ERR PFX "No memory for "
+ "ib_mad_mgmt_vendor_class\n");
+ goto error2;
+ }
+ memset(vendor_class, 0, sizeof(*vendor_class));
+ (*vendor_table)->vendor_class[vclass] = vendor_class;
+ }
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ /* Is there matching OUI for this vendor class ? */
+ if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
+ mad_reg_req->oui, 3)) {
+ method = &(*vendor_table)->vendor_class[
+ vclass]->method_table[i];
+ BUG_ON(!*method);
+ goto check_in_use;
+ }
+ }
+ for (i = 0; i < MAX_MGMT_OUI; i++) {
+ /* OUI slot available ? */
+ if (!is_vendor_oui((*vendor_table)->vendor_class[
+ vclass]->oui[i])) {
+ method = &(*vendor_table)->vendor_class[
+ vclass]->method_table[i];
+ BUG_ON(*method);
+ /* Allocate method table for this OUI */
+ if ((ret = allocate_method_table(method)))
+ goto error3;
+ memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
+ mad_reg_req->oui, 3);
+ goto check_in_use;
+ }
+ }
+ printk(KERN_ERR PFX "All OUI slots in use\n");
+ goto error3;
+
+check_in_use:
+ /* Now, make sure methods are not already in use */
+ if (method_in_use(method, mad_reg_req))
+ goto error4;
+
+ /* Finally, add in methods being registered */
+ for (i = find_first_bit(mad_reg_req->method_mask,
+ IB_MGMT_MAX_METHODS);
+ i < IB_MGMT_MAX_METHODS;
+ i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
+ 1+i)) {
+ (*method)->agent[i] = agent_priv;
+ }
+ return 0;
+
+error4:
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(*method, agent_priv);
+ /* Now, check to see if there are any methods in use */
+ if (!check_method_table(*method)) {
+ /* If not, release management method table */
+ kfree(*method);
+ *method = NULL;
+ }
+ ret = -EINVAL;
+error3:
+ if (vendor_class) {
+ (*vendor_table)->vendor_class[vclass] = NULL;
+ kfree(vendor_class);
+ }
+error2:
+ if (vendor) {
+ *vendor_table = NULL;
+ kfree(vendor);
+ }
+error1:
+ return ret;
+}
+
+static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_method_table *method;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ int index;
+ u8 mgmt_class;
+
+ /*
+ * Was MAD registration request supplied
+ * with original registration ?
+ */
+ if (!agent_priv->reg_req) {
+ goto out;
+ }
+
+ port_priv = agent_priv->qp_info->port_priv;
+ mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
+ class = port_priv->version[
+ agent_priv->reg_req->mgmt_class_version].class;
+ if (!class)
+ goto vendor_check;
+
+ method = class->method_table[mgmt_class];
+ if (method) {
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(method, agent_priv);
+ /* Now, check to see if there are any methods still in use */
+ if (!check_method_table(method)) {
+ /* If not, release management method table */
+ kfree(method);
+ class->method_table[mgmt_class] = NULL;
+ /* Any management classes left ? */
+ if (!check_class_table(class)) {
+ /* If not, release management class table */
+ kfree(class);
+ port_priv->version[
+ agent_priv->reg_req->
+ mgmt_class_version].class = NULL;
+ }
+ }
+ }
+
+vendor_check:
+ if (!is_vendor_class(mgmt_class))
+ goto out;
+
+ /* normalize mgmt_class to vendor range 2 */
+ mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
+ vendor = port_priv->version[
+ agent_priv->reg_req->mgmt_class_version].vendor;
+
+ if (!vendor)
+ goto out;
+
+ vendor_class = vendor->vendor_class[mgmt_class];
+ if (vendor_class) {
+ index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
+ if (index < 0)
+ goto out;
+ method = vendor_class->method_table[index];
+ if (method) {
+ /* Remove any methods for this mad agent */
+ remove_methods_mad_agent(method, agent_priv);
+ /*
+ * Now, check to see if there are
+ * any methods still in use
+ */
+ if (!check_method_table(method)) {
+ /* If not, release management method table */
+ kfree(method);
+ vendor_class->method_table[index] = NULL;
+ memset(vendor_class->oui[index], 0, 3);
+ /* Any OUIs left ? */
+ if (!check_vendor_class(vendor_class)) {
+ /* If not, release vendor class table */
+ kfree(vendor_class);
+ vendor->vendor_class[mgmt_class] = NULL;
+ /* Any other vendor classes left ? */
+ if (!check_vendor_table(vendor)) {
+ kfree(vendor);
+ port_priv->version[
+ agent_priv->reg_req->
+ mgmt_class_version].
+ vendor = NULL;
+ }
+ }
+ }
+ }
+ }
+
+out:
+ return;
+}
+
+static int response_mad(struct ib_mad *mad)
+{
+ /* Trap represses are responses although response bit is reset */
+ return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
+ (mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
+}
+
+static int solicited_mad(struct ib_mad *mad)
+{
+ /* CM MADs are never solicited */
+ if (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CM) {
+ return 0;
+ }
+
+ /* XXX: Determine whether MAD is using RMPP */
+
+ /* Not using RMPP */
+ /* Is this MAD a response to a previous MAD ? */
+ return response_mad(mad);
+}
+
+static struct ib_mad_agent_private *
+find_mad_agent(struct ib_mad_port_private *port_priv,
+ struct ib_mad *mad,
+ int solicited)
+{
+ struct ib_mad_agent_private *mad_agent = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
+
+ /*
+ * Whether MAD was solicited determines type of routing to
+ * MAD client.
+ */
+ if (solicited) {
+ u32 hi_tid;
+ struct ib_mad_agent_private *entry;
+
+ /*
+ * Routing is based on high 32 bits of transaction ID
+ * of MAD.
+ */
+ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
+ list_for_each_entry(entry, &port_priv->agent_list,
+ agent_list) {
+ if (entry->agent.hi_tid == hi_tid) {
+ mad_agent = entry;
+ break;
+ }
+ }
+ } else {
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_method_table *method;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+ struct ib_mad_mgmt_vendor_class *vendor_class;
+ struct ib_vendor_mad *vendor_mad;
+ int index;
+
+ /*
+ * Routing is based on version, class, and method
+ * For "newer" vendor MADs, also based on OUI
+ */
+ if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
+ goto out;
+ if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
+ class = port_priv->version[
+ mad->mad_hdr.class_version].class;
+ if (!class)
+ goto out;
+ method = class->method_table[convert_mgmt_class(
+ mad->mad_hdr.mgmt_class)];
+ if (method)
+ mad_agent = method->agent[mad->mad_hdr.method &
+ ~IB_MGMT_METHOD_RESP];
+ } else {
+ vendor = port_priv->version[
+ mad->mad_hdr.class_version].vendor;
+ if (!vendor)
+ goto out;
+ vendor_class = vendor->vendor_class[vendor_class_index(
+ mad->mad_hdr.mgmt_class)];
+ if (!vendor_class)
+ goto out;
+ /* Find matching OUI */
+ vendor_mad = (struct ib_vendor_mad *)mad;
+ index = find_vendor_oui(vendor_class, vendor_mad->oui);
+ if (index == -1)
+ goto out;
+ method = vendor_class->method_table[index];
+ if (method) {
+ mad_agent = method->agent[mad->mad_hdr.method &
+ ~IB_MGMT_METHOD_RESP];
+ }
+ }
+ }
+
+ if (mad_agent) {
+ if (mad_agent->agent.recv_handler)
+ atomic_inc(&mad_agent->refcount);
+ else {
+ printk(KERN_NOTICE PFX "No receive handler for client "
+ "%p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
+ mad_agent = NULL;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+
+ return mad_agent;
+}
+
+static int validate_mad(struct ib_mad *mad, u32 qp_num)
+{
+ int valid = 0;
+
+ /* Make sure MAD base version is understood */
+ if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
+ printk(KERN_ERR PFX "MAD received with unsupported base "
+ "version %d\n", mad->mad_hdr.base_version);
+ goto out;
+ }
+
+ /* Filter SMI packets sent to other than QP0 */
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
+ (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+ if (qp_num == 0)
+ valid = 1;
+ } else {
+ /* Filter GSI packets sent to QP0 */
+ if (qp_num != 0)
+ valid = 1;
+ }
+
+out:
+ return valid;
+}
+
+/*
+ * Return start of fully reassembled MAD, or NULL, if MAD isn't assembled yet
+ */
+static struct ib_mad_private *
+reassemble_recv(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_private *recv)
+{
+ /* Until we have RMPP, all receives are reassembled!... */
+ INIT_LIST_HEAD(&recv->header.recv_wc.recv_buf.list);
+ return recv;
+}
+
+static struct ib_mad_send_wr_private*
+find_send_req(struct ib_mad_agent_private *mad_agent_priv,
+ u64 tid)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
+ agent_list) {
+ if (mad_send_wr->tid == tid)
+ return mad_send_wr;
+ }
+
+ /*
+ * It's possible to receive the response before we've
+ * been notified that the send has completed
+ */
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
+ agent_list) {
+ if (mad_send_wr->tid == tid && mad_send_wr->timeout) {
+ /* Verify request has not been canceled */
+ return (mad_send_wr->status == IB_WC_SUCCESS) ?
+ mad_send_wr : NULL;
+ }
+ }
+ return NULL;
+}
+
+static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_private *recv,
+ int solicited)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags;
+
+ /* Fully reassemble receive before processing */
+ recv = reassemble_recv(mad_agent_priv, recv);
+ if (!recv) {
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ wake_up(&mad_agent_priv->wait);
+ return;
+ }
+
+ /* Complete corresponding request */
+ if (solicited) {
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ mad_send_wr = find_send_req(mad_agent_priv,
+ recv->mad.mad.mad_hdr.tid);
+ if (!mad_send_wr) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ ib_free_recv_mad(&recv->header.recv_wc);
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ wake_up(&mad_agent_priv->wait);
+ return;
+ }
+ /* Timeout = 0 means that we won't wait for a response */
+ mad_send_wr->timeout = 0;
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ /* Defined behavior is to complete response before request */
+ recv->header.recv_wc.wc->wr_id = mad_send_wr->wr_id;
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent,
+ &recv->header.recv_wc);
+ atomic_dec(&mad_agent_priv->refcount);
+
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.wr_id = mad_send_wr->wr_id;
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ } else {
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent,
+ &recv->header.recv_wc);
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ wake_up(&mad_agent_priv->wait);
+ }
+}
+
+static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *recv, *response;
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_agent_private *mad_agent;
+ int solicited;
+
+ response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
+ if (!response)
+ printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
+ "for response buffer\n");
+
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ qp_info = mad_list->mad_queue->qp_info;
+ dequeue_mad(mad_list);
+
+ mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
+ mad_list);
+ recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
+ dma_unmap_single(port_priv->device->dma_device,
+ pci_unmap_addr(&recv->header, mapping),
+ sizeof(struct ib_mad_private) -
+ sizeof(struct ib_mad_private_header),
+ DMA_FROM_DEVICE);
+
+ /* Setup MAD receive work completion from "normal" work completion */
+ recv->header.recv_wc.wc = wc;
+ recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
+ recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
+ recv->header.recv_wc.recv_buf.grh = &recv->grh;
+
+ if (atomic_read(&qp_info->snoop_count))
+ snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
+
+ /* Validate MAD */
+ if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
+ goto out;
+
+ if (recv->mad.mad.mad_hdr.mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (!smi_handle_dr_smp_recv(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_priv->port_num,
+ port_priv->device->phys_port_cnt))
+ goto out;
+ if (!smi_check_forward_dr_smp(&recv->mad.smp))
+ goto local;
+ if (!smi_handle_dr_smp_send(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_priv->port_num))
+ goto out;
+ if (!smi_check_local_dr_smp(&recv->mad.smp,
+ port_priv->device,
+ port_priv->port_num))
+ goto out;
+ }
+
+local:
+ /* Give driver "right of first refusal" on incoming MAD */
+ if (port_priv->device->process_mad) {
+ int ret;
+
+ if (!response) {
+ printk(KERN_ERR PFX "No memory for response MAD\n");
+ /*
+ * Is it better to assume that
+ * it wouldn't be processed ?
+ */
+ goto out;
+ }
+
+ ret = port_priv->device->process_mad(port_priv->device, 0,
+ port_priv->port_num,
+ wc, &recv->grh,
+ &recv->mad.mad,
+ &response->mad.mad);
+ if (ret & IB_MAD_RESULT_SUCCESS) {
+ if (ret & IB_MAD_RESULT_CONSUMED)
+ goto out;
+ if (ret & IB_MAD_RESULT_REPLY) {
+ /* Send response */
+ if (!agent_send(response, &recv->grh, wc,
+ port_priv->device,
+ port_priv->port_num))
+ response = NULL;
+ goto out;
+ }
+ }
+ }
+
+ /* Determine corresponding MAD agent for incoming receive MAD */
+ solicited = solicited_mad(&recv->mad.mad);
+ mad_agent = find_mad_agent(port_priv, &recv->mad.mad, solicited);
+ if (mad_agent) {
+ ib_mad_complete_recv(mad_agent, recv, solicited);
+ /*
+ * recv is freed up in error cases in ib_mad_complete_recv
+ * or via recv_handler in ib_mad_complete_recv()
+ */
+ recv = NULL;
+ }
+
+out:
+ /* Post another receive request for this QP */
+ if (response) {
+ ib_mad_post_receive_mads(qp_info, response);
+ if (recv)
+ kmem_cache_free(ib_mad_cache, recv);
+ } else
+ ib_mad_post_receive_mads(qp_info, recv);
+}
+
+static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long delay;
+
+ if (list_empty(&mad_agent_priv->wait_list)) {
+ cancel_delayed_work(&mad_agent_priv->timed_work);
+ } else {
+ mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+
+ if (time_after(mad_agent_priv->timeout,
+ mad_send_wr->timeout)) {
+ mad_agent_priv->timeout = mad_send_wr->timeout;
+ cancel_delayed_work(&mad_agent_priv->timed_work);
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ queue_delayed_work(mad_agent_priv->qp_info->
+ port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
+ }
+ }
+}
+
+static void wait_for_response(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_wr_private *mad_send_wr )
+{
+ struct ib_mad_send_wr_private *temp_mad_send_wr;
+ struct list_head *list_item;
+ unsigned long delay;
+
+ list_del(&mad_send_wr->agent_list);
+
+ delay = mad_send_wr->timeout;
+ mad_send_wr->timeout += jiffies;
+
+ list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
+ temp_mad_send_wr = list_entry(list_item,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ if (time_after(mad_send_wr->timeout,
+ temp_mad_send_wr->timeout))
+ break;
+ }
+ list_add(&mad_send_wr->agent_list, list_item);
+
+ /* Reschedule a work item if we have a shorter timeout */
+ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
+ cancel_delayed_work(&mad_agent_priv->timed_work);
+ queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
+ }
+}
+
+/*
+ * Process a send work completion
+ */
+static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ unsigned long flags;
+
+ mad_agent_priv = container_of(mad_send_wr->agent,
+ struct ib_mad_agent_private, agent);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ if (mad_send_wc->status != IB_WC_SUCCESS &&
+ mad_send_wr->status == IB_WC_SUCCESS) {
+ mad_send_wr->status = mad_send_wc->status;
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+ }
+
+ if (--mad_send_wr->refcount > 0) {
+ if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
+ mad_send_wr->status == IB_WC_SUCCESS) {
+ wait_for_response(mad_agent_priv, mad_send_wr);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ return;
+ }
+
+ /* Remove send from MAD agent and notify client of completion */
+ list_del(&mad_send_wr->agent_list);
+ adjust_timeout(mad_agent_priv);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ if (mad_send_wr->status != IB_WC_SUCCESS )
+ mad_send_wc->status = mad_send_wr->status;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ mad_send_wc);
+
+ /* Release reference on agent taken when sending */
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ wake_up(&mad_agent_priv->wait);
+
+ kfree(mad_send_wr);
+}
+
+static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_queue *send_queue;
+ struct ib_send_wr *bad_send_wr;
+ unsigned long flags;
+ int ret;
+
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+ mad_list);
+ send_queue = mad_list->mad_queue;
+ qp_info = send_queue->qp_info;
+
+retry:
+ queued_send_wr = NULL;
+ spin_lock_irqsave(&send_queue->lock, flags);
+ list_del(&mad_list->list);
+
+ /* Move queued send to the send queue */
+ if (send_queue->count-- > send_queue->max_active) {
+ mad_list = container_of(qp_info->overflow_list.next,
+ struct ib_mad_list_head, list);
+ queued_send_wr = container_of(mad_list,
+ struct ib_mad_send_wr_private,
+ mad_list);
+ list_del(&mad_list->list);
+ list_add_tail(&mad_list->list, &send_queue->list);
+ }
+ spin_unlock_irqrestore(&send_queue->lock, flags);
+
+ /* Restore client wr_id in WC and complete send */
+ wc->wr_id = mad_send_wr->wr_id;
+ if (atomic_read(&qp_info->snoop_count))
+ snoop_send(qp_info, &mad_send_wr->send_wr,
+ (struct ib_mad_send_wc *)wc,
+ IB_MAD_SNOOP_SEND_COMPLETIONS);
+ ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
+
+ if (queued_send_wr) {
+ ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+ &bad_send_wr);
+ if (ret) {
+ printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ mad_send_wr = queued_send_wr;
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ goto retry;
+ }
+ }
+}
+
+static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_list_head *mad_list;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+ list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
+ mad_send_wr = container_of(mad_list,
+ struct ib_mad_send_wr_private,
+ mad_list);
+ mad_send_wr->retry = 1;
+ }
+ spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+}
+
+static void mad_error_handler(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
+{
+ struct ib_mad_list_head *mad_list;
+ struct ib_mad_qp_info *qp_info;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ int ret;
+
+ /* Determine if failure was a send or receive */
+ mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ qp_info = mad_list->mad_queue->qp_info;
+ if (mad_list->mad_queue == &qp_info->recv_queue)
+ /*
+ * Receive errors indicate that the QP has entered the error
+ * state - error handling/shutdown code will cleanup
+ */
+ return;
+
+ /*
+ * Send errors will transition the QP to SQE - move
+ * QP to RTS and repost flushed work requests
+ */
+ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+ mad_list);
+ if (wc->status == IB_WC_WR_FLUSH_ERR) {
+ if (mad_send_wr->retry) {
+ /* Repost send */
+ struct ib_send_wr *bad_send_wr;
+
+ mad_send_wr->retry = 0;
+ ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+ &bad_send_wr);
+ if (ret)
+ ib_mad_send_done_handler(port_priv, wc);
+ } else
+ ib_mad_send_done_handler(port_priv, wc);
+ } else {
+ struct ib_qp_attr *attr;
+
+ /* Transition QP to RTS and fail offending send */
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (attr) {
+ attr->qp_state = IB_QPS_RTS;
+ attr->cur_qp_state = IB_QPS_SQE;
+ ret = ib_modify_qp(qp_info->qp, attr,
+ IB_QP_STATE | IB_QP_CUR_STATE);
+ kfree(attr);
+ if (ret)
+ printk(KERN_ERR PFX "mad_error_handler - "
+ "ib_modify_qp to RTS : %d\n", ret);
+ else
+ mark_sends_for_retry(qp_info);
+ }
+ ib_mad_send_done_handler(port_priv, wc);
+ }
+}
+
+/*
+ * IB MAD completion callback
+ */
+static void ib_mad_completion_handler(void *data)
+{
+ struct ib_mad_port_private *port_priv;
+ struct ib_wc wc;
+
+ port_priv = (struct ib_mad_port_private *)data;
+ ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+
+ while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_SEND:
+ ib_mad_send_done_handler(port_priv, &wc);
+ break;
+ case IB_WC_RECV:
+ ib_mad_recv_done_handler(port_priv, &wc);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ } else
+ mad_error_handler(port_priv, &wc);
+ }
+}
+
+static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ unsigned long flags;
+ struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ struct list_head cancel_list;
+
+ INIT_LIST_HEAD(&cancel_list);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &mad_agent_priv->send_list, agent_list) {
+ if (mad_send_wr->status == IB_WC_SUCCESS) {
+ mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+ }
+ }
+
+ /* Empty wait list to prevent receives from finding a request */
+ list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ /* Report all cancelled requests */
+ mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &cancel_list, agent_list) {
+ mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ list_del(&mad_send_wr->agent_list);
+ kfree(mad_send_wr);
+ atomic_dec(&mad_agent_priv->refcount);
+ }
+}
+
+static struct ib_mad_send_wr_private*
+find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv,
+ u64 wr_id)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
+ agent_list) {
+ if (mad_send_wr->wr_id == wr_id)
+ return mad_send_wr;
+ }
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
+ agent_list) {
+ if (mad_send_wr->wr_id == wr_id)
+ return mad_send_wr;
+ }
+ return NULL;
+}
+
+void cancel_sends(void *data)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags;
+
+ mad_agent_priv = data;
+
+ mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->canceled_list)) {
+ mad_send_wr = list_entry(mad_agent_priv->canceled_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ kfree(mad_send_wr);
+ if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ wake_up(&mad_agent_priv->wait);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ u64 wr_id)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long flags;
+
+ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+ agent);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
+ if (!mad_send_wr) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ goto out;
+ }
+
+ if (mad_send_wr->status == IB_WC_SUCCESS)
+ mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
+
+ if (mad_send_wr->refcount != 0) {
+ mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ goto out;
+ }
+
+ list_del(&mad_send_wr->agent_list);
+ list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->canceled_list);
+ adjust_timeout(mad_agent_priv);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ queue_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->canceled_work);
+out:
+ return;
+}
+EXPORT_SYMBOL(ib_cancel_mad);
+
+static void local_completions(void *data)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_local_private *local;
+ struct ib_mad_agent_private *recv_mad_agent;
+ unsigned long flags;
+ struct ib_wc wc;
+ struct ib_mad_send_wc mad_send_wc;
+
+ mad_agent_priv = (struct ib_mad_agent_private *)data;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->local_list)) {
+ local = list_entry(mad_agent_priv->local_list.next,
+ struct ib_mad_local_private,
+ completion_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (local->mad_priv) {
+ recv_mad_agent = local->recv_mad_agent;
+ if (!recv_mad_agent) {
+ printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ kmem_cache_free(ib_mad_cache, local->mad_priv);
+ goto local_send_completion;
+ }
+
+ /*
+ * Defined behavior is to complete response
+ * before request
+ */
+ build_smp_wc(local->wr_id, IB_LID_PERMISSIVE,
+ 0 /* pkey index */,
+ recv_mad_agent->agent.port_num, &wc);
+
+ local->mad_priv->header.recv_wc.wc = &wc;
+ local->mad_priv->header.recv_wc.mad_len =
+ sizeof(struct ib_mad);
+ INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.recv_buf.list);
+ local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
+ local->mad_priv->header.recv_wc.recv_buf.mad =
+ &local->mad_priv->mad.mad;
+ if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
+ snoop_recv(recv_mad_agent->qp_info,
+ &local->mad_priv->header.recv_wc,
+ IB_MAD_SNOOP_RECVS);
+ recv_mad_agent->agent.recv_handler(
+ &recv_mad_agent->agent,
+ &local->mad_priv->header.recv_wc);
+ spin_lock_irqsave(&recv_mad_agent->lock, flags);
+ atomic_dec(&recv_mad_agent->refcount);
+ spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
+ }
+
+local_send_completion:
+ /* Complete send */
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.wr_id = local->wr_id;
+ if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
+ snoop_send(mad_agent_priv->qp_info, &local->send_wr,
+ &mad_send_wc,
+ IB_MAD_SNOOP_SEND_COMPLETIONS);
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&local->completion_list);
+ atomic_dec(&mad_agent_priv->refcount);
+ kfree(local);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+static void timeout_sends(void *data)
+{
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
+ unsigned long flags, delay;
+
+ mad_agent_priv = (struct ib_mad_agent_private *)data;
+
+ mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->wait_list)) {
+ mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+
+ if (time_after(mad_send_wr->timeout, jiffies)) {
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ queue_delayed_work(mad_agent_priv->qp_info->
+ port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
+ break;
+ }
+
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+
+ kfree(mad_send_wr);
+ atomic_dec(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
+static void ib_mad_thread_completion_handler(struct ib_cq *cq)
+{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+
+ queue_work(port_priv->wq, &port_priv->work);
+}
+
+/*
+ * Allocate receive MADs and post receive WRs for them
+ */
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_private *mad)
+{
+ unsigned long flags;
+ int post, ret;
+ struct ib_mad_private *mad_priv;
+ struct ib_sge sg_list;
+ struct ib_recv_wr recv_wr, *bad_recv_wr;
+ struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+
+ /* Initialize common scatter list fields */
+ sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
+ sg_list.lkey = (*qp_info->port_priv->mr).lkey;
+
+ /* Initialize common receive WR fields */
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sg_list;
+ recv_wr.num_sge = 1;
+
+ do {
+ /* Allocate and map receive buffer */
+ if (mad) {
+ mad_priv = mad;
+ mad = NULL;
+ } else {
+ mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
+ if (!mad_priv) {
+ printk(KERN_ERR PFX "No memory for receive buffer\n");
+ ret = -ENOMEM;
+ break;
+ }
+ }
+ sg_list.addr = dma_map_single(qp_info->port_priv->
+ device->dma_device,
+ &mad_priv->grh,
+ sizeof *mad_priv -
+ sizeof mad_priv->header,
+ DMA_FROM_DEVICE);
+ pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
+ recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
+ mad_priv->header.mad_list.mad_queue = recv_queue;
+
+ /* Post receive WR */
+ spin_lock_irqsave(&recv_queue->lock, flags);
+ post = (++recv_queue->count < recv_queue->max_active);
+ list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
+ if (ret) {
+ spin_lock_irqsave(&recv_queue->lock, flags);
+ list_del(&mad_priv->header.mad_list.list);
+ recv_queue->count--;
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ dma_unmap_single(qp_info->port_priv->device->dma_device,
+ pci_unmap_addr(&mad_priv->header,
+ mapping),
+ sizeof *mad_priv -
+ sizeof mad_priv->header,
+ DMA_FROM_DEVICE);
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ break;
+ }
+ } while (post);
+
+ return ret;
+}
+
+/*
+ * Return all the posted receive MADs
+ */
+static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
+{
+ struct ib_mad_private_header *mad_priv_hdr;
+ struct ib_mad_private *recv;
+ struct ib_mad_list_head *mad_list;
+
+ while (!list_empty(&qp_info->recv_queue.list)) {
+
+ mad_list = list_entry(qp_info->recv_queue.list.next,
+ struct ib_mad_list_head, list);
+ mad_priv_hdr = container_of(mad_list,
+ struct ib_mad_private_header,
+ mad_list);
+ recv = container_of(mad_priv_hdr, struct ib_mad_private,
+ header);
+
+ /* Remove from posted receive MAD list */
+ list_del(&mad_list->list);
+
+ /* Undo PCI mapping */
+ dma_unmap_single(qp_info->port_priv->device->dma_device,
+ pci_unmap_addr(&recv->header, mapping),
+ sizeof(struct ib_mad_private) -
+ sizeof(struct ib_mad_private_header),
+ DMA_FROM_DEVICE);
+ kmem_cache_free(ib_mad_cache, recv);
+ }
+
+ qp_info->recv_queue.count = 0;
+}
+
+/*
+ * Start the port
+ */
+static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
+{
+ int ret, i;
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr) {
+ printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+ qp = port_priv->qp_info[i].qp;
+ /*
+ * PKey index for QP1 is irrelevant but
+ * one is needed for the Reset to Init transition
+ */
+ attr->qp_state = IB_QPS_INIT;
+ attr->pkey_index = 0;
+ attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE |
+ IB_QP_PKEY_INDEX | IB_QP_QKEY);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "INIT: %d\n", i, ret);
+ goto out;
+ }
+
+ attr->qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "RTR: %d\n", i, ret);
+ goto out;
+ }
+
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = IB_MAD_SEND_Q_PSN;
+ ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't change QP%d state to "
+ "RTS: %d\n", i, ret);
+ goto out;
+ }
+ }
+
+ ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ printk(KERN_ERR PFX "Failed to request completion "
+ "notification: %d\n", ret);
+ goto out;
+ }
+
+ for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+ ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ goto out;
+ }
+ }
+out:
+ kfree(attr);
+ return ret;
+}
+
+static void qp_event_handler(struct ib_event *event, void *qp_context)
+{
+ struct ib_mad_qp_info *qp_info = qp_context;
+
+ /* It's worse than that! He's dead, Jim! */
+ printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ event->event, qp_info->qp->qp_num);
+}
+
+static void init_mad_queue(struct ib_mad_qp_info *qp_info,
+ struct ib_mad_queue *mad_queue)
+{
+ mad_queue->qp_info = qp_info;
+ mad_queue->count = 0;
+ spin_lock_init(&mad_queue->lock);
+ INIT_LIST_HEAD(&mad_queue->list);
+}
+
+static void init_mad_qp(struct ib_mad_port_private *port_priv,
+ struct ib_mad_qp_info *qp_info)
+{
+ qp_info->port_priv = port_priv;
+ init_mad_queue(qp_info, &qp_info->send_queue);
+ init_mad_queue(qp_info, &qp_info->recv_queue);
+ INIT_LIST_HEAD(&qp_info->overflow_list);
+ spin_lock_init(&qp_info->snoop_lock);
+ qp_info->snoop_table = NULL;
+ qp_info->snoop_table_size = 0;
+ atomic_set(&qp_info->snoop_count, 0);
+}
+
+static int create_mad_qp(struct ib_mad_qp_info *qp_info,
+ enum ib_qp_type qp_type)
+{
+ struct ib_qp_init_attr qp_init_attr;
+ int ret;
+
+ memset(&qp_init_attr, 0, sizeof qp_init_attr);
+ qp_init_attr.send_cq = qp_info->port_priv->cq;
+ qp_init_attr.recv_cq = qp_info->port_priv->cq;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
+ qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+ qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
+ qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
+ qp_init_attr.qp_type = qp_type;
+ qp_init_attr.port_num = qp_info->port_priv->port_num;
+ qp_init_attr.qp_context = qp_info;
+ qp_init_attr.event_handler = qp_event_handler;
+ qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
+ if (IS_ERR(qp_info->qp)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
+ ret = PTR_ERR(qp_info->qp);
+ goto error;
+ }
+ /* Use minimum queue sizes unless the CQ is resized */
+ qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
+ qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
+ return 0;
+
+error:
+ return ret;
+}
+
+static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
+{
+ ib_destroy_qp(qp_info->qp);
+ if (qp_info->snoop_table)
+ kfree(qp_info->snoop_table);
+}
+
+/*
+ * Open the port
+ * Create the QP, PD, MR, and CQ if needed
+ */
+static int ib_mad_port_open(struct ib_device *device,
+ int port_num)
+{
+ int ret, cq_size;
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+ char name[sizeof "ib_mad123"];
+
+ /* First, check if port already open at MAD layer */
+ port_priv = ib_get_mad_port(device, port_num);
+ if (port_priv) {
+ printk(KERN_DEBUG PFX "%s port %d already open\n",
+ device->name, port_num);
+ return 0;
+ }
+
+ /* Create new device info */
+ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
+ if (!port_priv) {
+ printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ return -ENOMEM;
+ }
+ memset(port_priv, 0, sizeof *port_priv);
+ port_priv->device = device;
+ port_priv->port_num = port_num;
+ spin_lock_init(&port_priv->reg_lock);
+ INIT_LIST_HEAD(&port_priv->agent_list);
+ init_mad_qp(port_priv, &port_priv->qp_info[0]);
+ init_mad_qp(port_priv, &port_priv->qp_info[1]);
+
+ cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
+ port_priv->cq = ib_create_cq(port_priv->device,
+ (ib_comp_handler)
+ ib_mad_thread_completion_handler,
+ NULL, port_priv, cq_size);
+ if (IS_ERR(port_priv->cq)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ ret = PTR_ERR(port_priv->cq);
+ goto error3;
+ }
+
+ port_priv->pd = ib_alloc_pd(device);
+ if (IS_ERR(port_priv->pd)) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ ret = PTR_ERR(port_priv->pd);
+ goto error4;
+ }
+
+ port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(port_priv->mr)) {
+ printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ ret = PTR_ERR(port_priv->mr);
+ goto error5;
+ }
+
+ ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
+ if (ret)
+ goto error6;
+ ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
+ if (ret)
+ goto error7;
+
+ snprintf(name, sizeof name, "ib_mad%d", port_num);
+ port_priv->wq = create_singlethread_workqueue(name);
+ if (!port_priv->wq) {
+ ret = -ENOMEM;
+ goto error8;
+ }
+ INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
+
+ ret = ib_mad_port_start(port_priv);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't start port\n");
+ goto error9;
+ }
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ list_add_tail(&port_priv->port_list, &ib_mad_port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+ return 0;
+
+error9:
+ destroy_workqueue(port_priv->wq);
+error8:
+ destroy_mad_qp(&port_priv->qp_info[1]);
+error7:
+ destroy_mad_qp(&port_priv->qp_info[0]);
+error6:
+ ib_dereg_mr(port_priv->mr);
+error5:
+ ib_dealloc_pd(port_priv->pd);
+error4:
+ ib_destroy_cq(port_priv->cq);
+ cleanup_recv_queue(&port_priv->qp_info[1]);
+ cleanup_recv_queue(&port_priv->qp_info[0]);
+error3:
+ kfree(port_priv);
+
+ return ret;
+}
+
+/*
+ * Close the port
+ * If there are no classes using the port, free the port
+ * resources (CQ, MR, PD, QP) and remove the port's info structure
+ */
+static int ib_mad_port_close(struct ib_device *device, int port_num)
+{
+ struct ib_mad_port_private *port_priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+ port_priv = __ib_get_mad_port(device, port_num);
+ if (port_priv == NULL) {
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+ printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ return -ENODEV;
+ }
+ list_del(&port_priv->port_list);
+ spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
+ /* Stop processing completions. */
+ flush_workqueue(port_priv->wq);
+ destroy_workqueue(port_priv->wq);
+ destroy_mad_qp(&port_priv->qp_info[1]);
+ destroy_mad_qp(&port_priv->qp_info[0]);
+ ib_dereg_mr(port_priv->mr);
+ ib_dealloc_pd(port_priv->pd);
+ ib_destroy_cq(port_priv->cq);
+ cleanup_recv_queue(&port_priv->qp_info[1]);
+ cleanup_recv_queue(&port_priv->qp_info[0]);
+ /* XXX: Handle deallocation of MAD registration tables */
+
+ kfree(port_priv);
+
+ return 0;
+}
+
+static void ib_mad_init_device(struct ib_device *device)
+{
+ int ret, num_ports, cur_port, i, ret2;
+
+ if (device->node_type == IB_NODE_SWITCH) {
+ num_ports = 1;
+ cur_port = 0;
+ } else {
+ num_ports = device->phys_port_cnt;
+ cur_port = 1;
+ }
+ for (i = 0; i < num_ports; i++, cur_port++) {
+ ret = ib_mad_port_open(device, cur_port);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't open %s port %d\n",
+ device->name, cur_port);
+ goto error_device_open;
+ }
+ ret = ib_agent_port_open(device, cur_port);
+ if (ret) {
+ printk(KERN_ERR PFX "Couldn't open %s port %d "
+ "for agents\n",
+ device->name, cur_port);
+ goto error_device_open;
+ }
+ }
+
+ goto error_device_query;
+
+error_device_open:
+ while (i > 0) {
+ cur_port--;
+ ret2 = ib_agent_port_close(device, cur_port);
+ if (ret2) {
+ printk(KERN_ERR PFX "Couldn't close %s port %d "
+ "for agents\n",
+ device->name, cur_port);
+ }
+ ret2 = ib_mad_port_close(device, cur_port);
+ if (ret2) {
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, cur_port);
+ }
+ i--;
+ }
+
+error_device_query:
+ return;
+}
+
+static void ib_mad_remove_device(struct ib_device *device)
+{
+ int ret = 0, i, num_ports, cur_port, ret2;
+
+ if (device->node_type == IB_NODE_SWITCH) {
+ num_ports = 1;
+ cur_port = 0;
+ } else {
+ num_ports = device->phys_port_cnt;
+ cur_port = 1;
+ }
+ for (i = 0; i < num_ports; i++, cur_port++) {
+ ret2 = ib_agent_port_close(device, cur_port);
+ if (ret2) {
+ printk(KERN_ERR PFX "Couldn't close %s port %d "
+ "for agents\n",
+ device->name, cur_port);
+ if (!ret)
+ ret = ret2;
+ }
+ ret2 = ib_mad_port_close(device, cur_port);
+ if (ret2) {
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, cur_port);
+ if (!ret)
+ ret = ret2;
+ }
+ }
+}
+
+static struct ib_client mad_client = {
+ .name = "mad",
+ .add = ib_mad_init_device,
+ .remove = ib_mad_remove_device
+};
+
+static int __init ib_mad_init_module(void)
+{
+ int ret;
+
+ spin_lock_init(&ib_mad_port_list_lock);
+ spin_lock_init(&ib_agent_port_list_lock);
+
+ ib_mad_cache = kmem_cache_create("ib_mad",
+ sizeof(struct ib_mad_private),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL,
+ NULL);
+ if (!ib_mad_cache) {
+ printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ ret = -ENOMEM;
+ goto error1;
+ }
+
+ INIT_LIST_HEAD(&ib_mad_port_list);
+
+ if (ib_register_client(&mad_client)) {
+ printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ ret = -EINVAL;
+ goto error2;
+ }
+
+ return 0;
+
+error2:
+ kmem_cache_destroy(ib_mad_cache);
+error1:
+ return ret;
+}
+
+static void __exit ib_mad_cleanup_module(void)
+{
+ ib_unregister_client(&mad_client);
+
+ if (kmem_cache_destroy(ib_mad_cache)) {
+ printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n");
+ }
+}
+
+module_init(ib_mad_init_module);
+module_exit(ib_mad_cleanup_module);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
new file mode 100644
index 00000000000..4ba9f726bf1
--- /dev/null
+++ b/drivers/infiniband/core/mad_priv.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mad_priv.h 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#ifndef __IB_MAD_PRIV_H__
+#define __IB_MAD_PRIV_H__
+
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <ib_mad.h>
+#include <ib_smi.h>
+
+
+#define PFX "ib_mad: "
+
+#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
+
+/* QP and CQ parameters */
+#define IB_MAD_QP_SEND_SIZE 128
+#define IB_MAD_QP_RECV_SIZE 512
+#define IB_MAD_SEND_REQ_MAX_SG 2
+#define IB_MAD_RECV_REQ_MAX_SG 1
+
+#define IB_MAD_SEND_Q_PSN 0
+
+/* Registration table sizes */
+#define MAX_MGMT_CLASS 80
+#define MAX_MGMT_VERSION 8
+#define MAX_MGMT_OUI 8
+#define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \
+ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1)
+
+struct ib_mad_list_head {
+ struct list_head list;
+ struct ib_mad_queue *mad_queue;
+};
+
+struct ib_mad_private_header {
+ struct ib_mad_list_head mad_list;
+ struct ib_mad_recv_wc recv_wc;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+} __attribute__ ((packed));
+
+struct ib_mad_private {
+ struct ib_mad_private_header header;
+ struct ib_grh grh;
+ union {
+ struct ib_mad mad;
+ struct ib_rmpp_mad rmpp_mad;
+ struct ib_smp smp;
+ } mad;
+} __attribute__ ((packed));
+
+struct ib_mad_agent_private {
+ struct list_head agent_list;
+ struct ib_mad_agent agent;
+ struct ib_mad_reg_req *reg_req;
+ struct ib_mad_qp_info *qp_info;
+
+ spinlock_t lock;
+ struct list_head send_list;
+ struct list_head wait_list;
+ struct work_struct timed_work;
+ unsigned long timeout;
+ struct list_head local_list;
+ struct work_struct local_work;
+ struct list_head canceled_list;
+ struct work_struct canceled_work;
+
+ atomic_t refcount;
+ wait_queue_head_t wait;
+ u8 rmpp_version;
+};
+
+struct ib_mad_snoop_private {
+ struct ib_mad_agent agent;
+ struct ib_mad_qp_info *qp_info;
+ int snoop_index;
+ int mad_snoop_flags;
+ atomic_t refcount;
+ wait_queue_head_t wait;
+};
+
+struct ib_mad_send_wr_private {
+ struct ib_mad_list_head mad_list;
+ struct list_head agent_list;
+ struct ib_mad_agent *agent;
+ struct ib_send_wr send_wr;
+ struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
+ u64 wr_id; /* client WR ID */
+ u64 tid;
+ unsigned long timeout;
+ int retry;
+ int refcount;
+ enum ib_wc_status status;
+};
+
+struct ib_mad_local_private {
+ struct list_head completion_list;
+ struct ib_mad_private *mad_priv;
+ struct ib_mad_agent_private *recv_mad_agent;
+ struct ib_send_wr send_wr;
+ struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
+ u64 wr_id; /* client WR ID */
+ u64 tid;
+};
+
+struct ib_mad_mgmt_method_table {
+ struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS];
+};
+
+struct ib_mad_mgmt_class_table {
+ struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
+};
+
+struct ib_mad_mgmt_vendor_class {
+ u8 oui[MAX_MGMT_OUI][3];
+ struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI];
+};
+
+struct ib_mad_mgmt_vendor_class_table {
+ struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2];
+};
+
+struct ib_mad_mgmt_version_table {
+ struct ib_mad_mgmt_class_table *class;
+ struct ib_mad_mgmt_vendor_class_table *vendor;
+};
+
+struct ib_mad_queue {
+ spinlock_t lock;
+ struct list_head list;
+ int count;
+ int max_active;
+ struct ib_mad_qp_info *qp_info;
+};
+
+struct ib_mad_qp_info {
+ struct ib_mad_port_private *port_priv;
+ struct ib_qp *qp;
+ struct ib_mad_queue send_queue;
+ struct ib_mad_queue recv_queue;
+ struct list_head overflow_list;
+ spinlock_t snoop_lock;
+ struct ib_mad_snoop_private **snoop_table;
+ int snoop_table_size;
+ atomic_t snoop_count;
+};
+
+struct ib_mad_port_private {
+ struct list_head port_list;
+ struct ib_device *device;
+ int port_num;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+
+ spinlock_t reg_lock;
+ struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
+ struct list_head agent_list;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+ struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
+};
+
+extern kmem_cache_t *ib_mad_cache;
+
+#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
new file mode 100644
index 00000000000..5f15feffeae
--- /dev/null
+++ b/drivers/infiniband/core/packer.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <ib_pack.h>
+
+static u64 value_read(int offset, int size, void *structure)
+{
+ switch (size) {
+ case 1: return *(u8 *) (structure + offset);
+ case 2: return be16_to_cpup((__be16 *) (structure + offset));
+ case 4: return be32_to_cpup((__be32 *) (structure + offset));
+ case 8: return be64_to_cpup((__be64 *) (structure + offset));
+ default:
+ printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ return 0;
+ }
+}
+
+/**
+ * ib_pack - Pack a structure into a buffer
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @structure:Structure to pack from
+ * @buf:Buffer to pack into
+ *
+ * ib_pack() packs a list of structure fields into a buffer,
+ * controlled by the array of fields in @desc.
+ */
+void ib_pack(const struct ib_field *desc,
+ int desc_len,
+ void *structure,
+ void *buf)
+{
+ int i;
+
+ for (i = 0; i < desc_len; ++i) {
+ if (desc[i].size_bits <= 32) {
+ int shift;
+ u32 val;
+ __be32 mask;
+ __be32 *addr;
+
+ shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+ if (desc[i].struct_size_bytes)
+ val = value_read(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ structure) << shift;
+ else
+ val = 0;
+
+ mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
+ addr = (__be32 *) buf + desc[i].offset_words;
+ *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
+ } else if (desc[i].size_bits <= 64) {
+ int shift;
+ u64 val;
+ __be64 mask;
+ __be64 *addr;
+
+ shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+ if (desc[i].struct_size_bytes)
+ val = value_read(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ structure) << shift;
+ else
+ val = 0;
+
+ mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift);
+ addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
+ *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
+ } else {
+ if (desc[i].offset_bits % 8 ||
+ desc[i].size_bits % 8) {
+ printk(KERN_WARNING "Structure field %s of size %d "
+ "bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
+ }
+
+ if (desc[i].struct_size_bytes)
+ memcpy(buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ structure + desc[i].struct_offset_bytes,
+ desc[i].size_bits / 8);
+ else
+ memset(buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ 0,
+ desc[i].size_bits / 8);
+ }
+ }
+}
+EXPORT_SYMBOL(ib_pack);
+
+static void value_write(int offset, int size, u64 val, void *structure)
+{
+ switch (size * 8) {
+ case 8: *( u8 *) (structure + offset) = val; break;
+ case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
+ case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
+ case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
+ default:
+ printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+ }
+}
+
+/**
+ * ib_unpack - Unpack a buffer into a structure
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @buf:Buffer to unpack from
+ * @structure:Structure to unpack into
+ *
+ * ib_pack() unpacks a list of structure fields from a buffer,
+ * controlled by the array of fields in @desc.
+ */
+void ib_unpack(const struct ib_field *desc,
+ int desc_len,
+ void *buf,
+ void *structure)
+{
+ int i;
+
+ for (i = 0; i < desc_len; ++i) {
+ if (!desc[i].struct_size_bytes)
+ continue;
+
+ if (desc[i].size_bits <= 32) {
+ int shift;
+ u32 val;
+ u32 mask;
+ __be32 *addr;
+
+ shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+ mask = ((1ull << desc[i].size_bits) - 1) << shift;
+ addr = (__be32 *) buf + desc[i].offset_words;
+ val = (be32_to_cpup(addr) & mask) >> shift;
+ value_write(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ val,
+ structure);
+ } else if (desc[i].size_bits <= 64) {
+ int shift;
+ u64 val;
+ u64 mask;
+ __be64 *addr;
+
+ shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+ mask = ((1ull << desc[i].size_bits) - 1) << shift;
+ addr = (__be64 *) buf + desc[i].offset_words;
+ val = (be64_to_cpup(addr) & mask) >> shift;
+ value_write(desc[i].struct_offset_bytes,
+ desc[i].struct_size_bytes,
+ val,
+ structure);
+ } else {
+ if (desc[i].offset_bits % 8 ||
+ desc[i].size_bits % 8) {
+ printk(KERN_WARNING "Structure field %s of size %d "
+ "bits is not byte-aligned\n",
+ desc[i].field_name, desc[i].size_bits);
+ }
+
+ memcpy(structure + desc[i].struct_offset_bytes,
+ buf + desc[i].offset_words * 4 +
+ desc[i].offset_bits / 8,
+ desc[i].size_bits / 8);
+ }
+ }
+}
+EXPORT_SYMBOL(ib_unpack);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
new file mode 100644
index 00000000000..d4233ee61c3
--- /dev/null
+++ b/drivers/infiniband/core/sa_query.c
@@ -0,0 +1,866 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: sa_query.c 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include <ib_pack.h>
+#include <ib_sa.h>
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand subnet administration query support");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/*
+ * These two structures must be packed because they have 64-bit fields
+ * that are only 32-bit aligned. 64-bit architectures will lay them
+ * out wrong otherwise. (And unfortunately they are sent on the wire
+ * so we can't change the layout)
+ */
+struct ib_sa_hdr {
+ u64 sm_key;
+ u16 attr_offset;
+ u16 reserved;
+ ib_sa_comp_mask comp_mask;
+} __attribute__ ((packed));
+
+struct ib_sa_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ struct ib_sa_hdr sa_hdr;
+ u8 data[200];
+} __attribute__ ((packed));
+
+struct ib_sa_sm_ah {
+ struct ib_ah *ah;
+ struct kref ref;
+};
+
+struct ib_sa_port {
+ struct ib_mad_agent *agent;
+ struct ib_mr *mr;
+ struct ib_sa_sm_ah *sm_ah;
+ struct work_struct update_task;
+ spinlock_t ah_lock;
+ u8 port_num;
+};
+
+struct ib_sa_device {
+ int start_port, end_port;
+ struct ib_event_handler event_handler;
+ struct ib_sa_port port[0];
+};
+
+struct ib_sa_query {
+ void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*release)(struct ib_sa_query *);
+ struct ib_sa_port *port;
+ struct ib_sa_mad *mad;
+ struct ib_sa_sm_ah *sm_ah;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ int id;
+};
+
+struct ib_sa_path_query {
+ void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+struct ib_sa_mcmember_query {
+ void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
+static void ib_sa_add_one(struct ib_device *device);
+static void ib_sa_remove_one(struct ib_device *device);
+
+static struct ib_client sa_client = {
+ .name = "sa",
+ .add = ib_sa_add_one,
+ .remove = ib_sa_remove_one
+};
+
+static spinlock_t idr_lock;
+static DEFINE_IDR(query_idr);
+
+static spinlock_t tid_lock;
+static u32 tid;
+
+enum {
+ IB_SA_ATTR_CLASS_PORTINFO = 0x01,
+ IB_SA_ATTR_NOTICE = 0x02,
+ IB_SA_ATTR_INFORM_INFO = 0x03,
+ IB_SA_ATTR_NODE_REC = 0x11,
+ IB_SA_ATTR_PORT_INFO_REC = 0x12,
+ IB_SA_ATTR_SL2VL_REC = 0x13,
+ IB_SA_ATTR_SWITCH_REC = 0x14,
+ IB_SA_ATTR_LINEAR_FDB_REC = 0x15,
+ IB_SA_ATTR_RANDOM_FDB_REC = 0x16,
+ IB_SA_ATTR_MCAST_FDB_REC = 0x17,
+ IB_SA_ATTR_SM_INFO_REC = 0x18,
+ IB_SA_ATTR_LINK_REC = 0x20,
+ IB_SA_ATTR_GUID_INFO_REC = 0x30,
+ IB_SA_ATTR_SERVICE_REC = 0x31,
+ IB_SA_ATTR_PARTITION_REC = 0x33,
+ IB_SA_ATTR_RANGE_REC = 0x34,
+ IB_SA_ATTR_PATH_REC = 0x35,
+ IB_SA_ATTR_VL_ARB_REC = 0x36,
+ IB_SA_ATTR_MC_GROUP_REC = 0x37,
+ IB_SA_ATTR_MC_MEMBER_REC = 0x38,
+ IB_SA_ATTR_TRACE_REC = 0x39,
+ IB_SA_ATTR_MULTI_PATH_REC = 0x3a,
+ IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b
+};
+
+#define PATH_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field path_rec_table[] = {
+ { RESERVED,
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { PATH_REC_FIELD(dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { PATH_REC_FIELD(slid),
+ .offset_words = 10,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { PATH_REC_FIELD(raw_traffic),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 11,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { PATH_REC_FIELD(flow_label),
+ .offset_words = 11,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { PATH_REC_FIELD(hop_limit),
+ .offset_words = 11,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { PATH_REC_FIELD(traffic_class),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { PATH_REC_FIELD(reversible),
+ .offset_words = 12,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { PATH_REC_FIELD(numb_path),
+ .offset_words = 12,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { PATH_REC_FIELD(pkey),
+ .offset_words = 12,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 12 },
+ { PATH_REC_FIELD(sl),
+ .offset_words = 13,
+ .offset_bits = 12,
+ .size_bits = 4 },
+ { PATH_REC_FIELD(mtu_selector),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(mtu),
+ .offset_words = 13,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(rate_selector),
+ .offset_words = 13,
+ .offset_bits = 24,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(rate),
+ .offset_words = 13,
+ .offset_bits = 26,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { PATH_REC_FIELD(packet_life_time),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { PATH_REC_FIELD(preference),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 48 },
+};
+
+#define MCMEMBER_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .field_name = "sa_mcmember_rec:" #field
+
+static const struct ib_field mcmember_rec_table[] = {
+ { MCMEMBER_REC_FIELD(mgid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { MCMEMBER_REC_FIELD(port_gid),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { MCMEMBER_REC_FIELD(qkey),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { MCMEMBER_REC_FIELD(mlid),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { MCMEMBER_REC_FIELD(mtu_selector),
+ .offset_words = 9,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(mtu),
+ .offset_words = 9,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(traffic_class),
+ .offset_words = 9,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { MCMEMBER_REC_FIELD(pkey),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { MCMEMBER_REC_FIELD(rate_selector),
+ .offset_words = 10,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(rate),
+ .offset_words = 10,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(packet_life_time_selector),
+ .offset_words = 10,
+ .offset_bits = 24,
+ .size_bits = 2 },
+ { MCMEMBER_REC_FIELD(packet_life_time),
+ .offset_words = 10,
+ .offset_bits = 26,
+ .size_bits = 6 },
+ { MCMEMBER_REC_FIELD(sl),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(flow_label),
+ .offset_words = 11,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { MCMEMBER_REC_FIELD(hop_limit),
+ .offset_words = 11,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { MCMEMBER_REC_FIELD(scope),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(join_state),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { MCMEMBER_REC_FIELD(proxy_join),
+ .offset_words = 12,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 9,
+ .size_bits = 23 },
+};
+
+static void free_sm_ah(struct kref *kref)
+{
+ struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
+
+ ib_destroy_ah(sm_ah->ah);
+ kfree(sm_ah);
+}
+
+static void update_sm_ah(void *port_ptr)
+{
+ struct ib_sa_port *port = port_ptr;
+ struct ib_sa_sm_ah *new_ah, *old_ah;
+ struct ib_port_attr port_attr;
+ struct ib_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ printk(KERN_WARNING "Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
+ if (!new_ah) {
+ printk(KERN_WARNING "Couldn't allocate new SM AH\n");
+ return;
+ }
+
+ kref_init(&new_ah->ref);
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = port_attr.sm_lid;
+ ah_attr.sl = port_attr.sm_sl;
+ ah_attr.port_num = port->port_num;
+
+ new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ printk(KERN_WARNING "Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ old_ah = port->sm_ah;
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+
+ if (old_ah)
+ kref_put(&old_ah->ref, free_sm_ah);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE) {
+ struct ib_sa_device *sa_dev =
+ ib_get_client_data(event->device, &sa_client);
+
+ schedule_work(&sa_dev->port[event->element.port_num -
+ sa_dev->start_port].update_task);
+ }
+}
+
+/**
+ * ib_sa_cancel_query - try to cancel an SA query
+ * @id:ID of query to cancel
+ * @query:query pointer to cancel
+ *
+ * Try to cancel an SA query. If the id and query don't match up or
+ * the query has already completed, nothing is done. Otherwise the
+ * query is canceled and will complete with a status of -EINTR.
+ */
+void ib_sa_cancel_query(int id, struct ib_sa_query *query)
+{
+ unsigned long flags;
+ struct ib_mad_agent *agent;
+
+ spin_lock_irqsave(&idr_lock, flags);
+ if (idr_find(&query_idr, id) != query) {
+ spin_unlock_irqrestore(&idr_lock, flags);
+ return;
+ }
+ agent = query->port->agent;
+ spin_unlock_irqrestore(&idr_lock, flags);
+
+ ib_cancel_mad(agent, id);
+}
+EXPORT_SYMBOL(ib_sa_cancel_query);
+
+static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+{
+ unsigned long flags;
+
+ memset(mad, 0, sizeof *mad);
+
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+
+ spin_lock_irqsave(&tid_lock, flags);
+ mad->mad_hdr.tid =
+ cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
+ spin_unlock_irqrestore(&tid_lock, flags);
+}
+
+static int send_mad(struct ib_sa_query *query, int timeout_ms)
+{
+ struct ib_sa_port *port = query->port;
+ unsigned long flags;
+ int ret;
+ struct ib_sge gather_list;
+ struct ib_send_wr *bad_wr, wr = {
+ .opcode = IB_WR_SEND,
+ .sg_list = &gather_list,
+ .num_sge = 1,
+ .send_flags = IB_SEND_SIGNALED,
+ .wr = {
+ .ud = {
+ .mad_hdr = &query->mad->mad_hdr,
+ .remote_qpn = 1,
+ .remote_qkey = IB_QP1_QKEY,
+ .timeout_ms = timeout_ms
+ }
+ }
+ };
+
+retry:
+ if (!idr_pre_get(&query_idr, GFP_ATOMIC))
+ return -ENOMEM;
+ spin_lock_irqsave(&idr_lock, flags);
+ ret = idr_get_new(&query_idr, query, &query->id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ return ret;
+
+ wr.wr_id = query->id;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ kref_get(&port->sm_ah->ref);
+ query->sm_ah = port->sm_ah;
+ wr.wr.ud.ah = port->sm_ah->ah;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ gather_list.addr = dma_map_single(port->agent->device->dma_device,
+ query->mad,
+ sizeof (struct ib_sa_mad),
+ DMA_TO_DEVICE);
+ gather_list.length = sizeof (struct ib_sa_mad);
+ gather_list.lkey = port->mr->lkey;
+ pci_unmap_addr_set(query, mapping, gather_list.addr);
+
+ ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
+ if (ret) {
+ dma_unmap_single(port->agent->device->dma_device,
+ pci_unmap_addr(query, mapping),
+ sizeof (struct ib_sa_mad),
+ DMA_TO_DEVICE);
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ spin_lock_irqsave(&idr_lock, flags);
+ idr_remove(&query_idr, query->id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+ }
+
+ return ret;
+}
+
+static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_path_rec rec;
+
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(sa_query->mad);
+ kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+}
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path. The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, int gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_path_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
+ struct ib_mad_agent *agent = port->agent;
+ int ret;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+ query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
+ if (!query->sa_query.mad) {
+ kfree(query);
+ return -ENOMEM;
+ }
+
+ query->callback = callback;
+ query->context = context;
+
+ init_mad(query->sa_query.mad, agent);
+
+ query->sa_query.callback = ib_sa_path_rec_callback;
+ query->sa_query.release = ib_sa_path_rec_release;
+ query->sa_query.port = port;
+ query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+ query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, query->sa_query.mad->data);
+
+ *sa_query = &query->sa_query;
+ ret = send_mad(&query->sa_query, timeout_ms);
+ if (ret) {
+ *sa_query = NULL;
+ kfree(query->sa_query.mad);
+ kfree(query);
+ }
+
+ return ret ? ret : query->sa_query.id;
+}
+EXPORT_SYMBOL(ib_sa_path_rec_get);
+
+static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_mcmember_query *query =
+ container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_mcmember_rec rec;
+
+ ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(sa_query->mad);
+ kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
+}
+
+int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, int gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_mcmember_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_mcmember_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
+ struct ib_mad_agent *agent = port->agent;
+ int ret;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+ query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
+ if (!query->sa_query.mad) {
+ kfree(query);
+ return -ENOMEM;
+ }
+
+ query->callback = callback;
+ query->context = context;
+
+ init_mad(query->sa_query.mad, agent);
+
+ query->sa_query.callback = ib_sa_mcmember_rec_callback;
+ query->sa_query.release = ib_sa_mcmember_rec_release;
+ query->sa_query.port = port;
+ query->sa_query.mad->mad_hdr.method = method;
+ query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
+ rec, query->sa_query.mad->data);
+
+ *sa_query = &query->sa_query;
+ ret = send_mad(&query->sa_query, timeout_ms);
+ if (ret) {
+ *sa_query = NULL;
+ kfree(query->sa_query.mad);
+ kfree(query);
+ }
+
+ return ret ? ret : query->sa_query.id;
+}
+EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_sa_query *query;
+ unsigned long flags;
+
+ spin_lock_irqsave(&idr_lock, flags);
+ query = idr_find(&query_idr, mad_send_wc->wr_id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+
+ if (!query)
+ return;
+
+ switch (mad_send_wc->status) {
+ case IB_WC_SUCCESS:
+ /* No callback -- already got recv */
+ break;
+ case IB_WC_RESP_TIMEOUT_ERR:
+ query->callback(query, -ETIMEDOUT, NULL);
+ break;
+ case IB_WC_WR_FLUSH_ERR:
+ query->callback(query, -EINTR, NULL);
+ break;
+ default:
+ query->callback(query, -EIO, NULL);
+ break;
+ }
+
+ dma_unmap_single(agent->device->dma_device,
+ pci_unmap_addr(query, mapping),
+ sizeof (struct ib_sa_mad),
+ DMA_TO_DEVICE);
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+
+ query->release(query);
+
+ spin_lock_irqsave(&idr_lock, flags);
+ idr_remove(&query_idr, mad_send_wc->wr_id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+}
+
+static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_sa_query *query;
+ unsigned long flags;
+
+ spin_lock_irqsave(&idr_lock, flags);
+ query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
+ spin_unlock_irqrestore(&idr_lock, flags);
+
+ if (query) {
+ if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
+ query->callback(query,
+ mad_recv_wc->recv_buf.mad->mad_hdr.status ?
+ -EINVAL : 0,
+ (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+ else
+ query->callback(query, -EIO, NULL);
+ }
+
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static void ib_sa_add_one(struct ib_device *device)
+{
+ struct ib_sa_device *sa_dev;
+ int s, e, i;
+
+ if (device->node_type == IB_NODE_SWITCH)
+ s = e = 0;
+ else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ sa_dev = kmalloc(sizeof *sa_dev +
+ (e - s + 1) * sizeof (struct ib_sa_port),
+ GFP_KERNEL);
+ if (!sa_dev)
+ return;
+
+ sa_dev->start_port = s;
+ sa_dev->end_port = e;
+
+ for (i = 0; i <= e - s; ++i) {
+ sa_dev->port[i].mr = NULL;
+ sa_dev->port[i].sm_ah = NULL;
+ sa_dev->port[i].port_num = i + s;
+ spin_lock_init(&sa_dev->port[i].ah_lock);
+
+ sa_dev->port[i].agent =
+ ib_register_mad_agent(device, i + s, IB_QPT_GSI,
+ NULL, 0, send_handler,
+ recv_handler, sa_dev);
+ if (IS_ERR(sa_dev->port[i].agent))
+ goto err;
+
+ sa_dev->port[i].mr = ib_get_dma_mr(sa_dev->port[i].agent->qp->pd,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(sa_dev->port[i].mr)) {
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ goto err;
+ }
+
+ INIT_WORK(&sa_dev->port[i].update_task,
+ update_sm_ah, &sa_dev->port[i]);
+ }
+
+ ib_set_client_data(device, &sa_client, sa_dev);
+
+ /*
+ * We register our event handler after everything is set up,
+ * and then update our cached info after the event handler is
+ * registered to avoid any problems if a port changes state
+ * during our initialization.
+ */
+
+ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
+ if (ib_register_event_handler(&sa_dev->event_handler))
+ goto err;
+
+ for (i = 0; i <= e - s; ++i)
+ update_sm_ah(&sa_dev->port[i]);
+
+ return;
+
+err:
+ while (--i >= 0) {
+ ib_dereg_mr(sa_dev->port[i].mr);
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ }
+
+ kfree(sa_dev);
+
+ return;
+}
+
+static void ib_sa_remove_one(struct ib_device *device)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ int i;
+
+ if (!sa_dev)
+ return;
+
+ ib_unregister_event_handler(&sa_dev->event_handler);
+
+ for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
+ }
+
+ kfree(sa_dev);
+}
+
+static int __init ib_sa_init(void)
+{
+ int ret;
+
+ spin_lock_init(&idr_lock);
+ spin_lock_init(&tid_lock);
+
+ get_random_bytes(&tid, sizeof tid);
+
+ ret = ib_register_client(&sa_client);
+ if (ret)
+ printk(KERN_ERR "Couldn't register ib_sa client\n");
+
+ return ret;
+}
+
+static void __exit ib_sa_cleanup(void)
+{
+ ib_unregister_client(&sa_client);
+}
+
+module_init(ib_sa_init);
+module_exit(ib_sa_cleanup);
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
new file mode 100644
index 00000000000..b4b284324a3
--- /dev/null
+++ b/drivers/infiniband/core/smi.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#include <ib_smi.h>
+#include "smi.h"
+
+/*
+ * Fixup a directed route SMP for sending
+ * Return 0 if the SMP should be discarded
+ */
+int smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type,
+ int port_num)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ /* See section 14.2.2.2, Vol 1 IB spec */
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:1 */
+ if (hop_cnt && hop_ptr == 0) {
+ smp->hop_ptr++;
+ return (smp->initial_path[smp->hop_ptr] ==
+ port_num);
+ }
+
+ /* C14-9:2 */
+ if (hop_ptr && hop_ptr < hop_cnt) {
+ if (node_type != IB_NODE_SWITCH)
+ return 0;
+
+ /* smp->return_path set when received */
+ smp->hop_ptr++;
+ return (smp->initial_path[smp->hop_ptr] ==
+ port_num);
+ }
+
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt) {
+ /* smp->return_path set when received */
+ smp->hop_ptr++;
+ return (node_type == IB_NODE_SWITCH ||
+ smp->dr_dlid == IB_LID_PERMISSIVE);
+ }
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ /* C14-9:5 -- Fail unreasonable hop pointer */
+ return (hop_ptr == hop_cnt + 1);
+
+ } else {
+ /* C14-13:1 */
+ if (hop_cnt && hop_ptr == hop_cnt + 1) {
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num);
+ }
+
+ /* C14-13:2 */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
+ if (node_type != IB_NODE_SWITCH)
+ return 0;
+
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num);
+ }
+
+ /* C14-13:3 -- at the end of the DR segment of path */
+ if (hop_ptr == 1) {
+ smp->hop_ptr--;
+ /* C14-13:3 -- SMPs destined for SM shouldn't be here */
+ return (node_type == IB_NODE_SWITCH ||
+ smp->dr_slid == IB_LID_PERMISSIVE);
+ }
+
+ /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
+ if (hop_ptr == 0)
+ return 1;
+
+ /* C14-13:5 -- Check for unreasonable hop pointer */
+ return 0;
+ }
+}
+
+/*
+ * Adjust information for a received SMP
+ * Return 0 if the SMP should be dropped
+ */
+int smi_handle_dr_smp_recv(struct ib_smp *smp,
+ u8 node_type,
+ int port_num,
+ int phys_port_cnt)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ /* See section 14.2.2.2, Vol 1 IB spec */
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:1 -- sender should have incremented hop_ptr */
+ if (hop_cnt && hop_ptr == 0)
+ return 0;
+
+ /* C14-9:2 -- intermediate hop */
+ if (hop_ptr && hop_ptr < hop_cnt) {
+ if (node_type != IB_NODE_SWITCH)
+ return 0;
+
+ smp->return_path[hop_ptr] = port_num;
+ /* smp->hop_ptr updated when sending */
+ return (smp->initial_path[hop_ptr+1] <= phys_port_cnt);
+ }
+
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt) {
+ if (hop_cnt)
+ smp->return_path[hop_ptr] = port_num;
+ /* smp->hop_ptr updated when sending */
+
+ return (node_type == IB_NODE_SWITCH ||
+ smp->dr_dlid == IB_LID_PERMISSIVE);
+ }
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ /* C14-9:5 -- fail unreasonable hop pointer */
+ return (hop_ptr == hop_cnt + 1);
+
+ } else {
+
+ /* C14-13:1 */
+ if (hop_cnt && hop_ptr == hop_cnt + 1) {
+ smp->hop_ptr--;
+ return (smp->return_path[smp->hop_ptr] ==
+ port_num);
+ }
+
+ /* C14-13:2 */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
+ if (node_type != IB_NODE_SWITCH)
+ return 0;
+
+ /* smp->hop_ptr updated when sending */
+ return (smp->return_path[hop_ptr-1] <= phys_port_cnt);
+ }
+
+ /* C14-13:3 -- We're at the end of the DR segment of path */
+ if (hop_ptr == 1) {
+ if (smp->dr_slid == IB_LID_PERMISSIVE) {
+ /* giving SMP to SM - update hop_ptr */
+ smp->hop_ptr--;
+ return 1;
+ }
+ /* smp->hop_ptr updated when sending */
+ return (node_type == IB_NODE_SWITCH);
+ }
+
+ /* C14-13:4 -- hop_ptr = 0 -> give to SM */
+ /* C14-13:5 -- Check for unreasonable hop pointer */
+ return (hop_ptr == 0);
+ }
+}
+
+/*
+ * Return 1 if the received DR SMP should be forwarded to the send queue
+ * Return 0 if the SMP should be completed up the stack
+ */
+int smi_check_forward_dr_smp(struct ib_smp *smp)
+{
+ u8 hop_ptr, hop_cnt;
+
+ hop_ptr = smp->hop_ptr;
+ hop_cnt = smp->hop_cnt;
+
+ if (!ib_get_smp_direction(smp)) {
+ /* C14-9:2 -- intermediate hop */
+ if (hop_ptr && hop_ptr < hop_cnt)
+ return 1;
+
+ /* C14-9:3 -- at the end of the DR segment of path */
+ if (hop_ptr == hop_cnt)
+ return (smp->dr_dlid == IB_LID_PERMISSIVE);
+
+ /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
+ if (hop_ptr == hop_cnt + 1)
+ return 1;
+ } else {
+ /* C14-13:2 */
+ if (2 <= hop_ptr && hop_ptr <= hop_cnt)
+ return 1;
+
+ /* C14-13:3 -- at the end of the DR segment of path */
+ if (hop_ptr == 1)
+ return (smp->dr_slid != IB_LID_PERMISSIVE);
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
new file mode 100644
index 00000000000..db25503a073
--- /dev/null
+++ b/drivers/infiniband/core/smi.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#ifndef __SMI_H_
+#define __SMI_H_
+
+int smi_handle_dr_smp_recv(struct ib_smp *smp,
+ u8 node_type,
+ int port_num,
+ int phys_port_cnt);
+extern int smi_check_forward_dr_smp(struct ib_smp *smp);
+extern int smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type,
+ int port_num);
+extern int smi_check_local_dr_smp(struct ib_smp *smp,
+ struct ib_device *device,
+ int port_num);
+
+/*
+ * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
+ */
+static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent,
+ struct ib_smp *smp)
+{
+ /* C14-9:3 -- We're at the end of the DR segment of path */
+ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
+ return ((mad_agent->device->process_mad &&
+ !ib_get_smp_direction(smp) &&
+ (smp->hop_ptr == smp->hop_cnt + 1)));
+}
+
+#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
new file mode 100644
index 00000000000..3a413f72ff6
--- /dev/null
+++ b/drivers/infiniband/core/sysfs.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include "core_priv.h"
+
+#include <ib_mad.h>
+
+struct ib_port {
+ struct kobject kobj;
+ struct ib_device *ibdev;
+ struct attribute_group gid_group;
+ struct attribute **gid_attr;
+ struct attribute_group pkey_group;
+ struct attribute **pkey_attr;
+ u8 port_num;
+};
+
+struct port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
+ ssize_t (*store)(struct ib_port *, struct port_attribute *,
+ const char *buf, size_t count);
+};
+
+#define PORT_ATTR(_name, _mode, _show, _store) \
+struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+#define PORT_ATTR_RO(_name) \
+struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+
+struct port_table_attribute {
+ struct port_attribute attr;
+ int index;
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+
+ if (!port_attr->show)
+ return 0;
+
+ return port_attr->show(p, port_attr, buf);
+}
+
+static struct sysfs_ops port_sysfs_ops = {
+ .show = port_attr_show
+};
+
+static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ static const char *state_name[] = {
+ [IB_PORT_NOP] = "NOP",
+ [IB_PORT_DOWN] = "DOWN",
+ [IB_PORT_INIT] = "INIT",
+ [IB_PORT_ARMED] = "ARMED",
+ [IB_PORT_ACTIVE] = "ACTIVE",
+ [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
+ };
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d: %s\n", attr.state,
+ attr.state >= 0 && attr.state <= ARRAY_SIZE(state_name) ?
+ state_name[attr.state] : "UNKNOWN");
+}
+
+static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%x\n", attr.lid);
+}
+
+static ssize_t lid_mask_count_show(struct ib_port *p,
+ struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", attr.lmc);
+}
+
+static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%x\n", attr.sm_lid);
+}
+
+static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", attr.sm_sl);
+}
+
+static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+}
+
+static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+ char *speed = "";
+ int rate;
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ switch (attr.active_speed) {
+ case 2: speed = " DDR"; break;
+ case 4: speed = " QDR"; break;
+ }
+
+ rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+ if (rate < 0)
+ return -EINVAL;
+
+ return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
+ rate / 10, rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
+}
+
+static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
+ char *buf)
+{
+ struct ib_port_attr attr;
+
+ ssize_t ret;
+
+ ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ if (ret)
+ return ret;
+
+ switch (attr.phys_state) {
+ case 1: return sprintf(buf, "1: Sleep\n");
+ case 2: return sprintf(buf, "2: Polling\n");
+ case 3: return sprintf(buf, "3: Disabled\n");
+ case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
+ case 5: return sprintf(buf, "5: LinkUp\n");
+ case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
+ case 7: return sprintf(buf, "7: Phy Test\n");
+ default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
+ }
+}
+
+static PORT_ATTR_RO(state);
+static PORT_ATTR_RO(lid);
+static PORT_ATTR_RO(lid_mask_count);
+static PORT_ATTR_RO(sm_lid);
+static PORT_ATTR_RO(sm_sl);
+static PORT_ATTR_RO(cap_mask);
+static PORT_ATTR_RO(rate);
+static PORT_ATTR_RO(phys_state);
+
+static struct attribute *port_default_attrs[] = {
+ &port_attr_state.attr,
+ &port_attr_lid.attr,
+ &port_attr_lid_mask_count.attr,
+ &port_attr_sm_lid.attr,
+ &port_attr_sm_sl.attr,
+ &port_attr_cap_mask.attr,
+ &port_attr_rate.attr,
+ &port_attr_phys_state.attr,
+ NULL
+};
+
+static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ union ib_gid gid;
+ ssize_t ret;
+
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((u16 *) gid.raw)[0]),
+ be16_to_cpu(((u16 *) gid.raw)[1]),
+ be16_to_cpu(((u16 *) gid.raw)[2]),
+ be16_to_cpu(((u16 *) gid.raw)[3]),
+ be16_to_cpu(((u16 *) gid.raw)[4]),
+ be16_to_cpu(((u16 *) gid.raw)[5]),
+ be16_to_cpu(((u16 *) gid.raw)[6]),
+ be16_to_cpu(((u16 *) gid.raw)[7]));
+}
+
+static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ u16 pkey;
+ ssize_t ret;
+
+ ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%04x\n", pkey);
+}
+
+#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ struct ib_mad *in_mad = NULL;
+ struct ib_mad *out_mad = NULL;
+ ssize_t ret;
+
+ if (!p->ibdev->process_mad)
+ return sprintf(buf, "N/A (no PMA)\n");
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ memset(in_mad, 0, sizeof *in_mad);
+ in_mad->mad_hdr.base_version = 1;
+ in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
+ in_mad->mad_hdr.class_version = 1;
+ in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+
+ in_mad->data[41] = p->port_num; /* PortSelect field */
+
+ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
+ p->port_num, NULL, NULL, in_mad, out_mad) &
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ switch (width) {
+ case 4:
+ ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ (offset % 4)) & 0xf);
+ break;
+ case 8:
+ ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ break;
+ case 16:
+ ret = sprintf(buf, "%u\n",
+ be16_to_cpup((u16 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 32:
+ ret = sprintf(buf, "%u\n",
+ be32_to_cpup((u32 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ default:
+ ret = 0;
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return ret;
+}
+
+static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
+static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
+static PORT_PMA_ATTR(link_downed , 2, 8, 56);
+static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
+static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
+static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
+static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
+static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
+static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
+static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
+static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
+static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
+static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
+static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
+static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
+static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+
+static struct attribute *pma_attrs[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_port_xmit_data.attr.attr,
+ &port_pma_attr_port_rcv_data.attr.attr,
+ &port_pma_attr_port_xmit_packets.attr.attr,
+ &port_pma_attr_port_rcv_packets.attr.attr,
+ NULL
+};
+
+static struct attribute_group pma_group = {
+ .name = "counters",
+ .attrs = pma_attrs
+};
+
+static void ib_port_release(struct kobject *kobj)
+{
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+ struct attribute *a;
+ int i;
+
+ for (i = 0; (a = p->gid_attr[i]); ++i) {
+ kfree(a->name);
+ kfree(a);
+ }
+
+ for (i = 0; (a = p->pkey_attr[i]); ++i) {
+ kfree(a->name);
+ kfree(a);
+ }
+
+ kfree(p->gid_attr);
+ kfree(p);
+}
+
+static struct kobj_type port_type = {
+ .release = ib_port_release,
+ .sysfs_ops = &port_sysfs_ops,
+ .default_attrs = port_default_attrs
+};
+
+static void ib_device_release(struct class_device *cdev)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+
+ kfree(dev);
+}
+
+static int ib_device_hotplug(struct class_device *cdev, char **envp,
+ int num_envp, char *buf, int size)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ int i = 0, len = 0;
+
+ if (add_hotplug_env_var(envp, num_envp, &i, buf, size, &len,
+ "NAME=%s", dev->name))
+ return -ENOMEM;
+
+ /*
+ * It might be nice to pass the node GUID to hotplug, but
+ * right now the only way to get it is to query the device
+ * provider, and this can crash during device removal because
+ * we are will be running after driver removal has started.
+ * We could add a node_guid field to struct ib_device, or we
+ * could just let the hotplug script read the node GUID from
+ * sysfs when devices are added.
+ */
+
+ envp[i] = NULL;
+ return 0;
+}
+
+static int alloc_group(struct attribute ***attr,
+ ssize_t (*show)(struct ib_port *,
+ struct port_attribute *, char *buf),
+ int len)
+{
+ struct port_table_attribute ***tab_attr =
+ (struct port_table_attribute ***) attr;
+ int i;
+ int ret;
+
+ *tab_attr = kmalloc((1 + len) * sizeof *tab_attr, GFP_KERNEL);
+ if (!*tab_attr)
+ return -ENOMEM;
+
+ memset(*tab_attr, 0, (1 + len) * sizeof *tab_attr);
+
+ for (i = 0; i < len; ++i) {
+ (*tab_attr)[i] = kmalloc(sizeof *(*tab_attr)[i], GFP_KERNEL);
+ if (!(*tab_attr)[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memset((*tab_attr)[i], 0, sizeof *(*tab_attr)[i]);
+ (*tab_attr)[i]->attr.attr.name = kmalloc(8, GFP_KERNEL);
+ if (!(*tab_attr)[i]->attr.attr.name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (snprintf((*tab_attr)[i]->attr.attr.name, 8, "%d", i) >= 8) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ (*tab_attr)[i]->attr.attr.mode = S_IRUGO;
+ (*tab_attr)[i]->attr.attr.owner = THIS_MODULE;
+ (*tab_attr)[i]->attr.show = show;
+ (*tab_attr)[i]->index = i;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < len; ++i) {
+ if ((*tab_attr)[i])
+ kfree((*tab_attr)[i]->attr.attr.name);
+ kfree((*tab_attr)[i]);
+ }
+
+ kfree(*tab_attr);
+
+ return ret;
+}
+
+static int add_port(struct ib_device *device, int port_num)
+{
+ struct ib_port *p;
+ struct ib_port_attr attr;
+ int i;
+ int ret;
+
+ ret = ib_query_port(device, port_num, &attr);
+ if (ret)
+ return ret;
+
+ p = kmalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ memset(p, 0, sizeof *p);
+
+ p->ibdev = device;
+ p->port_num = port_num;
+ p->kobj.ktype = &port_type;
+
+ p->kobj.parent = kobject_get(&device->ports_parent);
+ if (!p->kobj.parent) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ ret = kobject_set_name(&p->kobj, "%d", port_num);
+ if (ret)
+ goto err_put;
+
+ ret = kobject_register(&p->kobj);
+ if (ret)
+ goto err_put;
+
+ ret = sysfs_create_group(&p->kobj, &pma_group);
+ if (ret)
+ goto err_put;
+
+ ret = alloc_group(&p->gid_attr, show_port_gid, attr.gid_tbl_len);
+ if (ret)
+ goto err_remove_pma;
+
+ p->gid_group.name = "gids";
+ p->gid_group.attrs = p->gid_attr;
+
+ ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ if (ret)
+ goto err_free_gid;
+
+ ret = alloc_group(&p->pkey_attr, show_port_pkey, attr.pkey_tbl_len);
+ if (ret)
+ goto err_remove_gid;
+
+ p->pkey_group.name = "pkeys";
+ p->pkey_group.attrs = p->pkey_attr;
+
+ ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
+
+ list_add_tail(&p->kobj.entry, &device->port_list);
+
+ return 0;
+
+err_free_pkey:
+ for (i = 0; i < attr.pkey_tbl_len; ++i) {
+ kfree(p->pkey_attr[i]->name);
+ kfree(p->pkey_attr[i]);
+ }
+
+ kfree(p->pkey_attr);
+
+err_remove_gid:
+ sysfs_remove_group(&p->kobj, &p->gid_group);
+
+err_free_gid:
+ for (i = 0; i < attr.gid_tbl_len; ++i) {
+ kfree(p->gid_attr[i]->name);
+ kfree(p->gid_attr[i]);
+ }
+
+ kfree(p->gid_attr);
+
+err_remove_pma:
+ sysfs_remove_group(&p->kobj, &pma_group);
+
+err_put:
+ kobject_put(&device->ports_parent);
+
+err:
+ kfree(p);
+ return ret;
+}
+
+static ssize_t show_node_type(struct class_device *cdev, char *buf)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+
+ switch (dev->node_type) {
+ case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
+ case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+ case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+ default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+ }
+}
+
+static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ struct ib_device_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_device(dev, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((u16 *) &attr.sys_image_guid)[0]),
+ be16_to_cpu(((u16 *) &attr.sys_image_guid)[1]),
+ be16_to_cpu(((u16 *) &attr.sys_image_guid)[2]),
+ be16_to_cpu(((u16 *) &attr.sys_image_guid)[3]));
+}
+
+static ssize_t show_node_guid(struct class_device *cdev, char *buf)
+{
+ struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ struct ib_device_attr attr;
+ ssize_t ret;
+
+ ret = ib_query_device(dev, &attr);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((u16 *) &attr.node_guid)[0]),
+ be16_to_cpu(((u16 *) &attr.node_guid)[1]),
+ be16_to_cpu(((u16 *) &attr.node_guid)[2]),
+ be16_to_cpu(((u16 *) &attr.node_guid)[3]));
+}
+
+static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
+static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
+static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
+
+static struct class_device_attribute *ib_class_attributes[] = {
+ &class_device_attr_node_type,
+ &class_device_attr_sys_image_guid,
+ &class_device_attr_node_guid
+};
+
+static struct class ib_class = {
+ .name = "infiniband",
+ .release = ib_device_release,
+ .hotplug = ib_device_hotplug,
+};
+
+int ib_device_register_sysfs(struct ib_device *device)
+{
+ struct class_device *class_dev = &device->class_dev;
+ int ret;
+ int i;
+
+ class_dev->class = &ib_class;
+ class_dev->class_data = device;
+ strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE);
+
+ INIT_LIST_HEAD(&device->port_list);
+
+ ret = class_device_register(class_dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ ret = class_device_create_file(class_dev, ib_class_attributes[i]);
+ if (ret)
+ goto err_unregister;
+ }
+
+ device->ports_parent.parent = kobject_get(&class_dev->kobj);
+ if (!device->ports_parent.parent) {
+ ret = -EBUSY;
+ goto err_unregister;
+ }
+ ret = kobject_set_name(&device->ports_parent, "ports");
+ if (ret)
+ goto err_put;
+ ret = kobject_register(&device->ports_parent);
+ if (ret)
+ goto err_put;
+
+ if (device->node_type == IB_NODE_SWITCH) {
+ ret = add_port(device, 0);
+ if (ret)
+ goto err_put;
+ } else {
+ int i;
+
+ for (i = 1; i <= device->phys_port_cnt; ++i) {
+ ret = add_port(device, i);
+ if (ret)
+ goto err_put;
+ }
+ }
+
+ return 0;
+
+err_put:
+ {
+ struct kobject *p, *t;
+ struct ib_port *port;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct ib_port, kobj);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_unregister(p);
+ }
+ }
+
+ kobject_put(&class_dev->kobj);
+
+err_unregister:
+ class_device_unregister(class_dev);
+
+err:
+ return ret;
+}
+
+void ib_device_unregister_sysfs(struct ib_device *device)
+{
+ struct kobject *p, *t;
+ struct ib_port *port;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct ib_port, kobj);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_unregister(p);
+ }
+
+ kobject_unregister(&device->ports_parent);
+ class_device_unregister(&device->class_dev);
+}
+
+int ib_sysfs_setup(void)
+{
+ return class_register(&ib_class);
+}
+
+void ib_sysfs_cleanup(void)
+{
+ class_unregister(&ib_class);
+}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
new file mode 100644
index 00000000000..dc4eb1db5e9
--- /dev/null
+++ b/drivers/infiniband/core/ud_header.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/errno.h>
+
+#include <ib_pack.h>
+
+#define STRUCT_FIELD(header, field) \
+ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
+ .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .field_name = #header ":" #field
+
+static const struct ib_field lrh_table[] = {
+ { STRUCT_FIELD(lrh, virtual_lane),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(lrh, link_version),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { STRUCT_FIELD(lrh, service_level),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 4 },
+ { RESERVED,
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 2 },
+ { STRUCT_FIELD(lrh, link_next_header),
+ .offset_words = 0,
+ .offset_bits = 14,
+ .size_bits = 2 },
+ { STRUCT_FIELD(lrh, destination_lid),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 5 },
+ { STRUCT_FIELD(lrh, packet_length),
+ .offset_words = 1,
+ .offset_bits = 5,
+ .size_bits = 11 },
+ { STRUCT_FIELD(lrh, source_lid),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
+static const struct ib_field grh_table[] = {
+ { STRUCT_FIELD(grh, ip_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(grh, traffic_class),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, flow_label),
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 20 },
+ { STRUCT_FIELD(grh, payload_length),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(grh, next_header),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, hop_limit),
+ .offset_words = 1,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { STRUCT_FIELD(grh, source_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { STRUCT_FIELD(grh, destination_gid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 }
+};
+
+static const struct ib_field bth_table[] = {
+ { STRUCT_FIELD(bth, opcode),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(bth, solicited_event),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { STRUCT_FIELD(bth, mig_req),
+ .offset_words = 0,
+ .offset_bits = 9,
+ .size_bits = 1 },
+ { STRUCT_FIELD(bth, pad_count),
+ .offset_words = 0,
+ .offset_bits = 10,
+ .size_bits = 2 },
+ { STRUCT_FIELD(bth, transport_header_version),
+ .offset_words = 0,
+ .offset_bits = 12,
+ .size_bits = 4 },
+ { STRUCT_FIELD(bth, pkey),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(bth, destination_qpn),
+ .offset_words = 1,
+ .offset_bits = 8,
+ .size_bits = 24 },
+ { STRUCT_FIELD(bth, ack_req),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 2,
+ .offset_bits = 1,
+ .size_bits = 7 },
+ { STRUCT_FIELD(bth, psn),
+ .offset_words = 2,
+ .offset_bits = 8,
+ .size_bits = 24 }
+};
+
+static const struct ib_field deth_table[] = {
+ { STRUCT_FIELD(deth, qkey),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { RESERVED,
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(deth, source_qpn),
+ .offset_words = 1,
+ .offset_bits = 8,
+ .size_bits = 24 }
+};
+
+/**
+ * ib_ud_header_init - Initialize UD header structure
+ * @payload_bytes:Length of packet payload
+ * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @header:Structure to initialize
+ *
+ * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
+ * lrh.packet_length, grh.ip_version, grh.payload_length,
+ * grh.next_header, bth.opcode, bth.pad_count and
+ * bth.transport_header_version fields of a &struct ib_ud_header given
+ * the payload length and whether a GRH will be included.
+ */
+void ib_ud_header_init(int payload_bytes,
+ int grh_present,
+ struct ib_ud_header *header)
+{
+ int header_len;
+
+ memset(header, 0, sizeof *header);
+
+ header_len =
+ IB_LRH_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES;
+ if (grh_present) {
+ header_len += IB_GRH_BYTES;
+ }
+
+ header->lrh.link_version = 0;
+ header->lrh.link_next_header =
+ grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
+ header->lrh.packet_length = (IB_LRH_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) / 4; /* round up */
+
+ header->grh_present = grh_present;
+ if (grh_present) {
+ header->lrh.packet_length += IB_GRH_BYTES / 4;
+
+ header->grh.ip_version = 6;
+ header->grh.payload_length =
+ cpu_to_be16((IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) & ~3); /* round up */
+ header->grh.next_header = 0x1b;
+ }
+
+ cpu_to_be16s(&header->lrh.packet_length);
+
+ if (header->immediate_present)
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ else
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ header->bth.pad_count = (4 - payload_bytes) & 3;
+ header->bth.transport_header_version = 0;
+}
+EXPORT_SYMBOL(ib_ud_header_init);
+
+/**
+ * ib_ud_header_pack - Pack UD header struct into wire format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_ud_header_pack() packs the UD header structure @header into wire
+ * format in the buffer @buf.
+ */
+int ib_ud_header_pack(struct ib_ud_header *header,
+ void *buf)
+{
+ int len = 0;
+
+ ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
+ &header->lrh, buf);
+ len += IB_LRH_BYTES;
+
+ if (header->grh_present) {
+ ib_pack(grh_table, ARRAY_SIZE(grh_table),
+ &header->grh, buf + len);
+ len += IB_GRH_BYTES;
+ }
+
+ ib_pack(bth_table, ARRAY_SIZE(bth_table),
+ &header->bth, buf + len);
+ len += IB_BTH_BYTES;
+
+ ib_pack(deth_table, ARRAY_SIZE(deth_table),
+ &header->deth, buf + len);
+ len += IB_DETH_BYTES;
+
+ if (header->immediate_present) {
+ memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
+ len += sizeof header->immediate_data;
+ }
+
+ return len;
+}
+EXPORT_SYMBOL(ib_ud_header_pack);
+
+/**
+ * ib_ud_header_unpack - Unpack UD header struct from wire format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_ud_header_pack() unpacks the UD header structure @header from wire
+ * format in the buffer @buf.
+ */
+int ib_ud_header_unpack(void *buf,
+ struct ib_ud_header *header)
+{
+ ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
+ buf, &header->lrh);
+ buf += IB_LRH_BYTES;
+
+ if (header->lrh.link_version != 0) {
+ printk(KERN_WARNING "Invalid LRH.link_version %d\n",
+ header->lrh.link_version);
+ return -EINVAL;
+ }
+
+ switch (header->lrh.link_next_header) {
+ case IB_LNH_IBA_LOCAL:
+ header->grh_present = 0;
+ break;
+
+ case IB_LNH_IBA_GLOBAL:
+ header->grh_present = 1;
+ ib_unpack(grh_table, ARRAY_SIZE(grh_table),
+ buf, &header->grh);
+ buf += IB_GRH_BYTES;
+
+ if (header->grh.ip_version != 6) {
+ printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
+ header->grh.ip_version);
+ return -EINVAL;
+ }
+ if (header->grh.next_header != 0x1b) {
+ printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
+ header->grh.next_header);
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
+ header->lrh.link_next_header);
+ return -EINVAL;
+ }
+
+ ib_unpack(bth_table, ARRAY_SIZE(bth_table),
+ buf, &header->bth);
+ buf += IB_BTH_BYTES;
+
+ switch (header->bth.opcode) {
+ case IB_OPCODE_UD_SEND_ONLY:
+ header->immediate_present = 0;
+ break;
+ case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
+ header->immediate_present = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
+ header->bth.opcode);
+ return -EINVAL;
+ }
+
+ if (header->bth.transport_header_version != 0) {
+ printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
+ header->bth.transport_header_version);
+ return -EINVAL;
+ }
+
+ ib_unpack(deth_table, ARRAY_SIZE(deth_table),
+ buf, &header->deth);
+ buf += IB_DETH_BYTES;
+
+ if (header->immediate_present)
+ memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_ud_header_unpack);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
new file mode 100644
index 00000000000..54b4f33b0bf
--- /dev/null
+++ b/drivers/infiniband/core/user_mad.c
@@ -0,0 +1,840 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/poll.h>
+#include <linux/rwsem.h>
+#include <linux/kref.h>
+
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <ib_mad.h>
+#include <ib_user_mad.h>
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+enum {
+ IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_AGENTS = 32,
+
+ IB_UMAD_MAJOR = 231,
+ IB_UMAD_MINOR_BASE = 0
+};
+
+struct ib_umad_port {
+ int devnum;
+ struct cdev dev;
+ struct class_device class_dev;
+
+ int sm_devnum;
+ struct cdev sm_dev;
+ struct class_device sm_class_dev;
+ struct semaphore sm_sem;
+
+ struct ib_device *ib_dev;
+ struct ib_umad_device *umad_dev;
+ u8 port_num;
+};
+
+struct ib_umad_device {
+ int start_port, end_port;
+ struct kref ref;
+ struct ib_umad_port port[0];
+};
+
+struct ib_umad_file {
+ struct ib_umad_port *port;
+ spinlock_t recv_lock;
+ struct list_head recv_list;
+ wait_queue_head_t recv_wait;
+ struct rw_semaphore agent_mutex;
+ struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
+ struct ib_mr *mr[IB_UMAD_MAX_AGENTS];
+};
+
+struct ib_umad_packet {
+ struct ib_user_mad mad;
+ struct ib_ah *ah;
+ struct list_head list;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static spinlock_t map_lock;
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
+
+static void ib_umad_add_one(struct ib_device *device);
+static void ib_umad_remove_one(struct ib_device *device);
+
+static int queue_packet(struct ib_umad_file *file,
+ struct ib_mad_agent *agent,
+ struct ib_umad_packet *packet)
+{
+ int ret = 1;
+
+ down_read(&file->agent_mutex);
+ for (packet->mad.id = 0;
+ packet->mad.id < IB_UMAD_MAX_AGENTS;
+ packet->mad.id++)
+ if (agent == file->agent[packet->mad.id]) {
+ spin_lock_irq(&file->recv_lock);
+ list_add_tail(&packet->list, &file->recv_list);
+ spin_unlock_irq(&file->recv_lock);
+ wake_up_interruptible(&file->recv_wait);
+ ret = 0;
+ break;
+ }
+
+ up_read(&file->agent_mutex);
+
+ return ret;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *send_wc)
+{
+ struct ib_umad_file *file = agent->context;
+ struct ib_umad_packet *packet =
+ (void *) (unsigned long) send_wc->wr_id;
+
+ dma_unmap_single(agent->device->dma_device,
+ pci_unmap_addr(packet, mapping),
+ sizeof packet->mad.data,
+ DMA_TO_DEVICE);
+ ib_destroy_ah(packet->ah);
+
+ if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
+ packet->mad.status = ETIMEDOUT;
+
+ if (!queue_packet(file, agent, packet))
+ return;
+ }
+
+ kfree(packet);
+}
+
+static void recv_handler(struct ib_mad_agent *agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_umad_file *file = agent->context;
+ struct ib_umad_packet *packet;
+
+ if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
+ goto out;
+
+ packet = kmalloc(sizeof *packet, GFP_KERNEL);
+ if (!packet)
+ goto out;
+
+ memset(packet, 0, sizeof *packet);
+
+ memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data);
+ packet->mad.status = 0;
+ packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
+ packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid);
+ packet->mad.sl = mad_recv_wc->wc->sl;
+ packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits;
+ packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
+ if (packet->mad.grh_present) {
+ /* XXX parse GRH */
+ packet->mad.gid_index = 0;
+ packet->mad.hop_limit = 0;
+ packet->mad.traffic_class = 0;
+ memset(packet->mad.gid, 0, 16);
+ packet->mad.flow_label = 0;
+ }
+
+ if (queue_packet(file, agent, packet))
+ kfree(packet);
+
+out:
+ ib_free_recv_mad(mad_recv_wc);
+}
+
+static ssize_t ib_umad_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_packet *packet;
+ ssize_t ret;
+
+ if (count < sizeof (struct ib_user_mad))
+ return -EINVAL;
+
+ spin_lock_irq(&file->recv_lock);
+
+ while (list_empty(&file->recv_list)) {
+ spin_unlock_irq(&file->recv_lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->recv_wait,
+ !list_empty(&file->recv_list)))
+ return -ERESTARTSYS;
+
+ spin_lock_irq(&file->recv_lock);
+ }
+
+ packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
+ list_del(&packet->list);
+
+ spin_unlock_irq(&file->recv_lock);
+
+ if (copy_to_user(buf, &packet->mad, sizeof packet->mad))
+ ret = -EFAULT;
+ else
+ ret = sizeof packet->mad;
+
+ kfree(packet);
+ return ret;
+}
+
+static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_packet *packet;
+ struct ib_mad_agent *agent;
+ struct ib_ah_attr ah_attr;
+ struct ib_sge gather_list;
+ struct ib_send_wr *bad_wr, wr = {
+ .opcode = IB_WR_SEND,
+ .sg_list = &gather_list,
+ .num_sge = 1,
+ .send_flags = IB_SEND_SIGNALED,
+ };
+ u8 method;
+ u64 *tid;
+ int ret;
+
+ if (count < sizeof (struct ib_user_mad))
+ return -EINVAL;
+
+ packet = kmalloc(sizeof *packet, GFP_KERNEL);
+ if (!packet)
+ return -ENOMEM;
+
+ if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) {
+ kfree(packet);
+ return -EFAULT;
+ }
+
+ if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ down_read(&file->agent_mutex);
+
+ agent = file->agent[packet->mad.id];
+ if (!agent) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ /*
+ * If userspace is generating a request that will generate a
+ * response, we need to make sure the high-order part of the
+ * transaction ID matches the agent being used to send the
+ * MAD.
+ */
+ method = ((struct ib_mad_hdr *) packet->mad.data)->method;
+
+ if (!(method & IB_MGMT_METHOD_RESP) &&
+ method != IB_MGMT_METHOD_TRAP_REPRESS &&
+ method != IB_MGMT_METHOD_SEND) {
+ tid = &((struct ib_mad_hdr *) packet->mad.data)->tid;
+ *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
+ (be64_to_cpup(tid) & 0xffffffff));
+ }
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = be16_to_cpu(packet->mad.lid);
+ ah_attr.sl = packet->mad.sl;
+ ah_attr.src_path_bits = packet->mad.path_bits;
+ ah_attr.port_num = file->port->port_num;
+ if (packet->mad.grh_present) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16);
+ ah_attr.grh.flow_label = packet->mad.flow_label;
+ ah_attr.grh.hop_limit = packet->mad.hop_limit;
+ ah_attr.grh.traffic_class = packet->mad.traffic_class;
+ }
+
+ packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ if (IS_ERR(packet->ah)) {
+ ret = PTR_ERR(packet->ah);
+ goto err_up;
+ }
+
+ gather_list.addr = dma_map_single(agent->device->dma_device,
+ packet->mad.data,
+ sizeof packet->mad.data,
+ DMA_TO_DEVICE);
+ gather_list.length = sizeof packet->mad.data;
+ gather_list.lkey = file->mr[packet->mad.id]->lkey;
+ pci_unmap_addr_set(packet, mapping, gather_list.addr);
+
+ wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data;
+ wr.wr.ud.ah = packet->ah;
+ wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn);
+ wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey);
+ wr.wr.ud.timeout_ms = packet->mad.timeout_ms;
+
+ wr.wr_id = (unsigned long) packet;
+
+ ret = ib_post_send_mad(agent, &wr, &bad_wr);
+ if (ret) {
+ dma_unmap_single(agent->device->dma_device,
+ pci_unmap_addr(packet, mapping),
+ sizeof packet->mad.data,
+ DMA_TO_DEVICE);
+ goto err_up;
+ }
+
+ up_read(&file->agent_mutex);
+
+ return sizeof packet->mad;
+
+err_up:
+ up_read(&file->agent_mutex);
+
+err:
+ kfree(packet);
+ return ret;
+}
+
+static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct ib_umad_file *file = filp->private_data;
+
+ /* we will always be able to post a MAD send */
+ unsigned int mask = POLLOUT | POLLWRNORM;
+
+ poll_wait(filp, &file->recv_wait, wait);
+
+ if (!list_empty(&file->recv_list))
+ mask |= POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
+static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
+{
+ struct ib_user_mad_reg_req ureq;
+ struct ib_mad_reg_req req;
+ struct ib_mad_agent *agent;
+ int agent_id;
+ int ret;
+
+ down_write(&file->agent_mutex);
+
+ if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ureq.qpn != 0 && ureq.qpn != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+ if (!file->agent[agent_id])
+ goto found;
+
+ ret = -ENOMEM;
+ goto out;
+
+found:
+ req.mgmt_class = ureq.mgmt_class;
+ req.mgmt_class_version = ureq.mgmt_class_version;
+ memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask);
+ memcpy(req.oui, ureq.oui, sizeof req.oui);
+
+ agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+ ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+ &req, 0, send_handler, recv_handler,
+ file);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto out;
+ }
+
+ file->agent[agent_id] = agent;
+
+ file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(file->mr[agent_id])) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (put_user(agent_id,
+ (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
+ ret = -EFAULT;
+ goto err_mr;
+ }
+
+ ret = 0;
+ goto out;
+
+err_mr:
+ ib_dereg_mr(file->mr[agent_id]);
+
+err:
+ file->agent[agent_id] = NULL;
+ ib_unregister_mad_agent(agent);
+
+out:
+ up_write(&file->agent_mutex);
+ return ret;
+}
+
+static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
+{
+ u32 id;
+ int ret = 0;
+
+ down_write(&file->agent_mutex);
+
+ if (get_user(id, (u32 __user *) arg)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ib_dereg_mr(file->mr[id]);
+ ib_unregister_mad_agent(file->agent[id]);
+ file->agent[id] = NULL;
+
+out:
+ up_write(&file->agent_mutex);
+ return ret;
+}
+
+static long ib_umad_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case IB_USER_MAD_REGISTER_AGENT:
+ return ib_umad_reg_agent(filp->private_data, arg);
+ case IB_USER_MAD_UNREGISTER_AGENT:
+ return ib_umad_unreg_agent(filp->private_data, arg);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+static int ib_umad_open(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port =
+ container_of(inode->i_cdev, struct ib_umad_port, dev);
+ struct ib_umad_file *file;
+
+ file = kmalloc(sizeof *file, GFP_KERNEL);
+ if (!file)
+ return -ENOMEM;
+
+ memset(file, 0, sizeof *file);
+
+ spin_lock_init(&file->recv_lock);
+ init_rwsem(&file->agent_mutex);
+ INIT_LIST_HEAD(&file->recv_list);
+ init_waitqueue_head(&file->recv_wait);
+
+ file->port = port;
+ filp->private_data = file;
+
+ return 0;
+}
+
+static int ib_umad_close(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_file *file = filp->private_data;
+ int i;
+
+ for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+ if (file->agent[i]) {
+ ib_dereg_mr(file->mr[i]);
+ ib_unregister_mad_agent(file->agent[i]);
+ }
+
+ kfree(file);
+
+ return 0;
+}
+
+static struct file_operations umad_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
+ .unlocked_ioctl = ib_umad_ioctl,
+ .compat_ioctl = ib_umad_ioctl,
+ .open = ib_umad_open,
+ .release = ib_umad_close
+};
+
+static int ib_umad_sm_open(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port =
+ container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
+ struct ib_port_modify props = {
+ .set_port_cap_mask = IB_PORT_SM
+ };
+ int ret;
+
+ if (filp->f_flags & O_NONBLOCK) {
+ if (down_trylock(&port->sm_sem))
+ return -EAGAIN;
+ } else {
+ if (down_interruptible(&port->sm_sem))
+ return -ERESTARTSYS;
+ }
+
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ if (ret) {
+ up(&port->sm_sem);
+ return ret;
+ }
+
+ filp->private_data = port;
+
+ return 0;
+}
+
+static int ib_umad_sm_close(struct inode *inode, struct file *filp)
+{
+ struct ib_umad_port *port = filp->private_data;
+ struct ib_port_modify props = {
+ .clr_port_cap_mask = IB_PORT_SM
+ };
+ int ret;
+
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ up(&port->sm_sem);
+
+ return ret;
+}
+
+static struct file_operations umad_sm_fops = {
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
+ .release = ib_umad_sm_close
+};
+
+static struct ib_client umad_client = {
+ .name = "umad",
+ .add = ib_umad_add_one,
+ .remove = ib_umad_remove_one
+};
+
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+ struct ib_umad_port *port = class_get_devdata(class_dev);
+
+ if (class_dev == &port->class_dev)
+ return print_dev_t(buf, port->dev.dev);
+ else
+ return print_dev_t(buf, port->sm_dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+ struct ib_umad_port *port = class_get_devdata(class_dev);
+
+ return sprintf(buf, "%s\n", port->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static ssize_t show_port(struct class_device *class_dev, char *buf)
+{
+ struct ib_umad_port *port = class_get_devdata(class_dev);
+
+ return sprintf(buf, "%d\n", port->port_num);
+}
+static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+
+static void ib_umad_release_dev(struct kref *ref)
+{
+ struct ib_umad_device *dev =
+ container_of(ref, struct ib_umad_device, ref);
+
+ kfree(dev);
+}
+
+static void ib_umad_release_port(struct class_device *class_dev)
+{
+ struct ib_umad_port *port = class_get_devdata(class_dev);
+
+ if (class_dev == &port->class_dev) {
+ cdev_del(&port->dev);
+ clear_bit(port->devnum, dev_map);
+ } else {
+ cdev_del(&port->sm_dev);
+ clear_bit(port->sm_devnum, dev_map);
+ }
+
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+}
+
+static struct class umad_class = {
+ .name = "infiniband_mad",
+ .release = ib_umad_release_port
+};
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_port *port)
+{
+ spin_lock(&map_lock);
+ port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (port->devnum >= IB_UMAD_MAX_PORTS) {
+ spin_unlock(&map_lock);
+ return -1;
+ }
+ port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
+ if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
+ spin_unlock(&map_lock);
+ return -1;
+ }
+ set_bit(port->devnum, dev_map);
+ set_bit(port->sm_devnum, dev_map);
+ spin_unlock(&map_lock);
+
+ port->ib_dev = device;
+ port->port_num = port_num;
+ init_MUTEX(&port->sm_sem);
+
+ cdev_init(&port->dev, &umad_fops);
+ port->dev.owner = THIS_MODULE;
+ kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
+ if (cdev_add(&port->dev, base_dev + port->devnum, 1))
+ return -1;
+
+ port->class_dev.class = &umad_class;
+ port->class_dev.dev = device->dma_device;
+
+ snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
+
+ if (class_device_register(&port->class_dev))
+ goto err_cdev;
+
+ class_set_devdata(&port->class_dev, port);
+ kref_get(&port->umad_dev->ref);
+
+ if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
+ goto err_class;
+ if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+ if (class_device_create_file(&port->class_dev, &class_device_attr_port))
+ goto err_class;
+
+ cdev_init(&port->sm_dev, &umad_sm_fops);
+ port->sm_dev.owner = THIS_MODULE;
+ kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+ if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
+ return -1;
+
+ port->sm_class_dev.class = &umad_class;
+ port->sm_class_dev.dev = device->dma_device;
+
+ snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+
+ if (class_device_register(&port->sm_class_dev))
+ goto err_sm_cdev;
+
+ class_set_devdata(&port->sm_class_dev, port);
+ kref_get(&port->umad_dev->ref);
+
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
+ goto err_sm_class;
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
+ goto err_sm_class;
+ if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
+ goto err_sm_class;
+
+ return 0;
+
+err_sm_class:
+ class_device_unregister(&port->sm_class_dev);
+
+err_sm_cdev:
+ cdev_del(&port->sm_dev);
+
+err_class:
+ class_device_unregister(&port->class_dev);
+
+err_cdev:
+ cdev_del(&port->dev);
+ clear_bit(port->devnum, dev_map);
+
+ return -1;
+}
+
+static void ib_umad_add_one(struct ib_device *device)
+{
+ struct ib_umad_device *umad_dev;
+ int s, e, i;
+
+ if (device->node_type == IB_NODE_SWITCH)
+ s = e = 0;
+ else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ umad_dev = kmalloc(sizeof *umad_dev +
+ (e - s + 1) * sizeof (struct ib_umad_port),
+ GFP_KERNEL);
+ if (!umad_dev)
+ return;
+
+ memset(umad_dev, 0, sizeof *umad_dev +
+ (e - s + 1) * sizeof (struct ib_umad_port));
+
+ kref_init(&umad_dev->ref);
+
+ umad_dev->start_port = s;
+ umad_dev->end_port = e;
+
+ for (i = s; i <= e; ++i) {
+ umad_dev->port[i - s].umad_dev = umad_dev;
+
+ if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+ goto err;
+ }
+
+ ib_set_client_data(device, &umad_client, umad_dev);
+
+ return;
+
+err:
+ while (--i >= s) {
+ class_device_unregister(&umad_dev->port[i - s].class_dev);
+ class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
+ }
+
+ kref_put(&umad_dev->ref, ib_umad_release_dev);
+}
+
+static void ib_umad_remove_one(struct ib_device *device)
+{
+ struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client);
+ int i;
+
+ if (!umad_dev)
+ return;
+
+ for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
+ class_device_unregister(&umad_dev->port[i].class_dev);
+ class_device_unregister(&umad_dev->port[i].sm_class_dev);
+ }
+
+ kref_put(&umad_dev->ref, ib_umad_release_dev);
+}
+
+static int __init ib_umad_init(void)
+{
+ int ret;
+
+ spin_lock_init(&map_lock);
+
+ ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
+ "infiniband_mad");
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register device number\n");
+ goto out;
+ }
+
+ ret = class_register(&umad_class);
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ goto out_chrdev;
+ }
+
+ ret = class_create_file(&umad_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ goto out_class;
+ }
+
+ ret = ib_register_client(&umad_client);
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ goto out_class;
+ }
+
+ return 0;
+
+out_class:
+ class_unregister(&umad_class);
+
+out_chrdev:
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+
+out:
+ return ret;
+}
+
+static void __exit ib_umad_cleanup(void)
+{
+ ib_unregister_client(&umad_client);
+ class_unregister(&umad_class);
+ unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+}
+
+module_init(ib_umad_init);
+module_exit(ib_umad_cleanup);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
new file mode 100644
index 00000000000..7c08ed0cd7d
--- /dev/null
+++ b/drivers/infiniband/core/verbs.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+
+#include <ib_verbs.h>
+
+/* Protection domains */
+
+struct ib_pd *ib_alloc_pd(struct ib_device *device)
+{
+ struct ib_pd *pd;
+
+ pd = device->alloc_pd(device);
+
+ if (!IS_ERR(pd)) {
+ pd->device = device;
+ atomic_set(&pd->usecnt, 0);
+ }
+
+ return pd;
+}
+EXPORT_SYMBOL(ib_alloc_pd);
+
+int ib_dealloc_pd(struct ib_pd *pd)
+{
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ return pd->device->dealloc_pd(pd);
+}
+EXPORT_SYMBOL(ib_dealloc_pd);
+
+/* Address handles */
+
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct ib_ah *ah;
+
+ ah = pd->device->create_ah(pd, ah_attr);
+
+ if (!IS_ERR(ah)) {
+ ah->device = pd->device;
+ ah->pd = pd;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return ah;
+}
+EXPORT_SYMBOL(ib_create_ah);
+
+int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ return ah->device->modify_ah ?
+ ah->device->modify_ah(ah, ah_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_modify_ah);
+
+int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ return ah->device->query_ah ?
+ ah->device->query_ah(ah, ah_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_ah);
+
+int ib_destroy_ah(struct ib_ah *ah)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = ah->pd;
+ ret = ah->device->destroy_ah(ah);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_ah);
+
+/* Queue pairs */
+
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct ib_qp *qp;
+
+ qp = pd->device->create_qp(pd, qp_init_attr);
+
+ if (!IS_ERR(qp)) {
+ qp->device = pd->device;
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->recv_cq = qp_init_attr->recv_cq;
+ qp->srq = qp_init_attr->srq;
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
+ qp->qp_type = qp_init_attr->qp_type;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ if (qp_init_attr->srq)
+ atomic_inc(&qp_init_attr->srq->usecnt);
+ }
+
+ return qp;
+}
+EXPORT_SYMBOL(ib_create_qp);
+
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+}
+EXPORT_SYMBOL(ib_modify_qp);
+
+int ib_query_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ return qp->device->query_qp ?
+ qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_qp);
+
+int ib_destroy_qp(struct ib_qp *qp)
+{
+ struct ib_pd *pd;
+ struct ib_cq *scq, *rcq;
+ struct ib_srq *srq;
+ int ret;
+
+ pd = qp->pd;
+ scq = qp->send_cq;
+ rcq = qp->recv_cq;
+ srq = qp->srq;
+
+ ret = qp->device->destroy_qp(qp);
+ if (!ret) {
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&scq->usecnt);
+ atomic_dec(&rcq->usecnt);
+ if (srq)
+ atomic_dec(&srq->usecnt);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_qp);
+
+/* Completion queues */
+
+struct ib_cq *ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context, int cqe)
+{
+ struct ib_cq *cq;
+
+ cq = device->create_cq(device, cqe);
+
+ if (!IS_ERR(cq)) {
+ cq->device = device;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
+ }
+
+ return cq;
+}
+EXPORT_SYMBOL(ib_create_cq);
+
+int ib_destroy_cq(struct ib_cq *cq)
+{
+ if (atomic_read(&cq->usecnt))
+ return -EBUSY;
+
+ return cq->device->destroy_cq(cq);
+}
+EXPORT_SYMBOL(ib_destroy_cq);
+
+int ib_resize_cq(struct ib_cq *cq,
+ int cqe)
+{
+ int ret;
+
+ if (!cq->device->resize_cq)
+ return -ENOSYS;
+
+ ret = cq->device->resize_cq(cq, &cqe);
+ if (!ret)
+ cq->cqe = cqe;
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_resize_cq);
+
+/* Memory regions */
+
+struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+ struct ib_mr *mr;
+
+ mr = pd->device->get_dma_mr(pd, mr_access_flags);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_get_dma_mr);
+
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ struct ib_mr *mr;
+
+ mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_phys_mr);
+
+int ib_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ struct ib_pd *old_pd;
+ int ret;
+
+ if (!mr->device->rereg_phys_mr)
+ return -ENOSYS;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ old_pd = mr->pd;
+
+ ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
+ phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start);
+
+ if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
+ atomic_dec(&old_pd->usecnt);
+ atomic_inc(&pd->usecnt);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_rereg_phys_mr);
+
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+ return mr->device->query_mr ?
+ mr->device->query_mr(mr, mr_attr) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_mr);
+
+int ib_dereg_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ pd = mr->pd;
+ ret = mr->device->dereg_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dereg_mr);
+
+/* Memory windows */
+
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+{
+ struct ib_mw *mw;
+
+ if (!pd->device->alloc_mw)
+ return ERR_PTR(-ENOSYS);
+
+ mw = pd->device->alloc_mw(pd);
+ if (!IS_ERR(mw)) {
+ mw->device = pd->device;
+ mw->pd = pd;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return mw;
+}
+EXPORT_SYMBOL(ib_alloc_mw);
+
+int ib_dealloc_mw(struct ib_mw *mw)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = mw->pd;
+ ret = mw->device->dealloc_mw(mw);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dealloc_mw);
+
+/* "Fast" memory regions */
+
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct ib_fmr *fmr;
+
+ if (!pd->device->alloc_fmr)
+ return ERR_PTR(-ENOSYS);
+
+ fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ if (!IS_ERR(fmr)) {
+ fmr->device = pd->device;
+ fmr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ }
+
+ return fmr;
+}
+EXPORT_SYMBOL(ib_alloc_fmr);
+
+int ib_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ib_fmr *fmr;
+
+ if (list_empty(fmr_list))
+ return 0;
+
+ fmr = list_entry(fmr_list->next, struct ib_fmr, list);
+ return fmr->device->unmap_fmr(fmr_list);
+}
+EXPORT_SYMBOL(ib_unmap_fmr);
+
+int ib_dealloc_fmr(struct ib_fmr *fmr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = fmr->pd;
+ ret = fmr->device->dealloc_fmr(fmr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_dealloc_fmr);
+
+/* Multicast groups */
+
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ return qp->device->attach_mcast ?
+ qp->device->attach_mcast(qp, gid, lid) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_attach_mcast);
+
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ return qp->device->detach_mcast ?
+ qp->device->detach_mcast(qp, gid, lid) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_detach_mcast);