diff options
Diffstat (limited to 'drivers/infiniband')
130 files changed, 25273 insertions, 2725 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 69a53d476b5..9edfacee7d8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD ---help--- Userspace InfiniBand Management Datagram (MAD) support. This is the kernel side of the userspace MAD support, which allows - userspace processes to send and receive MADs. You will also + userspace processes to send and receive MADs. You will also need libibumad from <http://www.openib.org>. config INFINIBAND_USER_ACCESS @@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ipath/Kconfig" +source "drivers/infiniband/hw/ehca/Kconfig" +source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index c7ff58c1d0e..2b5d1098ef4 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ -obj-$(CONFIG_IPATH_CORE) += hw/ipath/ +obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ +obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ +obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 68e73ec2d1f..163d991eb8c 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,7 +1,7 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o $(infiniband-y) + ib_cm.o iw_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -14,6 +14,8 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +iw_cm-y := iwcm.o + rdma_cm-y := cma.o ib_addr-y := addr.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1205e802782..9cbf09e2052 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -61,12 +61,15 @@ static LIST_HEAD(req_list); static DECLARE_WORK(work, process_req, NULL); static struct workqueue_struct *addr_wq; -static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - unsigned char *dst_dev_addr) +int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + const unsigned char *dst_dev_addr) { switch (dev->type) { case ARPHRD_INFINIBAND: - dev_addr->dev_type = IB_NODE_CA; + dev_addr->dev_type = RDMA_NODE_IB_CA; + break; + case ARPHRD_ETHER: + dev_addr->dev_type = RDMA_NODE_RNIC; break; default: return -EADDRNOTAVAIL; @@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); return 0; } +EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { @@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) if (!dev) return -EADDRNOTAVAIL; - ret = copy_addr(dev_addr, dev, NULL); + ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } @@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { - copy_addr(addr, rt->idev->dev, NULL); + rdma_copy_addr(addr, rt->idev->dev, NULL); goto put; } @@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, src_in->sin_addr.s_addr = rt->rt_src; } - ret = copy_addr(addr, neigh->dev, neigh->ha); + ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); put: @@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, if (ZERONET(src_ip)) { src_in->sin_family = dst_in->sin_family; src_in->sin_addr.s_addr = dst_ip; - ret = copy_addr(addr, dev, dev->dev_addr); + ret = rdma_copy_addr(addr, dev, dev->dev_addr); } else if (LOOPBACK(src_ip)) { ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); if (!ret) @@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); -static int netevent_callback(struct notifier_block *self, unsigned long event, +static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { - if (event == NETEVENT_NEIGH_UPDATE) { + if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; if (neigh->dev->type == ARPHRD_INFINIBAND && diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 75313ade2e0..20e9f64e67a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -62,12 +62,13 @@ struct ib_update_work { static inline int start_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : 1; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; } static inline int end_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; } int ib_get_cached_gid(struct ib_device *device, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0de335b7bfc..f35fcc4c063 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -41,6 +41,7 @@ #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/pci.h> +#include <linux/random.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/workqueue.h> @@ -73,6 +74,7 @@ static struct ib_cm { struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; + __be32 random_id_operand; struct workqueue_struct *wq; } cm; @@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); @@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret; + int ret, id; static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, - (__force int *) &cm_id_priv->id.local_id); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, + next_id++, &id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + + cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand); return ret; } @@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id) unsigned long flags; spin_lock_irqsave(&cm.lock, flags); - idr_remove(&cm.local_id_table, (__force int) local_id); + idr_remove(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irqrestore(&cm.lock, flags); } @@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + cm_cleanup_timewait(cm_id_priv->timewait_info); + /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting @@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work) id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); - goto error1; + goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; @@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work) listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; - goto error2; + kfree(cm_id_priv->timewait_info); + goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; @@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); - if (ret) - goto error3; + if (ret) { + ib_get_cached_gid(work->port->cm_dev->device, + work->port->port_num, 0, &work->path[0].sgid); + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + &work->path[0].sgid, sizeof work->path[0].sgid, + NULL, 0); + goto rejected; + } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); - if (ret) - goto error3; + if (ret) { + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + &work->path[0].sgid, + sizeof work->path[0].sgid, NULL, 0); + goto rejected; + } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( @@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work) cm_deref_id(listen_cm_id_priv); return 0; -error3: atomic_dec(&cm_id_priv->refcount); +rejected: + atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); - cm_cleanup_timewait(cm_id_priv->timewait_info); -error2: kfree(cm_id_priv->timewait_info); - cm_id_priv->timewait_info = NULL; -error1: ib_destroy_cm_id(&cm_id_priv->id); +destroy: + ib_destroy_cm_id(cm_id); return ret; } @@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irqrestore(&cm.lock, flags); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) timewait_info->work.local_id); + cm_id_priv = idr_find(&cm.local_id_table, (__force int) + (timewait_info->work.local_id ^ + cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device) int ret; u8 i; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) @@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void) cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); + get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); cm.wq = create_workqueue("ib_cm"); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d6f99d5720f..1178bd434d1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -35,6 +35,7 @@ #include <linux/mutex.h> #include <linux/random.h> #include <linux/idr.h> +#include <linux/inetdevice.h> #include <net/tcp.h> @@ -43,13 +44,14 @@ #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include <rdma/ib_sa.h> +#include <rdma/iw_cm.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 -#define CMA_MAX_CM_RETRIES 3 +#define CMA_MAX_CM_RETRIES 15 static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -60,6 +62,7 @@ static struct ib_client cma_client = { .remove = cma_remove_one }; +static struct ib_sa_client sa_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -124,6 +127,7 @@ struct rdma_id_private { int query_id; union { struct ib_cm_id *ib; + struct iw_cm_id *iw; } cm_id; u32 seq_num; @@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) id_priv->cma_dev = NULL; } -static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv) { + enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; struct cma_device *cma_dev; union ib_gid gid; int ret = -ENODEV; - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), + switch (rdma_node_get_transport(dev_type)) { + case RDMA_TRANSPORT_IB: + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + case RDMA_TRANSPORT_IWARP: + iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + default: + return -ENODEV; + } - mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) break; } } - mutex_unlock(&lock); return ret; } -static int cma_acquire_dev(struct rdma_id_private *id_priv) -{ - switch (id_priv->id.route.addr.dev_addr.dev_type) { - case IB_NODE_CA: - return cma_acquire_ib_dev(id_priv); - default: - return -ENODEV; - } -} - static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) @@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) IB_QP_PKEY_INDEX | IB_QP_PORT); } +static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { @@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_init_ib_qp(id_priv, qp); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_init_iw_qp(id_priv, qp); + break; default: ret = -ENOSYS; break; @@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); + break; default: ret = -ENOSYS; break; @@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv, static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv) cma_exch(id_priv, CMA_DESTROYING); if (id_priv->cma_dev) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } @@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id) state = cma_exch(id_priv, CMA_DESTROYING); cma_cancel_operation(id_priv, state); + mutex_lock(&lock); if (id_priv->cma_dev) { - switch (id->device->node_type) { - case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + mutex_unlock(&lock); + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } - mutex_lock(&lock); + mutex_lock(&lock); cma_detach_from_dev(id_priv); - mutex_unlock(&lock); } + mutex_unlock(&lock); cma_release_port(id_priv); cma_deref_id(id_priv); @@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - rt->addr.dev_addr.dev_type = IB_NODE_CA; + rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } atomic_inc(&conn_id->dev_remove); - ret = cma_acquire_ib_dev(conn_id); + mutex_lock(&lock); + ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); if (ret) { ret = -ENODEV; cma_release_remove(conn_id); @@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } } +static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) +{ + struct rdma_id_private *id_priv = iw_id->context; + enum rdma_cm_event_type event = 0; + struct sockaddr_in *sin; + int ret = 0; + + atomic_inc(&id_priv->dev_remove); + + switch (iw_event->event) { + case IW_CM_EVENT_CLOSE: + event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IW_CM_EVENT_CONNECT_REPLY: + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; + *sin = iw_event->remote_addr; + if (iw_event->status) + event = RDMA_CM_EVENT_REJECTED; + else + event = RDMA_CM_EVENT_ESTABLISHED; + break; + case IW_CM_EVENT_ESTABLISHED: + event = RDMA_CM_EVENT_ESTABLISHED; + break; + default: + BUG_ON(1); + } + + ret = cma_notify_user(id_priv, event, iw_event->status, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } + + cma_release_remove(id_priv); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id *new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct sockaddr_in *sin; + struct net_device *dev = NULL; + int ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + /* Create a new RDMA id for the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP); + if (!new_cm_id) { + ret = -ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + atomic_inc(&conn_id->dev_remove); + conn_id->state = CMA_CONNECT; + + dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + if (!dev) { + ret = -EADDRNOTAVAIL; + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + mutex_lock(&lock); + ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; + *sin = iw_event->remote_addr; + + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* User wants to destroy the CM ID */ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } + +out: + if (dev) + dev_put(dev); + cma_release_remove(listen_id); + return ret; +} + static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; @@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) return ret; } +static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) +{ + int ret; + struct sockaddr_in *sin; + + id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.iw)) + return PTR_ERR(id_priv->cm_id.iw); + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + id_priv->cm_id.iw->local_addr = *sin; + + ret = iw_cm_listen(id_priv->cm_id.iw, backlog); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_ib_listen(id_priv); if (ret) goto err; break; + case RDMA_TRANSPORT_IWARP: + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; + break; default: ret = -ENOSYS; goto err; @@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; - id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, @@ -1231,6 +1415,23 @@ err: } EXPORT_SYMBOL(rdma_set_ib_paths); +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_resolve_iw_route(id_priv, timeout_ms); + break; default: ret = -ENOSYS; break; @@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr, enum rdma_cm_event_type event; atomic_inc(&id_priv->dev_remove); - if (!id_priv->cma_dev && !status) + + /* + * Grab mutex to block rdma_destroy_id() from removing the device while + * we're trying to acquire it. + */ + mutex_lock(&lock); + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { + mutex_unlock(&lock); + goto out; + } + + if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv); + mutex_unlock(&lock); if (status) { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) goto out; event = RDMA_CM_EVENT_ADDR_ERROR; } else { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) - goto out; memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); event = RDMA_CM_EVENT_ADDR_RESOLVED; @@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { if (cma_any_addr(&cur_id->id.route.addr.src_addr)) return -EADDRNOTAVAIL; - + cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) return -EADDRINUSE; @@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); - if (!ret) + if (!ret) { + mutex_lock(&lock); ret = cma_acquire_dev(id_priv); + mutex_unlock(&lock); + } if (ret) goto err; } @@ -1649,6 +1866,47 @@ out: return ret; } +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id *cm_id; + struct sockaddr_in* sin; + int ret; + struct iw_cm_conn_param iw_param; + + cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + goto out; + } + + id_priv->cm_id.iw = cm_id; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; + cm_id->local_addr = *sin; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; + cm_id->remote_addr = *sin; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) { + iw_destroy_cm_id(cm_id); + return ret; + } + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) + iw_param.qpn = id_priv->qp_num; + else + iw_param.qpn = conn_param->qp_num; + ret = iw_cm_connect(cm_id, &iw_param); +out: + return ret; +} + int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_connect_ib(id_priv, conn_param); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_connect_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, return ib_send_cm_rep(id_priv->cm_id.ib, &rep); } +static int cma_accept_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_conn_param iw_param; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) { + iw_param.qpn = id_priv->qp_num; + } else + iw_param.qpn = conn_param->qp_num; + + return iw_cm_accept(id_priv->cm_id.iw, &iw_param); +} + int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_accept_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!cma_comp(id_priv, CMA_CONNECT)) return -EINVAL; - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + break; default: ret = -ENOSYS; break; @@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id) !cma_comp(id_priv, CMA_DISCONNECT)) return -EINVAL; - ret = cma_modify_qp_err(id); - if (ret) - goto out; - - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_modify_qp_err(id); + if (ret) + goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); + break; default: + ret = -EINVAL; break; } out: @@ -1907,12 +2200,15 @@ static int cma_init(void) if (!cma_wq) return -ENOMEM; + ib_sa_register_client(&sa_client); + ret = ib_register_client(&cma_client); if (ret) goto err; return 0; err: + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; } @@ -1920,6 +2216,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b2f3cb91d9b..63d2a39fb82 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) EXPORT_SYMBOL(ib_get_client_data); /** - * ib_set_client_data - Get IB client context + * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set @@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) @@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c new file mode 100644 index 00000000000..c3fb304a4e8 --- /dev/null +++ b/drivers/infiniband/core/iwcm.c @@ -0,0 +1,1019 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/completion.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_addr.h> + +#include "iwcm.h" + +MODULE_AUTHOR("Tom Tucker"); +MODULE_DESCRIPTION("iWARP CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct workqueue_struct *iwcm_wq; +struct iwcm_work { + struct work_struct work; + struct iwcm_id_private *cm_id; + struct list_head list; + struct iw_cm_event event; + struct list_head free_list; +}; + +/* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: + * LISTENING IDS: Get enough elements preallocated to handle the + * listen backlog. + * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE + * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE + * + * Allocating them in connect and listen avoids having to deal + * with allocation failures on the event upcall from the provider (which + * is called in the interrupt context). + * + * One exception is when creating the cm_id for incoming connection requests. + * There are two cases: + * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If + * the backlog is exceeded, then no more connection request events will + * be processed. cm_event_handler() returns -ENOMEM in this case. Its up + * to the provider to reject the connectino request. + * 2) in the connection request workqueue handler, cm_conn_req_handler(). + * If work elements cannot be allocated for the new connect request cm_id, + * then IWCM will call the provider reject method. This is ok since + * cm_conn_req_handler() runs in the workqueue thread context. + */ + +static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *work; + + if (list_empty(&cm_id_priv->work_free_list)) + return NULL; + work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, + free_list); + list_del_init(&work->free_list); + return work; +} + +static void put_work(struct iwcm_work *work) +{ + list_add(&work->free_list, &work->cm_id->work_free_list); +} + +static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) +{ + struct list_head *e, *tmp; + + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + kfree(list_entry(e, struct iwcm_work, free_list)); +} + +static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) +{ + struct iwcm_work *work; + + BUG_ON(!list_empty(&cm_id_priv->work_free_list)); + while (count--) { + work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); + if (!work) { + dealloc_work_entries(cm_id_priv); + return -ENOMEM; + } + work->cm_id = cm_id_priv; + INIT_LIST_HEAD(&work->list); + put_work(work); + } + return 0; +} + +/* + * Save private data from incoming connection requests in the + * cm_id_priv so the low level driver doesn't have to. Adjust + * the event ptr to point to the local copy. + */ +static int copy_private_data(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *event) +{ + void *p; + + p = kmalloc(event->private_data_len, GFP_ATOMIC); + if (!p) + return -ENOMEM; + memcpy(p, event->private_data, event->private_data_len); + event->private_data = p; + return 0; +} + +/* + * Release a reference on cm_id. If the last reference is being removed + * and iw_destroy_cm_id is waiting, wake up the waiting thread. + */ +static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + int ret = 0; + + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (atomic_dec_and_test(&cm_id_priv->refcount)) { + BUG_ON(!list_empty(&cm_id_priv->work_list)); + if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) { + BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, + &cm_id_priv->flags)); + ret = 1; + } + complete(&cm_id_priv->destroy_comp); + } + + return ret; +} + +static void add_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + atomic_inc(&cm_id_priv->refcount); +} + +static void rem_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + iwcm_deref_id(cm_id_priv); +} + +static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + cm_id_priv->state = IW_CM_STATE_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.event_handler = cm_event_handler; + cm_id_priv->id.add_ref = add_ref; + cm_id_priv->id.rem_ref = rem_ref; + spin_lock_init(&cm_id_priv->lock); + atomic_set(&cm_id_priv->refcount, 1); + init_waitqueue_head(&cm_id_priv->connect_wait); + init_completion(&cm_id_priv->destroy_comp); + INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->work_free_list); + + return &cm_id_priv->id; +} +EXPORT_SYMBOL(iw_create_cm_id); + + +static int iwcm_modify_qp_err(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + if (!qp) + return -EINVAL; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * This is really the RDMAC CLOSING state. It is most similar to the + * IB SQD QP state. + */ +static int iwcm_modify_qp_sqd(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + BUG_ON(qp == NULL); + qp_attr.qp_state = IB_QPS_SQD; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * CM_ID <-- CLOSING + * + * Block if a passive or active connection is currenlty being processed. Then + * process the event as follows: + * - If we are ESTABLISHED, move to CLOSING and modify the QP state + * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0 + * - Disconnect on a listening endpoint returns -EINVAL + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + struct ib_qp *qp = NULL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* Wait if we're currently in a connect or accept downcall */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_CLOSING; + + /* QP could be <nul> for user-mode client */ + if (cm_id_priv->qp) + qp = cm_id_priv->qp; + else + ret = -EINVAL; + break; + case IW_CM_STATE_LISTEN: + ret = -EINVAL; + break; + case IW_CM_STATE_CLOSING: + /* remote peer closed first */ + case IW_CM_STATE_IDLE: + /* accept or connect returned !0 */ + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called disconnect before/without calling accept after + * connect_request event delivered. + */ + break; + case IW_CM_STATE_CONN_SENT: + /* Can only get here if wait above fails */ + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (qp) { + if (abrupt) + ret = iwcm_modify_qp_err(qp); + else + ret = iwcm_modify_qp_sqd(qp); + + /* + * If both sides are disconnecting the QP could + * already be in ERR or SQD states + */ + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_disconnect); + +/* + * CM_ID <-- DESTROYING + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. + */ +static void destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* + * Wait if we're currently in a connect or accept downcall. A + * listening endpoint should never block here. + */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_LISTEN: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* destroy the listening endpoint */ + ret = cm_id->device->iwcm->destroy_listen(cm_id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* Abrupt close of the connection */ + (void)iwcm_modify_qp_err(cm_id_priv->qp); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called destroy before/without calling accept after + * receiving connection request event notification. + */ + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_DESTROYING: + default: + BUG(); + break; + } + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + (void)iwcm_deref_id(cm_id_priv); +} + +/* + * This function is only called by the application thread and cannot + * be called by the event thread. The function will wait for all + * references to be released on the cm_id and then kfree the cm_id + * object. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); + + destroy_cm_id(cm_id); + + wait_for_completion(&cm_id_priv->destroy_comp); + + dealloc_work_entries(cm_id_priv); + + kfree(cm_id_priv); +} +EXPORT_SYMBOL(iw_destroy_cm_id); + +/* + * CM_ID <-- LISTEN + * + * Start listening for connect requests. Generates one CONNECT_REQUEST + * event for each inbound connect request. + */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + cm_id_priv->state = IW_CM_STATE_LISTEN; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret) + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + default: + ret = -EINVAL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} +EXPORT_SYMBOL(iw_cm_listen); + +/* + * CM_ID <-- IDLE + * + * Rejects an inbound connection request. No events are generated. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len); + + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} +EXPORT_SYMBOL(iw_cm_reject); + +/* + * CM_ID <-- ESTABLISHED + * + * Accepts an inbound connection request and generates an ESTABLISHED + * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block + * until the ESTABLISHED event is received from the provider. + */ +int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->accept(cm_id, iw_param); + if (ret) { + /* An error on accept precludes provider events */ + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_accept); + +/* + * Active Side: CM_ID <-- CONN_SENT + * + * If successful, results in the generation of a CONNECT_REPLY + * event. iw_cm_disconnect and iw_cm_destroy will block until the + * CONNECT_REPLY event is received from the provider. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + int ret = 0; + unsigned long flags; + struct ib_qp *qp; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, 4); + if (ret) + return ret; + + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->state != IW_CM_STATE_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + cm_id_priv->state = IW_CM_STATE_CONN_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (ret) { + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + cm_id_priv->state = IW_CM_STATE_IDLE; + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_connect); + +/* + * Passive Side: new CM_ID <-- CONN_RECV + * + * Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and + * device. These are copied when the device is cloned. The event + * contains the new four tuple. + * + * An error on the child should not affect the parent, so this + * function does not return a value. + */ +static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + struct iw_cm_id *cm_id; + struct iwcm_id_private *cm_id_priv; + int ret; + + /* + * The provider should never generate a connection request + * event with a bad status. + */ + BUG_ON(iw_event->status); + + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + return; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + + cm_id = iw_create_cm_id(listen_id_priv->id.device, + listen_id_priv->id.cm_handler, + listen_id_priv->id.context); + /* If the cm_id could not be created, ignore the request */ + if (IS_ERR(cm_id)) + return; + + cm_id->provider_data = iw_event->provider_data; + cm_id->local_addr = iw_event->local_addr; + cm_id->remote_addr = iw_event->remote_addr; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->state = IW_CM_STATE_CONN_RECV; + + ret = alloc_work_entries(cm_id_priv, 3); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + return; + } + + /* Call the client CM handler */ + ret = cm_id->cm_handler(cm_id, iw_event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(cm_id); + if (atomic_read(&cm_id_priv->refcount)==0) + kfree(cm_id); + } + + if (iw_event->private_data_len) + kfree(iw_event->private_data); +} + +/* + * Passive Side: CM_ID <-- ESTABLISHED + * + * The provider generated an ESTABLISHED event which means that + * the MPA negotion has completed successfully and we are now in MPA + * FPDU mode. + * + * This event can only be received in the CONN_RECV state. If the + * remote peer closed, the ESTABLISHED event would be received followed + * by the CLOSE event. If the app closes, it will block until we wake + * it up after processing this event. + */ +static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + + /* + * We clear the CONNECT_WAIT bit here to allow the callback + * function to call iw_cm_disconnect. Calling iw_destroy_cm_id + * from a callback handler is not allowed. + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * Active Side: CM_ID <-- ESTABLISHED + * + * The app has called connect and is waiting for the established event to + * post it's requests to the server. This event will wake up anyone + * blocked in iw_cm_disconnect or iw_destroy_id. + */ +static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + /* + * Clear the connect wait bit so a callback function calling + * iw_cm_disconnect will not wait and deadlock this thread + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { + cm_id_priv->id.local_addr = iw_event->local_addr; + cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + } else { + /* REJECTED or RESET */ + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + cm_id_priv->state = IW_CM_STATE_IDLE; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + + if (iw_event->private_data_len) + kfree(iw_event->private_data); + + /* Wake up waiters on connect complete */ + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * CM_ID <-- CLOSING + * + * If in the ESTABLISHED state, move to CLOSING. + */ +static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) + cm_id_priv->state = IW_CM_STATE_CLOSING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * CM_ID <-- IDLE + * + * If in the ESTBLISHED or CLOSING states, the QP will have have been + * moved by the provider to the ERR state. Disassociate the CM_ID from + * the QP, move to IDLE, and remove the 'connected' reference. + * + * If in some other state, the cm_id was destroyed asynchronously. + * This is the last reference that will result in waking up + * the app thread blocked in iw_destroy_cm_id. + */ +static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_DESTROYING: + break; + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +static int process_event(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + + switch (iw_event->event) { + case IW_CM_EVENT_CONNECT_REQUEST: + cm_conn_req_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CONNECT_REPLY: + ret = cm_conn_rep_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_ESTABLISHED: + ret = cm_conn_est_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_DISCONNECT: + cm_disconnect_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CLOSE: + ret = cm_close_handler(cm_id_priv, iw_event); + break; + default: + BUG(); + } + + return ret; +} + +/* + * Process events on the work_list for the cm_id. If the callback + * function requests that the cm_id be deleted, a flag is set in the + * cm_id flags to indicate that when the last reference is + * removed, the cm_id is to be destroyed. This is necessary to + * distinguish between an object that will be destroyed by the app + * thread asleep on the destroy_comp list vs. an object destroyed + * here synchronously when the last reference is removed. + */ +static void cm_work_handler(void *arg) +{ + struct iwcm_work *work = arg, lwork; + struct iwcm_id_private *cm_id_priv = work->cm_id; + unsigned long flags; + int empty; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + empty = list_empty(&cm_id_priv->work_list); + while (!empty) { + work = list_entry(cm_id_priv->work_list.next, + struct iwcm_work, list); + list_del_init(&work->list); + empty = list_empty(&cm_id_priv->work_list); + lwork = *work; + put_work(work); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = process_event(cm_id_priv, &work->event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(&cm_id_priv->id); + } + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (iwcm_deref_id(cm_id_priv)) + return; + + if (atomic_read(&cm_id_priv->refcount)==0 && + test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + dealloc_work_entries(cm_id_priv); + kfree(cm_id_priv); + return; + } + spin_lock_irqsave(&cm_id_priv->lock, flags); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * This function is called on interrupt context. Schedule events on + * the iwcm_wq thread to allow callback functions to downcall into + * the CM and/or block. Events are queued to a per-CM_ID + * work_list. If this is the first event on the work_list, the work + * element is also queued on the iwcm_wq thread. + * + * Each event holds a reference on the cm_id. Until the last posted + * event has been delivered and processed, the cm_id cannot be + * deleted. + * + * Returns: + * 0 - the event was handled. + * -ENOMEM - the event was not handled due to lack of resources. + */ +static int cm_event_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct iwcm_work *work; + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = get_work(cm_id_priv); + if (!work) { + ret = -ENOMEM; + goto out; + } + + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_id = cm_id_priv; + work->event = *iw_event; + + if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || + work->event.event == IW_CM_EVENT_CONNECT_REPLY) && + work->event.private_data_len) { + ret = copy_private_data(cm_id_priv, &work->event); + if (ret) { + put_work(work); + goto out; + } + } + + atomic_inc(&cm_id_priv->refcount); + if (list_empty(&cm_id_priv->work_list)) { + list_add_tail(&work->list, &cm_id_priv->work_list); + queue_work(iwcm_wq, &work->work); + } else + list_add_tail(&work->list, &cm_id_priv->work_list); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_READ; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = 0; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + case IB_QPS_RTR: + ret = iwcm_init_qp_init_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = iwcm_init_qp_rts_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(iw_cm_init_qp_attr); + +static int __init iw_cm_init(void) +{ + iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + if (!iwcm_wq) + return -ENOMEM; + + return 0; +} + +static void __exit iw_cm_cleanup(void) +{ + destroy_workqueue(iwcm_wq); +} + +module_init(iw_cm_init); +module_exit(iw_cm_cleanup); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h new file mode 100644 index 00000000000..3f6cc82564c --- /dev/null +++ b/drivers/infiniband/core/iwcm.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef IWCM_H +#define IWCM_H + +enum iw_cm_state { + IW_CM_STATE_IDLE, /* unbound, inactive */ + IW_CM_STATE_LISTEN, /* listen waiting for connect */ + IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ + IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ + IW_CM_STATE_ESTABLISHED, /* established */ + IW_CM_STATE_CLOSING, /* disconnect */ + IW_CM_STATE_DESTROYING /* object being deleted */ +}; + +struct iwcm_id_private { + struct iw_cm_id id; + enum iw_cm_state state; + unsigned long flags; + struct ib_qp *qp; + struct completion destroy_comp; + wait_queue_head_t connect_wait; + struct list_head work_list; + spinlock_t lock; + atomic_t refcount; + struct list_head work_free_list; +}; + +#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_CONNECT_WAIT 2 + +#endif /* IWCM_H */ diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1c3cfbbe6a9..493f4c65c7a 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, int i; for (i = 0; i < MAX_MGMT_OUI; i++) - /* Is there matching OUI for this vendor class ? */ - if (!memcmp(vendor_class->oui[i], oui, 3)) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; @@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } @@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } } sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, + device->dma_device, &mad_priv->grh, sizeof *mad_priv - - sizeof mad_priv->header, + sizeof mad_priv->header, DMA_FROM_DEVICE); pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; @@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) struct ib_qp *qp; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { + if (!attr) { printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } @@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (device->node_type == IB_NODE_SWITCH) { + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; } else { @@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; } else { @@ -2984,10 +2987,7 @@ error1: static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - - if (kmem_cache_destroy(ib_mad_cache)) { - printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n"); - } + kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d147f3bad2c..d06b59083f6 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -38,8 +38,8 @@ #define __IB_MAD_PRIV_H__ #include <linux/completion.h> +#include <linux/err.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/workqueue.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index ebcd5b18177..1ef79d015a1 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -33,8 +33,6 @@ * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ */ -#include <linux/dma-mapping.h> - #include "mad_priv.h" #include "mad_rmpp.h" @@ -60,6 +58,7 @@ struct mad_rmpp_recv { int last_ack; int seg_num; int newwin; + int repwin; __be64 tid; u32 src_qp; @@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, return msg; } +static void ack_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + int ret; + + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) + return; + + rmpp_mad = msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } +} + void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; @@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->newwin = 1; rmpp_recv->seg_num = 1; rmpp_recv->last_ack = 0; + rmpp_recv->repwin = 1; mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; rmpp_recv->tid = mad_hdr->tid; @@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent) static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, int seg_num) { - struct ib_mad_recv_buf *seg_buf; + struct ib_mad_recv_buf *seg_buf; int cur_seg_num; list_for_each_entry_reverse(seg_buf, rmpp_list, list) { @@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, break; } +static void process_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc, int newwin) +{ + struct mad_rmpp_recv *rmpp_recv; + + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) + rmpp_recv->repwin = newwin; +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); - if (!mad_send_wr) - goto out; /* Unmatched ACK */ + if (!mad_send_wr) { + if (!seg_num) + process_ds_ack(agent, mad_recv_wc, newwin); + goto out; /* Unmatched or DS RMPP ACK */ + } + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && + (mad_send_wr->timeout)) { + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; /* Repeated ACK for DS RMPP transaction */ + } if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) @@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (mad_send_wr->refcount == 1) ib_reset_mad_timeout(mad_send_wr, mad_send_wr->send_buf.timeout_ms); + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { @@ -772,6 +821,39 @@ out: return NULL; } +static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; + struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; + struct mad_rmpp_recv *rmpp_recv; + struct ib_ah_attr ah_attr; + unsigned long flags; + int newwin = 1; + + if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) + goto out; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid != mad_hdr->tid || + rmpp_recv->mgmt_class != mad_hdr->mgmt_class || + rmpp_recv->class_version != mad_hdr->class_version || + (rmpp_recv->method & IB_MGMT_METHOD_RESP)) + continue; + + if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) + continue; + + if (rmpp_recv->slid == ah_attr.dlid) { + newwin = rmpp_recv->repwin; + break; + } + } + spin_unlock_irqrestore(&agent->lock, flags); +out: + return newwin; +} + int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; @@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) return IB_RMPP_RESULT_INTERNAL; } - mad_send_wr->newwin = 1; + mad_send_wr->newwin = init_newwin(mad_send_wr); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index d6b84226bba..1706d3c7e95 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -75,6 +76,7 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); + struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; @@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event } } +void ib_sa_register_client(struct ib_sa_client *client) +{ + atomic_set(&client->users, 1); + init_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_register_client); + +static inline void ib_sa_client_get(struct ib_sa_client *client) +{ + atomic_inc(&client->users); +} + +static inline void ib_sa_client_put(struct ib_sa_client *client) +{ + if (atomic_dec_and_test(&client->users)) + complete(&client->comp); +} + +void ib_sa_unregister_client(struct ib_sa_client *client) +{ + ib_sa_client_put(client); + wait_for_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_unregister_client); + /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel @@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_path_rec_get - Start a Path get query + * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query @@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a query ID that can be used to cancel * the query. */ -int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, +int ib_sa_path_rec_get(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_service_rec_query - Start Service Record operation + * @client:SA client * @device:device to send request on * @port_num: port number to send request on * @method:SA method - should be get, set, or delete @@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a request ID that can be used to cancel * the query. */ -int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } -int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, @@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent, idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - ib_free_send_mad(mad_send_wc->send_buf); + ib_free_send_mad(mad_send_wc->send_buf); kref_put(&query->sm_ah->ref, free_sm_ah); + ib_sa_client_put(query->client); query->release(query); } @@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device) struct ib_sa_device *sa_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 35852e794e2..54b81e17ad5 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-9:2 */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->return_path set when received */ @@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == hop_cnt) { /* smp->return_path set when received */ smp->hop_ptr++; - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->hop_ptr--; @@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == 1) { smp->hop_ptr--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_slid == IB_LID_PERMISSIVE); } @@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->return_path[hop_ptr] = port_num; @@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, smp->return_path[hop_ptr] = port_num; /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->hop_ptr updated when sending */ @@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, return 1; } /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH); + return (node_type == RDMA_NODE_IB_SWITCH); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 21f9282c1b2..709323c14c5 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -68,7 +68,7 @@ struct port_table_attribute { int index; }; -static inline int ibdev_is_alive(const struct ib_device *dev) +static inline int ibdev_is_alive(const struct ib_device *dev) { return dev->reg_state == IB_DEV_REGISTERED; } @@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) return -ENODEV; switch (dev->node_type) { - case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); + case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); + case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); + case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); + case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); + default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); } } @@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device) if (ret) goto err_put; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0); if (ret) goto err_put; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index c1c6fda9452..ad4f4d5c292 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = + uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = + uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; @@ -1237,7 +1237,7 @@ static struct class ucm_class = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; - + dev = container_of(class_dev, struct ib_ucm_device, class_dev); return sprintf(buf, "%s\n", dev->ib_dev->name); } @@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext) + if (!device->alloc_ucontext || + rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1273f8807e8..807fbd6b841 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device) struct ib_umad_device *umad_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 30923eb68ec..b72c7f69ca9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, } static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context) + struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; @@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, if (!uobj) return NULL; - down_read(&uobj->mutex); + if (nested) + down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); + else + down_read(&uobj->mutex); if (!uobj->live) { put_uobj_read(uobj); return NULL; @@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, return uobj; } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, + int nested) { struct ib_uobject *uobj; - uobj = idr_read_uobj(idr, id, context); + uobj = idr_read_uobj(idr, id, context, nested); return uobj ? uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) @@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd) put_uobj_read(pd->uobject); } -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) @@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq) static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) @@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah) static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } static void put_qp_read(struct ib_qp *qp) @@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp) static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) @@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, err_copy: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - err_free: ib_destroy_cq(cq); @@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext); + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; @@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (ret) goto out; - memset(&resp, 0, sizeof resp); resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + &resp, sizeof resp.cqe)) ret = -EFAULT; out: @@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; - struct ib_uobject *uobj; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; @@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) { + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { ret = -EINVAL; goto out; } - cq = uobj->object; resp->count = ib_poll_cq(cq, cmd.ne, wc); - put_uobj_read(uobj); + put_cq_read(cq); for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; @@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_req_notify_cq cmd; - struct ib_uobject *uobj; struct ib_cq *cq; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) return -EINVAL; - cq = uobj->object; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_uobj_read(uobj); + put_cq_read(cq); return in_len; } @@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext); + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; @@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_qp cmd; + struct ib_udata udata; struct ib_qp *qp; struct ib_qp_attr *attr; int ret; @@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = ib_modify_qp(qp, attr, cmd.attr_mask); + ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); put_qp_read(qp); @@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; @@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; @@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_srq cmd; + struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; @@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = ib_modify_srq(srq, &attr, cmd.attr_mask); + ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); put_srq_read(srq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 468999c3880..8b5dd3649bb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + case RDMA_NODE_IB_SWITCH: + case RDMA_NODE_IB_ROUTER: + return RDMA_TRANSPORT_IB; + case RDMA_NODE_RNIC: + return RDMA_TRANSPORT_IWARP; + default: + BUG(); + return 0; + } +} +EXPORT_SYMBOL(rdma_node_get_transport); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) @@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { - return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_srq); @@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild new file mode 100644 index 00000000000..06964c4af84 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kbuild @@ -0,0 +1,8 @@ +ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG +EXTRA_CFLAGS += -DDEBUG +endif + +obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o + +iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \ + c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig new file mode 100644 index 00000000000..809cb14ac6d --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kconfig @@ -0,0 +1,15 @@ +config INFINIBAND_AMSO1100 + tristate "Ammasso 1100 HCA support" + depends on PCI && INET && INFINIBAND + ---help--- + This is a low-level driver for the Ammasso 1100 host + channel adapter (HCA). + +config INFINIBAND_AMSO1100_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_AMSO1100 + default n + ---help--- + This option causes the amso1100 driver to produce a bunch of + debug messages. Select this if you are developing the driver + or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c new file mode 100644 index 00000000000..9e9120f3601 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -0,0 +1,1255 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> +#include <linux/dma-mapping.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/byteorder.h> + +#include <rdma/ib_smi.h> +#include "c2.h" +#include "c2_provider.h" + +MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); +MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK + | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; + +static int debug = -1; /* defaults above */ +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static int c2_up(struct net_device *netdev); +static int c2_down(struct net_device *netdev); +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +static void c2_tx_interrupt(struct net_device *netdev); +static void c2_rx_interrupt(struct net_device *netdev); +static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static void c2_tx_timeout(struct net_device *netdev); +static int c2_change_mtu(struct net_device *netdev, int new_mtu); +static void c2_reset(struct c2_port *c2_port); +static struct net_device_stats *c2_get_stats(struct net_device *netdev); + +static struct pci_device_id c2_pci_table[] = { + { PCI_DEVICE(0x18b8, 0xb001) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, c2_pci_table); + +static void c2_print_macaddr(struct net_device *netdev) +{ + pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, " + "IRQ %u\n", netdev->name, + netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], + netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5], + netdev->irq); +} + +static void c2_set_rxbufsize(struct c2_port *c2_port) +{ + struct net_device *netdev = c2_port->netdev; + + if (netdev->mtu > RX_BUF_SIZE) + c2_port->rx_buf_size = + netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) + + NET_IP_ALIGN; + else + c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE; +} + +/* + * Allocate TX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_txp_ring) +{ + struct c2_tx_desc *tx_desc; + struct c2_txp_desc __iomem *txp_desc; + struct c2_element *elem; + int i; + + tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL); + if (!tx_ring->start) + return -ENOMEM; + + elem = tx_ring->start; + tx_desc = vaddr; + txp_desc = mmio_txp_ring; + for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) { + tx_desc->len = 0; + tx_desc->status = 0; + + /* Set TXP_HTXD_UNINIT */ + __raw_writeq(cpu_to_be64(0x1122334455667788ULL), + (void __iomem *) txp_desc + C2_TXP_ADDR); + __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT), + (void __iomem *) txp_desc + C2_TXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = tx_desc; + elem->hw_desc = txp_desc; + + if (i == tx_ring->count - 1) { + elem->next = tx_ring->start; + tx_desc->next_offset = base; + } else { + elem->next = elem + 1; + tx_desc->next_offset = + base + (i + 1) * sizeof(*tx_desc); + } + } + + tx_ring->to_use = tx_ring->to_clean = tx_ring->start; + + return 0; +} + +/* + * Allocate RX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_rxp_ring) +{ + struct c2_rx_desc *rx_desc; + struct c2_rxp_desc __iomem *rxp_desc; + struct c2_element *elem; + int i; + + rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL); + if (!rx_ring->start) + return -ENOMEM; + + elem = rx_ring->start; + rx_desc = vaddr; + rxp_desc = mmio_rxp_ring; + for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) { + rx_desc->len = 0; + rx_desc->status = 0; + + /* Set RXP_HRXD_UNINIT */ + __raw_writew(cpu_to_be16(RXP_HRXD_OK), + (void __iomem *) rxp_desc + C2_RXP_STATUS); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL), + (void __iomem *) rxp_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT), + (void __iomem *) rxp_desc + C2_RXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = rx_desc; + elem->hw_desc = rxp_desc; + + if (i == rx_ring->count - 1) { + elem->next = rx_ring->start; + rx_desc->next_offset = base; + } else { + elem->next = elem + 1; + rx_desc->next_offset = + base + (i + 1) * sizeof(*rx_desc); + } + } + + rx_ring->to_use = rx_ring->to_clean = rx_ring->start; + + return 0; +} + +/* Setup buffer for receiving */ +static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; + struct c2_rxp_hdr *rxp_hdr; + + skb = dev_alloc_skb(c2_port->rx_buf_size); + if (unlikely(!skb)) { + pr_debug("%s: out of memory for receive\n", + c2_port->netdev->name); + return -ENOMEM; + } + + /* Zero out the rxp hdr in the sk_buff */ + memset(skb->data, 0, sizeof(*rxp_hdr)); + + skb->dev = c2_port->netdev; + + maplen = c2_port->rx_buf_size; + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, + PCI_DMA_FROMDEVICE); + + /* Set the sk_buff RXP_header to RXP_HRXD_READY */ + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + rxp_hdr->flags = RXP_HRXD_READY; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS); + + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + rx_desc->len = maplen; + + return 0; +} + +/* + * Allocate buffers for the Rx ring + * For receive: rx_ring.to_clean is next received frame + */ +static int c2_rx_fill(struct c2_port *c2_port) +{ + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + int ret = 0; + + elem = rx_ring->start; + do { + if (c2_rx_alloc(c2_port, elem)) { + ret = 1; + break; + } + } while ((elem = elem->next) != rx_ring->start); + + rx_ring->to_clean = rx_ring->start; + return ret; +} + +/* Free all buffers in RX ring, assumes receiver stopped */ +static void c2_rx_clean(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + + elem = rx_ring->start; + do { + rx_desc = elem->ht_desc; + rx_desc->len = 0; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew(0, elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL), + elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT), + elem->hw_desc + C2_RXP_FLAGS); + + if (elem->skb) { + pci_unmap_single(c2dev->pcidev, elem->mapaddr, + elem->maplen, PCI_DMA_FROMDEVICE); + dev_kfree_skb(elem->skb); + elem->skb = NULL; + } + } while ((elem = elem->next) != rx_ring->start); +} + +static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem) +{ + struct c2_tx_desc *tx_desc = elem->ht_desc; + + tx_desc->len = 0; + + pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen, + PCI_DMA_TODEVICE); + + if (elem->skb) { + dev_kfree_skb_any(elem->skb); + elem->skb = NULL; + } + + return 0; +} + +/* Free all buffers in TX ring, assumes transmitter stopped */ +static void c2_tx_clean(struct c2_port *c2_port) +{ + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + int retry; + unsigned long flags; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + elem = tx_ring->start; + + do { + retry = 0; + do { + txp_htxd.flags = + readw(elem->hw_desc + C2_TXP_FLAGS); + + if (txp_htxd.flags == TXP_HTXD_READY) { + retry = 1; + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq(0, + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(TXP_HTXD_DONE), + elem->hw_desc + C2_TXP_FLAGS); + c2_port->netstats.tx_dropped++; + break; + } else { + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq(cpu_to_be64(0x1122334455667788ULL), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT), + elem->hw_desc + C2_TXP_FLAGS); + } + + c2_tx_free(c2_port->c2dev, elem); + + } while ((elem = elem->next) != tx_ring->start); + } while (retry); + + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start; + + if (c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(c2_port->netdev); + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); +} + +/* + * Process transmit descriptors marked 'DONE' by the firmware, + * freeing up their unneeded sk_buffs. + */ +static void c2_tx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + + spin_lock(&c2_port->tx_lock); + + for (elem = tx_ring->to_clean; elem != tx_ring->to_use; + elem = elem->next) { + txp_htxd.flags = + be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS)); + + if (txp_htxd.flags != TXP_HTXD_DONE) + break; + + if (netif_msg_tx_done(c2_port)) { + /* PCI reads are expensive in fast path */ + txp_htxd.len = + be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN)); + pr_debug("%s: tx done slot %3Zu status 0x%x len " + "%5u bytes\n", + netdev->name, elem - tx_ring->start, + txp_htxd.flags, txp_htxd.len); + } + + c2_tx_free(c2dev, elem); + ++(c2_port->tx_avail); + } + + tx_ring->to_clean = elem; + + if (netif_queue_stopped(netdev) + && c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(netdev); + + spin_unlock(&c2_port->tx_lock); +} + +static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + + if (rxp_hdr->status != RXP_HRXD_OK || + rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) { + pr_debug("BAD RXP_HRXD\n"); + pr_debug(" rx_desc : %p\n", rx_desc); + pr_debug(" index : %Zu\n", + elem - c2_port->rx_ring.start); + pr_debug(" len : %u\n", rx_desc->len); + pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr, + (void *) __pa((unsigned long) rxp_hdr)); + pr_debug(" flags : 0x%x\n", rxp_hdr->flags); + pr_debug(" status: 0x%x\n", rxp_hdr->status); + pr_debug(" len : %u\n", rxp_hdr->len); + pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd); + } + + /* Setup the skb for reuse since we're dropping this pkt */ + elem->skb->tail = elem->skb->data = elem->skb->head; + + /* Zero out the rxp hdr in the sk_buff */ + memset(elem->skb->data, 0, sizeof(*rxp_hdr)); + + /* Write the descriptor to the adapter's rx ring */ + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS); + + pr_debug("packet dropped\n"); + c2_port->netstats.rx_dropped++; +} + +static void c2_rx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + struct c2_rxp_hdr *rxp_hdr; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen, buflen; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + + /* Begin where we left off */ + rx_ring->to_clean = rx_ring->start + c2dev->cur_rx; + + for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean; + elem = elem->next) { + rx_desc = elem->ht_desc; + mapaddr = elem->mapaddr; + maplen = elem->maplen; + skb = elem->skb; + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + + if (rxp_hdr->flags != RXP_HRXD_DONE) + break; + buflen = rxp_hdr->len; + + /* Sanity check the RXP header */ + if (rxp_hdr->status != RXP_HRXD_OK || + buflen > (rx_desc->len - sizeof(*rxp_hdr))) { + c2_rx_error(c2_port, elem); + continue; + } + + /* + * Allocate and map a new skb for replenishing the host + * RX desc + */ + if (c2_rx_alloc(c2_port, elem)) { + c2_rx_error(c2_port, elem); + continue; + } + + /* Unmap the old skb */ + pci_unmap_single(c2dev->pcidev, mapaddr, maplen, + PCI_DMA_FROMDEVICE); + + prefetch(skb->data); + + /* + * Skip past the leading 8 bytes comprising of the + * "struct c2_rxp_hdr", prepended by the adapter + * to the usual Ethernet header ("struct ethhdr"), + * to the start of the raw Ethernet packet. + * + * Fix up the various fields in the sk_buff before + * passing it up to netif_rx(). The transfer size + * (in bytes) specified by the adapter len field of + * the "struct rxp_hdr_t" does NOT include the + * "sizeof(struct c2_rxp_hdr)". + */ + skb->data += sizeof(*rxp_hdr); + skb->tail = skb->data + buflen; + skb->len = buflen; + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, netdev); + + netif_rx(skb); + + netdev->last_rx = jiffies; + c2_port->netstats.rx_packets++; + c2_port->netstats.rx_bytes += buflen; + } + + /* Save where we left off */ + rx_ring->to_clean = elem; + c2dev->cur_rx = elem - rx_ring->start; + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + spin_unlock_irqrestore(&c2dev->lock, flags); +} + +/* + * Handle netisr0 TX & RX interrupts. + */ +static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned int netisr0, dmaisr; + int handled = 0; + struct c2_dev *c2dev = (struct c2_dev *) dev_id; + + /* Process CCILNET interrupts */ + netisr0 = readl(c2dev->regs + C2_NISR0); + if (netisr0) { + + /* + * There is an issue with the firmware that always + * provides the status of RX for both TX & RX + * interrupts. So process both queues here. + */ + c2_rx_interrupt(c2dev->netdev); + c2_tx_interrupt(c2dev->netdev); + + /* Clear the interrupt */ + writel(netisr0, c2dev->regs + C2_NISR0); + handled++; + } + + /* Process RNIC interrupts */ + dmaisr = readl(c2dev->regs + C2_DISR); + if (dmaisr) { + writel(dmaisr, c2dev->regs + C2_DISR); + c2_rnic_interrupt(c2dev); + handled++; + } + + if (handled) { + return IRQ_HANDLED; + } else { + return IRQ_NONE; + } +} + +static int c2_up(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_element *elem; + struct c2_rxp_hdr *rxp_hdr; + struct in_device *in_dev; + size_t rx_size, tx_size; + int ret, i; + unsigned int netimr0; + + if (netif_msg_ifup(c2_port)) + pr_debug("%s: enabling interface\n", netdev->name); + + /* Set the Rx buffer size based on MTU */ + c2_set_rxbufsize(c2_port); + + /* Allocate DMA'able memory for Tx/Rx host descriptor rings */ + rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc); + tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc); + + c2_port->mem_size = tx_size + rx_size; + c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size, + &c2_port->dma); + if (c2_port->mem == NULL) { + pr_debug("Unable to allocate memory for " + "host descriptor rings\n"); + return -ENOMEM; + } + + memset(c2_port->mem, 0, c2_port->mem_size); + + /* Create the Rx host descriptor ring */ + if ((ret = + c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma, + c2dev->mmio_rxp_ring))) { + pr_debug("Unable to create RX ring\n"); + goto bail0; + } + + /* Allocate Rx buffers for the host descriptor ring */ + if (c2_rx_fill(c2_port)) { + pr_debug("Unable to fill RX ring\n"); + goto bail1; + } + + /* Create the Tx host descriptor ring */ + if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size, + c2_port->dma + rx_size, + c2dev->mmio_txp_ring))) { + pr_debug("Unable to create TX ring\n"); + goto bail1; + } + + /* Set the TX pointer to where we left off */ + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean = + c2_port->tx_ring.start + c2dev->cur_tx; + + /* missing: Initialize MAC */ + + BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean); + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */ + for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count; + i++, elem++) { + rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + rxp_hdr->flags = 0; + __raw_writew(cpu_to_be16(RXP_HRXD_READY), + elem->hw_desc + C2_RXP_FLAGS); + } + + /* Enable network packets */ + netif_start_queue(netdev); + + /* Enable IRQ */ + writel(0, c2dev->regs + C2_IDIS); + netimr0 = readl(c2dev->regs + C2_NIMR0); + netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT); + writel(netimr0, c2dev->regs + C2_NIMR0); + + /* Tell the stack to ignore arp requests for ipaddrs bound to + * other interfaces. This is needed to prevent the host stack + * from responding to arp requests to the ipaddr bound on the + * rdma interface. + */ + in_dev = in_dev_get(netdev); + in_dev->cnf.arp_ignore = 1; + in_dev_put(in_dev); + + return 0; + + bail1: + c2_rx_clean(c2_port); + kfree(c2_port->rx_ring.start); + + bail0: + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return ret; +} + +static int c2_down(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + + if (netif_msg_ifdown(c2_port)) + pr_debug("%s: disabling interface\n", + netdev->name); + + /* Wait for all the queued packets to get sent */ + c2_tx_interrupt(netdev); + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Disable IRQs by clearing the interrupt mask */ + writel(1, c2dev->regs + C2_IDIS); + writel(0, c2dev->regs + C2_NIMR0); + + /* missing: Stop transmitter */ + + /* missing: Stop receiver */ + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* missing: Turn off LEDs here */ + + /* Free all buffers in the host descriptor rings */ + c2_tx_clean(c2_port); + c2_rx_clean(c2_port); + + /* Free the host descriptor rings */ + kfree(c2_port->rx_ring.start); + kfree(c2_port->tx_ring.start); + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return 0; +} + +static void c2_reset(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + unsigned int cur_rx = c2dev->cur_rx; + + /* Tell the hardware to quiesce */ + C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI); + + /* + * The hardware will reset the C2_PCI_HRX_QUI bit once + * the RXP is quiesced. Wait 2 seconds for this. + */ + ssleep(2); + + cur_rx = C2_GET_CUR_RX(c2dev); + + if (cur_rx & C2_PCI_HRX_QUI) + pr_debug("c2_reset: failed to quiesce the hardware!\n"); + + cur_rx &= ~C2_PCI_HRX_QUI; + + c2dev->cur_rx = cur_rx; + + pr_debug("Current RX: %u\n", c2dev->cur_rx); +} + +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + dma_addr_t mapaddr; + u32 maplen; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) { + netif_stop_queue(netdev); + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + pr_debug("%s: Tx ring full when queue awake!\n", + netdev->name); + return NETDEV_TX_BUSY; + } + + maplen = skb_headlen(skb); + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE); + + elem = tx_ring->to_use; + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS); + + c2_port->netstats.tx_packets++; + c2_port->netstats.tx_bytes += maplen; + + /* Loop thru additional data fragments and queue them */ + if (skb_shinfo(skb)->nr_frags) { + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + maplen = frag->size; + mapaddr = + pci_map_page(c2dev->pcidev, frag->page, + frag->page_offset, maplen, + PCI_DMA_TODEVICE); + + elem = elem->next; + elem->skb = NULL; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq(cpu_to_be64(mapaddr), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(maplen), + elem->hw_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_READY), + elem->hw_desc + C2_TXP_FLAGS); + + c2_port->netstats.tx_packets++; + c2_port->netstats.tx_bytes += maplen; + } + } + + tx_ring->to_use = elem->next; + c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1); + + if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) { + netif_stop_queue(netdev); + if (netif_msg_tx_queued(c2_port)) + pr_debug("%s: transmit queue full\n", + netdev->name); + } + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + netdev->trans_start = jiffies; + + return NETDEV_TX_OK; +} + +static struct net_device_stats *c2_get_stats(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + + return &c2_port->netstats; +} + +static void c2_tx_timeout(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + + if (netif_msg_timer(c2_port)) + pr_debug("%s: tx timeout\n", netdev->name); + + c2_tx_clean(c2_port); +} + +static int c2_change_mtu(struct net_device *netdev, int new_mtu) +{ + int ret = 0; + + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + if (netif_running(netdev)) { + c2_down(netdev); + + c2_up(netdev); + } + + return ret; +} + +/* Initialize network device */ +static struct net_device *c2_devinit(struct c2_dev *c2dev, + void __iomem * mmio_addr) +{ + struct c2_port *c2_port = NULL; + struct net_device *netdev = alloc_etherdev(sizeof(*c2_port)); + + if (!netdev) { + pr_debug("c2_port etherdev alloc failed"); + return NULL; + } + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + netdev->open = c2_up; + netdev->stop = c2_down; + netdev->hard_start_xmit = c2_xmit_frame; + netdev->get_stats = c2_get_stats; + netdev->tx_timeout = c2_tx_timeout; + netdev->change_mtu = c2_change_mtu; + netdev->watchdog_timeo = C2_TX_TIMEOUT; + netdev->irq = c2dev->pcidev->irq; + + c2_port = netdev_priv(netdev); + c2_port->netdev = netdev; + c2_port->c2dev = c2dev; + c2_port->msg_enable = netif_msg_init(debug, default_msg); + c2_port->tx_ring.count = C2_NUM_TX_DESC; + c2_port->rx_ring.count = C2_NUM_RX_DESC; + + spin_lock_init(&c2_port->tx_lock); + + /* Copy our 48-bit ethernet hardware address */ + memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6); + + /* Validate the MAC address */ + if (!is_valid_ether_addr(netdev->dev_addr)) { + pr_debug("Invalid MAC Address\n"); + c2_print_macaddr(netdev); + free_netdev(netdev); + return NULL; + } + + c2dev->netdev = netdev; + + return netdev; +} + +static int __devinit c2_probe(struct pci_dev *pcidev, + const struct pci_device_id *ent) +{ + int ret = 0, i; + unsigned long reg0_start, reg0_flags, reg0_len; + unsigned long reg2_start, reg2_flags, reg2_len; + unsigned long reg4_start, reg4_flags, reg4_len; + unsigned kva_map_size; + struct net_device *netdev = NULL; + struct c2_dev *c2dev = NULL; + void __iomem *mmio_regs = NULL; + + printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n", + DRV_VERSION); + + /* Enable PCI device */ + ret = pci_enable_device(pcidev); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to enable PCI device\n", + pci_name(pcidev)); + goto bail0; + } + + reg0_start = pci_resource_start(pcidev, BAR_0); + reg0_len = pci_resource_len(pcidev, BAR_0); + reg0_flags = pci_resource_flags(pcidev, BAR_0); + + reg2_start = pci_resource_start(pcidev, BAR_2); + reg2_len = pci_resource_len(pcidev, BAR_2); + reg2_flags = pci_resource_flags(pcidev, BAR_2); + + reg4_start = pci_resource_start(pcidev, BAR_4); + reg4_len = pci_resource_len(pcidev, BAR_4); + reg4_flags = pci_resource_flags(pcidev, BAR_4); + + pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len); + pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len); + pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len); + + /* Make sure PCI base addr are MMIO */ + if (!(reg0_flags & IORESOURCE_MEM) || + !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); + ret = -ENODEV; + goto bail1; + } + + /* Check for weird/broken PCI region reporting */ + if ((reg0_len < C2_REG0_SIZE) || + (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) { + printk(KERN_ERR PFX "Invalid PCI region sizes\n"); + ret = -ENODEV; + goto bail1; + } + + /* Reserve PCI I/O and memory resources */ + ret = pci_request_regions(pcidev, DRV_NAME); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to request regions\n", + pci_name(pcidev)); + goto bail1; + } + + if ((sizeof(dma_addr_t) > 4)) { + ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "64b DMA configuration failed\n"); + goto bail2; + } + } else { + ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "32b DMA configuration failed\n"); + goto bail2; + } + } + + /* Enables bus-mastering on the device */ + pci_set_master(pcidev); + + /* Remap the adapter PCI registers in BAR4 */ + mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + sizeof(struct c2_adapter_pci_regs)); + if (mmio_regs == 0UL) { + printk(KERN_ERR PFX + "Unable to remap adapter PCI registers in BAR4\n"); + ret = -EIO; + goto bail2; + } + + /* Validate PCI regs magic */ + for (i = 0; i < sizeof(c2_magic); i++) { + if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) { + printk(KERN_ERR PFX "Downlevel Firmware boot loader " + "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash " + "utility to update your boot loader\n", + i + 1, sizeof(c2_magic), + readb(mmio_regs + C2_REGS_MAGIC + i), + c2_magic[i]); + printk(KERN_ERR PFX "Adapter not claimed\n"); + iounmap(mmio_regs); + ret = -EIO; + goto bail2; + } + } + + /* Validate the adapter version */ + if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) { + printk(KERN_ERR PFX "Version mismatch " + "[fw=%u, c2=%u], Adapter not claimed\n", + be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)), + C2_VERSION); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Validate the adapter IVN */ + if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) { + printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using " + "the OpenIB device support kit. " + "[fw=0x%x, c2=0x%x], Adapter not claimed\n", + be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)), + C2_IVN); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Allocate hardware structure */ + c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev)); + if (!c2dev) { + printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", + pci_name(pcidev)); + ret = -ENOMEM; + iounmap(mmio_regs); + goto bail2; + } + + memset(c2dev, 0, sizeof(*c2dev)); + spin_lock_init(&c2dev->lock); + c2dev->pcidev = pcidev; + c2dev->cur_tx = 0; + + /* Get the last RX index */ + c2dev->cur_rx = + (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) - + 0xffffc000) / sizeof(struct c2_rxp_desc); + + /* Request an interrupt line for the driver */ + ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev); + if (ret) { + printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", + pci_name(pcidev), pcidev->irq); + iounmap(mmio_regs); + goto bail3; + } + + /* Set driver specific data */ + pci_set_drvdata(pcidev, c2dev); + + /* Initialize network device */ + if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) { + iounmap(mmio_regs); + goto bail4; + } + + /* Save off the actual size prior to unmapping mmio_regs */ + kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE)); + + /* Unmap the adapter PCI registers in BAR4 */ + iounmap(mmio_regs); + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", + ret); + goto bail5; + } + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Remap the adapter HRXDQ PA space to kernel VA space */ + c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET, + C2_RXP_HRXDQ_SIZE); + if (c2dev->mmio_rxp_ring == 0UL) { + printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n"); + ret = -EIO; + goto bail6; + } + + /* Remap the adapter HTXDQ PA space to kernel VA space */ + c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET, + C2_TXP_HTXDQ_SIZE); + if (c2dev->mmio_txp_ring == 0UL) { + printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n"); + ret = -EIO; + goto bail7; + } + + /* Save off the current RX index in the last 4 bytes of the TXP Ring */ + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + /* Remap the PCI registers in adapter BAR0 to kernel VA space */ + c2dev->regs = ioremap_nocache(reg0_start, reg0_len); + if (c2dev->regs == 0UL) { + printk(KERN_ERR PFX "Unable to remap BAR0\n"); + ret = -EIO; + goto bail8; + } + + /* Remap the PCI registers in adapter BAR4 to kernel VA space */ + c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET; + c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + kva_map_size); + if (c2dev->kva == 0UL) { + printk(KERN_ERR PFX "Unable to remap BAR4\n"); + ret = -EIO; + goto bail9; + } + + /* Print out the MAC address */ + c2_print_macaddr(netdev); + + ret = c2_rnic_init(c2dev); + if (ret) { + printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret); + goto bail10; + } + + c2_register_device(c2dev); + + return 0; + + bail10: + iounmap(c2dev->kva); + + bail9: + iounmap(c2dev->regs); + + bail8: + iounmap(c2dev->mmio_txp_ring); + + bail7: + iounmap(c2dev->mmio_rxp_ring); + + bail6: + unregister_netdev(netdev); + + bail5: + free_netdev(netdev); + + bail4: + free_irq(pcidev->irq, c2dev); + + bail3: + ib_dealloc_device(&c2dev->ibdev); + + bail2: + pci_release_regions(pcidev); + + bail1: + pci_disable_device(pcidev); + + bail0: + return ret; +} + +static void __devexit c2_remove(struct pci_dev *pcidev) +{ + struct c2_dev *c2dev = pci_get_drvdata(pcidev); + struct net_device *netdev = c2dev->netdev; + + /* Unregister with OpenIB */ + c2_unregister_device(c2dev); + + /* Clean up the RNIC resources */ + c2_rnic_term(c2dev); + + /* Remove network device from the kernel */ + unregister_netdev(netdev); + + /* Free network device */ + free_netdev(netdev); + + /* Free the interrupt line */ + free_irq(pcidev->irq, c2dev); + + /* missing: Turn LEDs off here */ + + /* Unmap adapter PA space */ + iounmap(c2dev->kva); + iounmap(c2dev->regs); + iounmap(c2dev->mmio_txp_ring); + iounmap(c2dev->mmio_rxp_ring); + + /* Free the hardware structure */ + ib_dealloc_device(&c2dev->ibdev); + + /* Release reserved PCI I/O and memory resources */ + pci_release_regions(pcidev); + + /* Disable PCI device */ + pci_disable_device(pcidev); + + /* Clear driver specific data */ + pci_set_drvdata(pcidev, NULL); +} + +static struct pci_driver c2_pci_driver = { + .name = DRV_NAME, + .id_table = c2_pci_table, + .probe = c2_probe, + .remove = __devexit_p(c2_remove), +}; + +static int __init c2_init_module(void) +{ + return pci_module_init(&c2_pci_driver); +} + +static void __exit c2_exit_module(void) +{ + pci_unregister_driver(&c2_pci_driver); +} + +module_init(c2_init_module); +module_exit(c2_exit_module); diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h new file mode 100644 index 00000000000..1b17dcdd050 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __C2_H +#define __C2_H + +#include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/idr.h> +#include <asm/semaphore.h> + +#include "c2_provider.h" +#include "c2_mq.h" +#include "c2_status.h" + +#define DRV_NAME "c2" +#define DRV_VERSION "1.1" +#define PFX DRV_NAME ": " + +#define BAR_0 0 +#define BAR_2 2 +#define BAR_4 4 + +#define RX_BUF_SIZE (1536 + 8) +#define ETH_JUMBO_MTU 9000 +#define C2_MAGIC "CEPHEUS" +#define C2_VERSION 4 +#define C2_IVN (18 & 0x7fffffff) + +#define C2_REG0_SIZE (16 * 1024) +#define C2_REG2_SIZE (2 * 1024 * 1024) +#define C2_REG4_SIZE (256 * 1024 * 1024) +#define C2_NUM_TX_DESC 341 +#define C2_NUM_RX_DESC 256 +#define C2_PCI_REGS_OFFSET (0x10000) +#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2)) +#define C2_RXP_HRXDQ_SIZE (4096) +#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE) +#define C2_TXP_HTXDQ_SIZE (4096) +#define C2_TX_TIMEOUT (6*HZ) + +/* CEPHEUS */ +static const u8 c2_magic[] = { + 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53 +}; + +enum adapter_pci_regs { + C2_REGS_MAGIC = 0x0000, + C2_REGS_VERS = 0x0008, + C2_REGS_IVN = 0x000C, + C2_REGS_PCI_WINSIZE = 0x0010, + C2_REGS_Q0_QSIZE = 0x0014, + C2_REGS_Q0_MSGSIZE = 0x0018, + C2_REGS_Q0_POOLSTART = 0x001C, + C2_REGS_Q0_SHARED = 0x0020, + C2_REGS_Q1_QSIZE = 0x0024, + C2_REGS_Q1_MSGSIZE = 0x0028, + C2_REGS_Q1_SHARED = 0x0030, + C2_REGS_Q2_QSIZE = 0x0034, + C2_REGS_Q2_MSGSIZE = 0x0038, + C2_REGS_Q2_SHARED = 0x0040, + C2_REGS_ENADDR = 0x004C, + C2_REGS_RDMA_ENADDR = 0x0054, + C2_REGS_HRX_CUR = 0x006C, +}; + +struct c2_adapter_pci_regs { + char reg_magic[8]; + u32 version; + u32 ivn; + u32 pci_window_size; + u32 q0_q_size; + u32 q0_msg_size; + u32 q0_pool_start; + u32 q0_shared; + u32 q1_q_size; + u32 q1_msg_size; + u32 q1_pool_start; + u32 q1_shared; + u32 q2_q_size; + u32 q2_msg_size; + u32 q2_pool_start; + u32 q2_shared; + u32 log_start; + u32 log_size; + u8 host_enaddr[8]; + u8 rdma_enaddr[8]; + u32 crash_entry; + u32 crash_ready[2]; + u32 fw_txd_cur; + u32 fw_hrxd_cur; + u32 fw_rxd_cur; +}; + +enum pci_regs { + C2_HISR = 0x0000, + C2_DISR = 0x0004, + C2_HIMR = 0x0008, + C2_DIMR = 0x000C, + C2_NISR0 = 0x0010, + C2_NISR1 = 0x0014, + C2_NIMR0 = 0x0018, + C2_NIMR1 = 0x001C, + C2_IDIS = 0x0020, +}; + +enum { + C2_PCI_HRX_INT = 1 << 8, + C2_PCI_HTX_INT = 1 << 17, + C2_PCI_HRX_QUI = 1 << 31, +}; + +/* + * Cepheus registers in BAR0. + */ +struct c2_pci_regs { + u32 hostisr; + u32 dmaisr; + u32 hostimr; + u32 dmaimr; + u32 netisr0; + u32 netisr1; + u32 netimr0; + u32 netimr1; + u32 int_disable; +}; + +/* TXP flags */ +enum c2_txp_flags { + TXP_HTXD_DONE = 0, + TXP_HTXD_READY = 1 << 0, + TXP_HTXD_UNINIT = 1 << 1, +}; + +/* RXP flags */ +enum c2_rxp_flags { + RXP_HRXD_UNINIT = 0, + RXP_HRXD_READY = 1 << 0, + RXP_HRXD_DONE = 1 << 1, +}; + +/* RXP status */ +enum c2_rxp_status { + RXP_HRXD_ZERO = 0, + RXP_HRXD_OK = 1 << 0, + RXP_HRXD_BUF_OV = 1 << 1, +}; + +/* TXP descriptor fields */ +enum txp_desc { + C2_TXP_FLAGS = 0x0000, + C2_TXP_LEN = 0x0002, + C2_TXP_ADDR = 0x0004, +}; + +/* RXP descriptor fields */ +enum rxp_desc { + C2_RXP_FLAGS = 0x0000, + C2_RXP_STATUS = 0x0002, + C2_RXP_COUNT = 0x0004, + C2_RXP_LEN = 0x0006, + C2_RXP_ADDR = 0x0008, +}; + +struct c2_txp_desc { + u16 flags; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_desc { + u16 flags; + u16 status; + u16 count; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_hdr { + u16 flags; + u16 status; + u16 len; + u16 rsvd; +} __attribute__ ((packed)); + +struct c2_tx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_rx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_alloc { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_array { + struct { + void **page; + int used; + } *page_list; +}; + +/* + * The MQ shared pointer pool is organized as a linked list of + * chunks. Each chunk contains a linked list of free shared pointers + * that can be allocated to a given user mode client. + * + */ +struct sp_chunk { + struct sp_chunk *next; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + u16 head; + u16 shared_ptr[0]; +}; + +struct c2_pd_table { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_qp_table { + struct idr idr; + spinlock_t lock; + int last; +}; + +struct c2_element { + struct c2_element *next; + void *ht_desc; /* host descriptor */ + void __iomem *hw_desc; /* hardware descriptor */ + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; +}; + +struct c2_ring { + struct c2_element *to_clean; + struct c2_element *to_use; + struct c2_element *start; + unsigned long count; +}; + +struct c2_dev { + struct ib_device ibdev; + void __iomem *regs; + void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */ + void __iomem *mmio_rxp_ring; + spinlock_t lock; + struct pci_dev *pcidev; + struct net_device *netdev; + struct net_device *pseudo_netdev; + unsigned int cur_tx; + unsigned int cur_rx; + u32 adapter_handle; + int device_cap_flags; + void __iomem *kva; /* KVA device memory */ + unsigned long pa; /* PA device memory */ + void **qptr_array; + + kmem_cache_t *host_msg_cache; + + struct list_head cca_link; /* adapter list */ + struct list_head eh_wakeup_list; /* event wakeup list */ + wait_queue_head_t req_vq_wo; + + /* Cached RNIC properties */ + struct ib_device_attr props; + + struct c2_pd_table pd_table; + struct c2_qp_table qp_table; + int ports; /* num of GigE ports */ + int devnum; + spinlock_t vqlock; /* sync vbs req MQ */ + + /* Verbs Queues */ + struct c2_mq req_vq; /* Verbs Request MQ */ + struct c2_mq rep_vq; /* Verbs Reply MQ */ + struct c2_mq aeq; /* Async Events MQ */ + + /* Kernel client MQs */ + struct sp_chunk *kern_mqsp_pool; + + /* Device updates these values when posting messages to a host + * target queue */ + u16 req_vq_shared; + u16 rep_vq_shared; + u16 aeq_shared; + u16 irq_claimed; + + /* + * Shared host target pages for user-accessible MQs. + */ + int hthead; /* index of first free entry */ + void *htpages; /* kernel vaddr */ + int htlen; /* length of htpages memory */ + void *htuva; /* user mapped vaddr */ + spinlock_t htlock; /* serialize allocation */ + + u64 adapter_hint_uva; /* access to the activity FIFO */ + + // spinlock_t aeq_lock; + // spinlock_t rnic_lock; + + u16 *hint_count; + dma_addr_t hint_count_dma; + u16 hints_read; + + int init; /* TRUE if it's ready */ + char ae_cache_name[16]; + char vq_cache_name[16]; +}; + +struct c2_port { + u32 msg_enable; + struct c2_dev *c2dev; + struct net_device *netdev; + + spinlock_t tx_lock; + u32 tx_avail; + struct c2_ring tx_ring; + struct c2_ring rx_ring; + + void *mem; /* PCI memory for host rings */ + dma_addr_t dma; + unsigned long mem_size; + + u32 rx_buf_size; + + struct net_device_stats netstats; +}; + +/* + * Activity FIFO registers in BAR0. + */ +#define PCI_BAR0_HOST_HINT 0x100 +#define PCI_BAR0_ADAPTER_HINT 0x2000 + +/* + * Ammasso PCI vendor id and Cepheus PCI device id. + */ +#define CQ_ARMED 0x01 +#define CQ_WAIT_FOR_DMA 0x80 + +/* + * The format of a hint is as follows: + * Lower 16 bits are the count of hints for the queue. + * Next 15 bits are the qp_index + * Upper most bit depends on who reads it: + * If read by producer, then it means Full (1) or Not-Full (0) + * If read by consumer, then it means Empty (1) or Not-Empty (0) + */ +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + + +/* + * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t + * struct. + */ +#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000 + +#ifndef readq +static inline u64 readq(const void __iomem * addr) +{ + u64 ret = readl(addr + 4); + ret <<= 32; + ret |= readl(addr); + + return ret; +} +#endif + +#ifndef writeq +static inline void __raw_writeq(u64 val, void __iomem * addr) +{ + __raw_writel((u32) (val), addr); + __raw_writel((u32) (val >> 32), (addr + 4)); +} +#endif + +#define C2_SET_CUR_RX(c2dev, cur_rx) \ + __raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092) + +#define C2_GET_CUR_RX(c2dev) \ + be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092)) + +static inline struct c2_dev *to_c2dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c2_dev, ibdev); +} + +static inline int c2_errno(void *reply) +{ + switch (c2_wr_get_result(reply)) { + case C2_OK: + return 0; + case CCERR_NO_BUFS: + case CCERR_INSUFFICIENT_RESOURCES: + case CCERR_ZERO_RDMA_READ_RESOURCES: + return -ENOMEM; + case CCERR_MR_IN_USE: + case CCERR_QP_IN_USE: + return -EBUSY; + case CCERR_ADDR_IN_USE: + return -EADDRINUSE; + case CCERR_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + case CCERR_CONN_RESET: + return -ECONNRESET; + case CCERR_NOT_IMPLEMENTED: + case CCERR_INVALID_WQE: + return -ENOSYS; + case CCERR_QP_NOT_PRIVILEGED: + return -EPERM; + case CCERR_STACK_ERROR: + return -EPROTO; + case CCERR_ACCESS_VIOLATION: + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return -EFAULT; + case CCERR_STAG_STATE_NOT_INVALID: + case CCERR_INVALID_ADDRESS: + case CCERR_INVALID_CQ: + case CCERR_INVALID_EP: + case CCERR_INVALID_MODIFIER: + case CCERR_INVALID_MTU: + case CCERR_INVALID_PD_ID: + case CCERR_INVALID_QP: + case CCERR_INVALID_RNIC: + case CCERR_INVALID_STAG: + return -EINVAL; + default: + return -EAGAIN; + } +} + +/* Device */ +extern int c2_register_device(struct c2_dev *c2dev); +extern void c2_unregister_device(struct c2_dev *c2dev); +extern int c2_rnic_init(struct c2_dev *c2dev); +extern void c2_rnic_term(struct c2_dev *c2dev); +extern void c2_rnic_interrupt(struct c2_dev *c2dev); +extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask); +extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask); + +/* QPs */ +extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp); +extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp); +extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn); +extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask); +extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird); +extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr); +extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr); +extern void __devinit c2_init_qp_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev); +extern void c2_set_qp_state(struct c2_qp *, int); +extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn); + +/* PDs */ +extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd); +extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd); +extern int __devinit c2_init_pd_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev); + +/* CQs */ +extern int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq); +extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); +extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); +extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); +extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); + +/* CM */ +extern int c2_llp_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); +extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog); +extern int c2_llp_service_destroy(struct iw_cm_id *cm_id); + +/* MM */ +extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 off, u64 *va, enum c2_acf acf, + struct c2_mr *mr); +extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index); + +/* AE */ +extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index); + +/* MQSP Allocator */ +extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root); +extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root); +extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask); +extern void c2_free_mqsp(u16 * mqsp); +#endif diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c new file mode 100644 index 00000000000..08f46c83a3a --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include <rdma/iw_cm.h> +#include "c2_status.h" +#include "c2_ae.h" + +static int c2_convert_cm_status(u32 c2_status) +{ + switch (c2_status) { + case C2_CONN_STATUS_SUCCESS: + return 0; + case C2_CONN_STATUS_REJECTED: + return -ENETRESET; + case C2_CONN_STATUS_REFUSED: + return -ECONNREFUSED; + case C2_CONN_STATUS_TIMEDOUT: + return -ETIMEDOUT; + case C2_CONN_STATUS_NETUNREACH: + return -ENETUNREACH; + case C2_CONN_STATUS_HOSTUNREACH: + return -EHOSTUNREACH; + case C2_CONN_STATUS_INVALID_RNIC: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP_STATE: + return -EINVAL; + case C2_CONN_STATUS_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + default: + printk(KERN_ERR PFX + "%s - Unable to convert CM status: %d\n", + __FUNCTION__, c2_status); + return -EIO; + } +} + +#ifdef DEBUG +static const char* to_event_str(int event) +{ + static const char* event_str[] = { + "CCAE_REMOTE_SHUTDOWN", + "CCAE_ACTIVE_CONNECT_RESULTS", + "CCAE_CONNECTION_REQUEST", + "CCAE_LLP_CLOSE_COMPLETE", + "CCAE_TERMINATE_MESSAGE_RECEIVED", + "CCAE_LLP_CONNECTION_RESET", + "CCAE_LLP_CONNECTION_LOST", + "CCAE_LLP_SEGMENT_SIZE_INVALID", + "CCAE_LLP_INVALID_CRC", + "CCAE_LLP_BAD_FPDU", + "CCAE_INVALID_DDP_VERSION", + "CCAE_INVALID_RDMA_VERSION", + "CCAE_UNEXPECTED_OPCODE", + "CCAE_INVALID_DDP_QUEUE_NUMBER", + "CCAE_RDMA_READ_NOT_ENABLED", + "CCAE_RDMA_WRITE_NOT_ENABLED", + "CCAE_RDMA_READ_TOO_SMALL", + "CCAE_NO_L_BIT", + "CCAE_TAGGED_INVALID_STAG", + "CCAE_TAGGED_BASE_BOUNDS_VIOLATION", + "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION", + "CCAE_TAGGED_INVALID_PD", + "CCAE_WRAP_ERROR", + "CCAE_BAD_CLOSE", + "CCAE_BAD_LLP_CLOSE", + "CCAE_INVALID_MSN_RANGE", + "CCAE_INVALID_MSN_GAP", + "CCAE_IRRQ_OVERFLOW", + "CCAE_IRRQ_MSN_GAP", + "CCAE_IRRQ_MSN_RANGE", + "CCAE_IRRQ_INVALID_STAG", + "CCAE_IRRQ_BASE_BOUNDS_VIOLATION", + "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION", + "CCAE_IRRQ_INVALID_PD", + "CCAE_IRRQ_WRAP_ERROR", + "CCAE_CQ_SQ_COMPLETION_OVERFLOW", + "CCAE_CQ_RQ_COMPLETION_ERROR", + "CCAE_QP_SRQ_WQE_ERROR", + "CCAE_QP_LOCAL_CATASTROPHIC_ERROR", + "CCAE_CQ_OVERFLOW", + "CCAE_CQ_OPERATION_ERROR", + "CCAE_SRQ_LIMIT_REACHED", + "CCAE_QP_RQ_LIMIT_REACHED", + "CCAE_SRQ_CATASTROPHIC_ERROR", + "CCAE_RNIC_CATASTROPHIC_ERROR" + }; + + if (event < CCAE_REMOTE_SHUTDOWN || + event > CCAE_RNIC_CATASTROPHIC_ERROR) + return "<invalid event>"; + + event -= CCAE_REMOTE_SHUTDOWN; + return event_str[event]; +} + +static const char *to_qp_state_str(int state) +{ + switch (state) { + case C2_QP_STATE_IDLE: + return "C2_QP_STATE_IDLE"; + case C2_QP_STATE_CONNECTING: + return "C2_QP_STATE_CONNECTING"; + case C2_QP_STATE_RTS: + return "C2_QP_STATE_RTS"; + case C2_QP_STATE_CLOSING: + return "C2_QP_STATE_CLOSING"; + case C2_QP_STATE_TERMINATE: + return "C2_QP_STATE_TERMINATE"; + case C2_QP_STATE_ERROR: + return "C2_QP_STATE_ERROR"; + default: + return "<invalid QP state>"; + }; +} +#endif + +void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_mq *mq = c2dev->qptr_array[mq_index]; + union c2wr *wr; + void *resource_user_context; + struct iw_cm_event cm_event; + struct ib_event ib_event; + enum c2_resource_indicator resource_indicator; + enum c2_event_id event_id; + unsigned long flags; + int status; + + /* + * retreive the message + */ + wr = c2_mq_consume(mq); + if (!wr) + return; + + memset(&ib_event, 0, sizeof(ib_event)); + memset(&cm_event, 0, sizeof(cm_event)); + + event_id = c2_wr_get_id(wr); + resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type); + resource_user_context = + (void *) (unsigned long) wr->ae.ae_generic.user_context; + + status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr)); + + pr_debug("event received c2_dev=%p, event_id=%d, " + "resource_indicator=%d, user_context=%p, status = %d\n", + c2dev, event_id, resource_indicator, resource_user_context, + status); + + switch (resource_indicator) { + case C2_RES_IND_QP:{ + + struct c2_qp *qp = (struct c2_qp *)resource_user_context; + struct iw_cm_id *cm_id = qp->cm_id; + struct c2wr_ae_active_connect_results *res; + + if (!cm_id) { + pr_debug("event received, but cm_id is <nul>, qp=%p!\n", + qp); + goto ignore_it; + } + pr_debug("%s: event = %s, user_context=%llx, " + "resource_type=%x, " + "resource=%x, qp_state=%s\n", + __FUNCTION__, + to_event_str(event_id), + be64_to_cpu(wr->ae.ae_generic.user_context), + be32_to_cpu(wr->ae.ae_generic.resource_type), + be32_to_cpu(wr->ae.ae_generic.resource), + to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); + + c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state)); + + switch (event_id) { + case CCAE_ACTIVE_CONNECT_RESULTS: + res = &wr->ae.ae_active_connect_results; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.local_addr.sin_addr.s_addr = res->laddr; + cm_event.remote_addr.sin_addr.s_addr = res->raddr; + cm_event.local_addr.sin_port = res->lport; + cm_event.remote_addr.sin_port = res->rport; + if (status == 0) { + cm_event.private_data_len = + be32_to_cpu(res->private_data_length); + cm_event.private_data = res->private_data; + } else { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.private_data_len = 0; + cm_event.private_data = NULL; + } + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + case CCAE_TERMINATE_MESSAGE_RECEIVED: + case CCAE_CQ_SQ_COMPLETION_OVERFLOW: + ib_event.device = &c2dev->ibdev; + ib_event.element.qp = &qp->ibqp; + ib_event.event = IB_EVENT_QP_REQ_ERR; + + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&ib_event, + qp->ibqp. + qp_context); + break; + case CCAE_BAD_CLOSE: + case CCAE_LLP_CLOSE_COMPLETE: + case CCAE_LLP_CONNECTION_RESET: + case CCAE_LLP_CONNECTION_LOST: + BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + default: + BUG_ON(1); + pr_debug("%s:%d Unexpected event_id=%d on QP=%p, " + "CM_ID=%p\n", + __FUNCTION__, __LINE__, + event_id, qp, cm_id); + break; + } + break; + } + + case C2_RES_IND_EP:{ + + struct c2wr_ae_connection_request *req = + &wr->ae.ae_connection_request; + struct iw_cm_id *cm_id = + (struct iw_cm_id *)resource_user_context; + + pr_debug("C2_RES_IND_EP event_id=%d\n", event_id); + if (event_id != CCAE_CONNECTION_REQUEST) { + pr_debug("%s: Invalid event_id: %d\n", + __FUNCTION__, event_id); + break; + } + cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; + cm_event.provider_data = (void*)(unsigned long)req->cr_handle; + cm_event.local_addr.sin_addr.s_addr = req->laddr; + cm_event.remote_addr.sin_addr.s_addr = req->raddr; + cm_event.local_addr.sin_port = req->lport; + cm_event.remote_addr.sin_port = req->rport; + cm_event.private_data_len = + be32_to_cpu(req->private_data_length); + cm_event.private_data = req->private_data; + + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + } + + case C2_RES_IND_CQ:{ + struct c2_cq *cq = + (struct c2_cq *) resource_user_context; + + pr_debug("IB_EVENT_CQ_ERR\n"); + ib_event.device = &c2dev->ibdev; + ib_event.element.cq = &cq->ibcq; + ib_event.event = IB_EVENT_CQ_ERR; + + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&ib_event, + cq->ibcq.cq_context); + } + + default: + printk("Bad resource indicator = %d\n", + resource_indicator); + break; + } + + ignore_it: + c2_mq_free(mq); +} diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h new file mode 100644 index 00000000000..3a065c33b83 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_AE_H_ +#define _C2_AE_H_ + +/* + * WARNING: If you change this file, also bump C2_IVN_BASE + * in common/include/clustercore/c2_ivn.h. + */ + +/* + * Asynchronous Event Identifiers + * + * These start at 0x80 only so it's obvious from inspection that + * they are not work-request statuses. This isn't critical. + * + * NOTE: these event id's must fit in eight bits. + */ +enum c2_event_id { + CCAE_REMOTE_SHUTDOWN = 0x80, + CCAE_ACTIVE_CONNECT_RESULTS, + CCAE_CONNECTION_REQUEST, + CCAE_LLP_CLOSE_COMPLETE, + CCAE_TERMINATE_MESSAGE_RECEIVED, + CCAE_LLP_CONNECTION_RESET, + CCAE_LLP_CONNECTION_LOST, + CCAE_LLP_SEGMENT_SIZE_INVALID, + CCAE_LLP_INVALID_CRC, + CCAE_LLP_BAD_FPDU, + CCAE_INVALID_DDP_VERSION, + CCAE_INVALID_RDMA_VERSION, + CCAE_UNEXPECTED_OPCODE, + CCAE_INVALID_DDP_QUEUE_NUMBER, + CCAE_RDMA_READ_NOT_ENABLED, + CCAE_RDMA_WRITE_NOT_ENABLED, + CCAE_RDMA_READ_TOO_SMALL, + CCAE_NO_L_BIT, + CCAE_TAGGED_INVALID_STAG, + CCAE_TAGGED_BASE_BOUNDS_VIOLATION, + CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION, + CCAE_TAGGED_INVALID_PD, + CCAE_WRAP_ERROR, + CCAE_BAD_CLOSE, + CCAE_BAD_LLP_CLOSE, + CCAE_INVALID_MSN_RANGE, + CCAE_INVALID_MSN_GAP, + CCAE_IRRQ_OVERFLOW, + CCAE_IRRQ_MSN_GAP, + CCAE_IRRQ_MSN_RANGE, + CCAE_IRRQ_INVALID_STAG, + CCAE_IRRQ_BASE_BOUNDS_VIOLATION, + CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION, + CCAE_IRRQ_INVALID_PD, + CCAE_IRRQ_WRAP_ERROR, + CCAE_CQ_SQ_COMPLETION_OVERFLOW, + CCAE_CQ_RQ_COMPLETION_ERROR, + CCAE_QP_SRQ_WQE_ERROR, + CCAE_QP_LOCAL_CATASTROPHIC_ERROR, + CCAE_CQ_OVERFLOW, + CCAE_CQ_OPERATION_ERROR, + CCAE_SRQ_LIMIT_REACHED, + CCAE_QP_RQ_LIMIT_REACHED, + CCAE_SRQ_CATASTROPHIC_ERROR, + CCAE_RNIC_CATASTROPHIC_ERROR +/* WARNING If you add more id's, make sure their values fit in eight bits. */ +}; + +/* + * Resource Indicators and Identifiers + */ +enum c2_resource_indicator { + C2_RES_IND_QP = 1, + C2_RES_IND_EP, + C2_RES_IND_CQ, + C2_RES_IND_SRQ, +}; + +#endif /* _C2_AE_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c new file mode 100644 index 00000000000..1d2529992c0 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_alloc.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/bitmap.h> + +#include "c2.h" + +static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **head) +{ + int i; + struct sp_chunk *new_head; + + new_head = (struct sp_chunk *) __get_free_page(gfp_mask); + if (new_head == NULL) + return -ENOMEM; + + new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head, + PAGE_SIZE, DMA_FROM_DEVICE); + pci_unmap_addr_set(new_head, mapping, new_head->dma_addr); + + new_head->next = NULL; + new_head->head = 0; + + /* build list where each index is the next free slot */ + for (i = 0; + i < (PAGE_SIZE - sizeof(struct sp_chunk) - + sizeof(u16)) / sizeof(u16) - 1; + i++) { + new_head->shared_ptr[i] = i + 1; + } + /* terminate list */ + new_head->shared_ptr[i] = 0xFFFF; + + *head = new_head; + return 0; +} + +int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root) +{ + return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root); +} + +void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root) +{ + struct sp_chunk *next; + + while (root) { + next = root->next; + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(root, mapping), PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page((struct page *) root); + root = next; + } +} + +u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask) +{ + u16 mqsp; + + while (head) { + mqsp = head->head; + if (mqsp != 0xFFFF) { + head->head = head->shared_ptr[mqsp]; + break; + } else if (head->next == NULL) { + if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) == + 0) { + head = head->next; + mqsp = head->head; + head->head = head->shared_ptr[mqsp]; + break; + } else + return NULL; + } else + head = head->next; + } + if (head) { + *dma_addr = head->dma_addr + + ((unsigned long) &(head->shared_ptr[mqsp]) - + (unsigned long) head); + pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__, + &(head->shared_ptr[mqsp]), (u64)*dma_addr); + return &(head->shared_ptr[mqsp]); + } + return NULL; +} + +void c2_free_mqsp(u16 * mqsp) +{ + struct sp_chunk *head; + u16 idx; + + /* The chunk containing this ptr begins at the page boundary */ + head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK); + + /* Link head to new mqsp */ + *mqsp = head->head; + + /* Compute the shared_ptr index */ + idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1; + idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1; + + /* Point this index at the head */ + head->shared_ptr[idx] = head->head; + + /* Point head at this index */ + head->head = idx; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c new file mode 100644 index 00000000000..485254efdd1 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cm.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "c2.h" +#include "c2_wr.h" +#include "c2_vq.h" +#include <rdma/iw_cm.h> + +int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct ib_qp *ibqp; + struct c2_qp *qp; + struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */ + struct c2_vq_req *vq_req; + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Associate QP <--> CM_ID */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + /* + * only support the max private_data length + */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail0; + } + /* + * Set the rdma read limits + */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* + * Create and send a WR_QP_CONNECT... + */ + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + c2_wr_set_id(wr, CCWR_QP_CONNECT); + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->qp_handle = qp->adapter_handle; + + wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr; + wr->remote_port = cm_id->remote_addr.sin_port; + + /* + * Move any private data from the callers's buf into + * the WR. + */ + if (iw_param->private_data) { + wr->private_data_length = + cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], iw_param->private_data, + iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* + * Send WR to adapter. NOTE: There is no synch reply from + * the adapter. + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + vq_req_free(c2dev, vq_req); + + bail1: + kfree(wr); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog) +{ + struct c2_dev *c2dev; + struct c2wr_ep_listen_create_req wr; + struct c2wr_ep_listen_create_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.local_addr = cm_id->local_addr.sin_addr.s_addr; + wr.local_port = cm_id->local_addr.sin_port; + wr.backlog = cpu_to_be32(backlog); + wr.user_context = (u64) (unsigned long) cm_id; + + /* + * Reference the request struct. Dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = + (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + if ((err = c2_errno(reply)) != 0) + goto bail1; + + /* + * Keep the adapter handle. Used in subsequent destroy + */ + cm_id->provider_data = (void*)(unsigned long) reply->ep_handle; + + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + + +int c2_llp_service_destroy(struct iw_cm_id *cm_id) +{ + + struct c2_dev *c2dev; + struct c2wr_ep_listen_destroy_req wr; + struct c2wr_ep_listen_destroy_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32)(unsigned long)cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + if ((err = c2_errno(reply)) != 0) + goto bail1; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct c2_qp *qp; + struct ib_qp *ibqp; + struct c2wr_cr_accept_req *wr; /* variable length WR */ + struct c2_vq_req *vq_req; + struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */ + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Set the RDMA read limits */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* Allocate verbs request. */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + vq_req->qp = qp; + vq_req->cm_id = cm_id; + vq_req->event = IW_CM_EVENT_ESTABLISHED; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail2; + } + + /* Build the WR */ + c2_wr_set_id(wr, CCWR_CR_ACCEPT); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->ep_handle = (u32) (unsigned long) cm_id->provider_data; + wr->qp_handle = qp->adapter_handle; + + /* Replace the cr_handle with the QP after accept */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + cm_id->provider_data = qp; + + /* Validate private_data length */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail2; + } + + if (iw_param->private_data) { + wr->private_data_length = cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], + iw_param->private_data, iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* Reference the request struct. Dereferenced in the int handler. */ + vq_req_get(c2dev, vq_req); + + /* Send WR to adapter */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail2; + } + + /* Wait for reply from adapter */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail2; + + /* Check that reply is present */ + reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail2; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + if (!err) + c2_set_qp_state(qp, C2_QP_STATE_RTS); + bail2: + kfree(wr); + bail1: + vq_req_free(c2dev, vq_req); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct c2_dev *c2dev; + struct c2wr_cr_reject_req wr; + struct c2_vq_req *vq_req; + struct c2wr_cr_reject_rep *reply; + int err; + + c2dev = to_c2dev(cm_id->device); + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_CR_REJECT); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32) (unsigned long) cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = (struct c2wr_cr_reject_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + err = c2_errno(reply); + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c new file mode 100644 index 00000000000..9d7bcc5ade9 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1)) + +static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn) +{ + struct c2_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + cq = c2dev->qptr_array[cqn]; + if (!cq) { + spin_unlock_irqrestore(&c2dev->lock, flags); + return NULL; + } + atomic_inc(&cq->refcount); + spin_unlock_irqrestore(&c2dev->lock, flags); + return cq; +} + +static void c2_cq_put(struct c2_cq *cq) +{ + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +void c2_cq_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_cq *cq; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) { + printk("discarding events on destroyed CQN=%d\n", mq_index); + return; + } + + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + c2_cq_put(cq); +} + +void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index) +{ + struct c2_cq *cq; + struct c2_mq *q; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) + return; + + spin_lock_irq(&cq->lock); + q = &cq->mq; + if (q && !c2_mq_empty(q)) { + u16 priv = q->priv; + struct c2wr_ce *msg; + + while (priv != be16_to_cpu(*q->shared)) { + msg = (struct c2wr_ce *) + (q->msg_pool.host + priv * q->msg_size); + if (msg->qp_user_context == (u64) (unsigned long) qp) { + msg->qp_user_context = (u64) 0; + } + priv = (priv + 1) % q->q_size; + } + } + spin_unlock_irq(&cq->lock); + c2_cq_put(cq); +} + +static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status) +{ + switch (status) { + case C2_OK: + return IB_WC_SUCCESS; + case CCERR_FLUSHED: + return IB_WC_WR_FLUSH_ERR; + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return IB_WC_LOC_PROT_ERR; + case CCERR_ACCESS_VIOLATION: + return IB_WC_LOC_ACCESS_ERR; + case CCERR_TOTAL_LENGTH_TOO_BIG: + return IB_WC_LOC_LEN_ERR; + case CCERR_INVALID_WINDOW: + return IB_WC_MW_BIND_ERR; + default: + return IB_WC_GENERAL_ERR; + } +} + + +static inline int c2_poll_one(struct c2_dev *c2dev, + struct c2_cq *cq, struct ib_wc *entry) +{ + struct c2wr_ce *ce; + struct c2_qp *qp; + int is_recv = 0; + + ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + if (!ce) { + return -EAGAIN; + } + + /* + * if the qp returned is null then this qp has already + * been freed and we are unable process the completion. + * try pulling the next message + */ + while ((qp = + (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { + c2_mq_free(&cq->mq); + ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + if (!ce) + return -EAGAIN; + } + + entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce)); + entry->wr_id = ce->hdr.context; + entry->qp_num = ce->handle; + entry->wc_flags = 0; + entry->slid = 0; + entry->sl = 0; + entry->src_qp = 0; + entry->dlid_path_bits = 0; + entry->pkey_index = 0; + + switch (c2_wr_get_id(ce)) { + case C2_WR_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case C2_WR_TYPE_RDMA_WRITE: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case C2_WR_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case C2_WR_TYPE_BIND_MW: + entry->opcode = IB_WC_BIND_MW; + break; + case C2_WR_TYPE_RECV: + entry->byte_len = be32_to_cpu(ce->bytes_rcvd); + entry->opcode = IB_WC_RECV; + is_recv = 1; + break; + default: + break; + } + + /* consume the WQEs */ + if (is_recv) + c2_mq_lconsume(&qp->rq_mq, 1); + else + c2_mq_lconsume(&qp->sq_mq, + be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1); + + /* free the message */ + c2_mq_free(&cq->mq); + + return 0; +} + +int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct c2_dev *c2dev = to_c2dev(ibcq->device); + struct c2_cq *cq = to_c2cq(ibcq); + unsigned long flags; + int npolled, err; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + + err = c2_poll_one(c2dev, cq, entry + npolled); + if (err) + break; + } + + spin_unlock_irqrestore(&cq->lock, flags); + + return npolled; +} + +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +{ + struct c2_mq_shared __iomem *shared; + struct c2_cq *cq; + + cq = to_c2cq(ibcq); + shared = cq->mq.peer; + + if (notify == IB_CQ_NEXT_COMP) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); + else if (notify == IB_CQ_SOLICITED) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); + else + return -EINVAL; + + writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed); + + /* + * Now read back shared->armed to make the PCI + * write synchronous. This is necessary for + * correct cq notification semantics. + */ + readb(&shared->armed); + + return 0; +} + +static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) +{ + + dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping), + mq->q_size * mq->msg_size, DMA_FROM_DEVICE); + free_pages((unsigned long) mq->msg_pool.host, + get_order(mq->q_size * mq->msg_size)); +} + +static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, + int msg_size) +{ + unsigned long pool_start; + + pool_start = __get_free_pages(GFP_KERNEL, + get_order(q_size * msg_size)); + if (!pool_start) + return -ENOMEM; + + c2_mq_rep_init(mq, + 0, /* index (currently unknown) */ + q_size, + msg_size, + (u8 *) pool_start, + NULL, /* peer (currently unknown) */ + C2_MQ_HOST_TARGET); + + mq->host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)pool_start, + q_size * msg_size, DMA_FROM_DEVICE); + pci_unmap_addr_set(mq, mapping, mq->host_dma); + + return 0; +} + +int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq) +{ + struct c2wr_cq_create_req wr; + struct c2wr_cq_create_rep *reply; + unsigned long peer_pa; + struct c2_vq_req *vq_req; + int err; + + might_sleep(); + + cq->ibcq.cqe = entries - 1; + cq->is_kernel = !ctx; + + /* Allocate a shared pointer */ + cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &cq->mq.shared_dma, GFP_KERNEL); + if (!cq->mq.shared) + return -ENOMEM; + + /* Allocate pages for the message pool */ + err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE); + if (err) + goto bail0; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.msg_size = cpu_to_be32(cq->mq.msg_size); + wr.depth = cpu_to_be32(cq->mq.q_size); + wr.shared_ht = cpu_to_be64(cq->mq.shared_dma); + wr.msg_pool = cpu_to_be64(cq->mq.host_dma); + wr.user_context = (u64) (unsigned long) (cq); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail2; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail2; + + reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail2; + } + + if ((err = c2_errno(reply)) != 0) + goto bail3; + + cq->adapter_handle = reply->cq_handle; + cq->mq.index = be32_to_cpu(reply->mq_index); + + peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared); + cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE); + if (!cq->mq.peer) { + err = -ENOMEM; + goto bail3; + } + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + spin_lock_init(&cq->lock); + atomic_set(&cq->refcount, 1); + init_waitqueue_head(&cq->wait); + + /* + * Use the MQ index allocated by the adapter to + * store the CQ in the qptr_array + */ + cq->cqn = cq->mq.index; + c2dev->qptr_array[cq->cqn] = cq; + + return 0; + + bail3: + vq_repbuf_free(c2dev, reply); + bail2: + vq_req_free(c2dev, vq_req); + bail1: + c2_free_cq_buf(c2dev, &cq->mq); + bail0: + c2_free_mqsp(cq->mq.shared); + + return err; +} + +void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq) +{ + int err; + struct c2_vq_req *vq_req; + struct c2wr_cq_destroy_req wr; + struct c2wr_cq_destroy_rep *reply; + + might_sleep(); + + /* Clear CQ from the qptr array */ + spin_lock_irq(&c2dev->lock); + c2dev->qptr_array[cq->mq.index] = NULL; + atomic_dec(&cq->refcount); + spin_unlock_irq(&c2dev->lock); + + wait_event(cq->wait, !atomic_read(&cq->refcount)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + goto bail0; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.cq_handle = cq->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg); + + vq_repbuf_free(c2dev, reply); + bail1: + vq_req_free(c2dev, vq_req); + bail0: + if (cq->is_kernel) { + c2_free_cq_buf(c2dev, &cq->mq); + } + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c new file mode 100644 index 00000000000..0d0bc33ca30 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include <rdma/iw_cm.h> +#include "c2_vq.h" + +static void handle_mq(struct c2_dev *c2dev, u32 index); +static void handle_vq(struct c2_dev *c2dev, u32 mq_index); + +/* + * Handle RNIC interrupts + */ +void c2_rnic_interrupt(struct c2_dev *c2dev) +{ + unsigned int mq_index; + + while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) { + mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT); + if (mq_index & 0x80000000) { + break; + } + + c2dev->hints_read++; + handle_mq(c2dev, mq_index); + } + +} + +/* + * Top level MQ handler + */ +static void handle_mq(struct c2_dev *c2dev, u32 mq_index) +{ + if (c2dev->qptr_array[mq_index] == NULL) { + pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n", + mq_index); + return; + } + + switch (mq_index) { + case (0): + /* + * An index of 0 in the activity queue + * indicates the req vq now has messages + * available... + * + * Wake up any waiters waiting on req VQ + * message availability. + */ + wake_up(&c2dev->req_vq_wo); + break; + case (1): + handle_vq(c2dev, mq_index); + break; + case (2): + /* We have to purge the VQ in case there are pending + * accept reply requests that would result in the + * generation of an ESTABLISHED event. If we don't + * generate these first, a CLOSE event could end up + * being delivered before the ESTABLISHED event. + */ + handle_vq(c2dev, 1); + + c2_ae_event(c2dev, mq_index); + break; + default: + /* There is no event synchronization between CQ events + * and AE or CM events. In fact, CQE could be + * delivered for all of the I/O up to and including the + * FLUSH for a peer disconenct prior to the ESTABLISHED + * event being delivered to the app. The reason for this + * is that CM events are delivered on a thread, while AE + * and CM events are delivered on interrupt context. + */ + c2_cq_event(c2dev, mq_index); + break; + } + + return; +} + +/* + * Handles verbs WR replies. + */ +static void handle_vq(struct c2_dev *c2dev, u32 mq_index) +{ + void *adapter_msg, *reply_msg; + struct c2wr_hdr *host_msg; + struct c2wr_hdr tmp; + struct c2_mq *reply_vq; + struct c2_vq_req *req; + struct iw_cm_event cm_event; + int err; + + reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index]; + + /* + * get next msg from mq_index into adapter_msg. + * don't free it yet. + */ + adapter_msg = c2_mq_consume(reply_vq); + if (adapter_msg == NULL) { + return; + } + + host_msg = vq_repbuf_alloc(c2dev); + + /* + * If we can't get a host buffer, then we'll still + * wakeup the waiter, we just won't give him the msg. + * It is assumed the waiter will deal with this... + */ + if (!host_msg) { + pr_debug("handle_vq: no repbufs!\n"); + + /* + * just copy the WR header into a local variable. + * this allows us to still demux on the context + */ + host_msg = &tmp; + memcpy(host_msg, adapter_msg, sizeof(tmp)); + reply_msg = NULL; + } else { + memcpy(host_msg, adapter_msg, reply_vq->msg_size); + reply_msg = host_msg; + } + + /* + * consume the msg from the MQ + */ + c2_mq_free(reply_vq); + + /* + * wakeup the waiter. + */ + req = (struct c2_vq_req *) (unsigned long) host_msg->context; + if (req == NULL) { + /* + * We should never get here, as the adapter should + * never send us a reply that we're not expecting. + */ + vq_repbuf_free(c2dev, host_msg); + pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); + return; + } + + err = c2_errno(reply_msg); + if (!err) switch (req->event) { + case IW_CM_EVENT_ESTABLISHED: + c2_set_qp_state(req->qp, + C2_QP_STATE_RTS); + case IW_CM_EVENT_CLOSE: + + /* + * Move the QP to RTS if this is + * the established event + */ + cm_event.event = req->event; + cm_event.status = 0; + cm_event.local_addr = req->cm_id->local_addr; + cm_event.remote_addr = req->cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + req->cm_id->event_handler(req->cm_id, &cm_event); + break; + default: + break; + } + + req->reply_msg = (u64) (unsigned long) (reply_msg); + atomic_set(&req->reply_ready, 1); + wake_up(&req->wait_object); + + /* + * If the request was cancelled, then this put will + * free the vq_req memory...and reply_msg!!! + */ + vq_req_put(c2dev, req); +} diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c new file mode 100644 index 00000000000..1e4f46493fc --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mm.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include "c2_vq.h" + +#define PBL_VIRT 1 +#define PBL_PHYS 2 + +/* + * Send all the PBL messages to convey the remainder of the PBL + * Wait for the adapter's reply on the last one. + * This is indicated by setting the MEM_PBL_COMPLETE in the flags. + * + * NOTE: vq_req is _not_ freed by this function. The VQ Host + * Reply buffer _is_ freed by this function. + */ +static int +send_pbl_messages(struct c2_dev *c2dev, u32 stag_index, + unsigned long va, u32 pbl_depth, + struct c2_vq_req *vq_req, int pbl_type) +{ + u32 pbe_count; /* amt that fits in a PBL msg */ + u32 count; /* amt in this PBL MSG. */ + struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */ + struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */ + int err, pbl_virt, pbl_index, i; + + switch (pbl_type) { + case PBL_VIRT: + pbl_virt = 1; + break; + case PBL_PHYS: + pbl_virt = 0; + break; + default: + return -EINVAL; + break; + } + + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + return -ENOMEM; + } + c2_wr_set_id(wr, CCWR_NSMR_PBL); + + /* + * Only the last PBL message will generate a reply from the verbs, + * so we set the context to 0 indicating there is no kernel verbs + * handler blocked awaiting this reply. + */ + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->stag_index = stag_index; /* already swapped */ + wr->flags = 0; + pbl_index = 0; + while (pbl_depth) { + count = min(pbe_count, pbl_depth); + wr->addrs_length = cpu_to_be32(count); + + /* + * If this is the last message, then reference the + * vq request struct cuz we're gonna wait for a reply. + * also make this PBL msg as the last one. + */ + if (count == pbl_depth) { + /* + * reference the request struct. dereferenced in the + * int handler. + */ + vq_req_get(c2dev, vq_req); + wr->flags = cpu_to_be32(MEM_PBL_COMPLETE); + + /* + * This is the last PBL message. + * Set the context to our VQ Request Object so we can + * wait for the reply. + */ + wr->hdr.context = (unsigned long) vq_req; + } + + /* + * If pbl_virt is set then va is a virtual address + * that describes a virtually contiguous memory + * allocation. The wr needs the start of each virtual page + * to be converted to the corresponding physical address + * of the page. If pbl_virt is not set then va is an array + * of physical addresses and there is no conversion to do. + * Just fill in the wr with what is in the array. + */ + for (i = 0; i < count; i++) { + if (pbl_virt) { + va += PAGE_SIZE; + } else { + wr->paddrs[i] = + cpu_to_be64(((u64 *)va)[pbl_index + i]); + } + } + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + if (count <= pbe_count) { + vq_req_put(c2dev, vq_req); + } + goto bail0; + } + pbl_depth -= count; + pbl_index += count; + } + + /* + * Now wait for the reply... + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + kfree(wr); + return err; +} + +#define C2_PBL_MAX_DEPTH 131072 +int +c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 offset, u64 *va, enum c2_acf acf, + struct c2_mr *mr) +{ + struct c2_vq_req *vq_req; + struct c2wr_nsmr_register_req *wr; + struct c2wr_nsmr_register_rep *reply; + u16 flags; + int i, pbe_count, count; + int err; + + if (!va || !length || !addr_list || !pbl_depth) + return -EINTR; + + /* + * Verify PBL depth is within rnic max + */ + if (pbl_depth > C2_PBL_MAX_DEPTH) { + return -EINTR; + } + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + /* + * build the WR + */ + c2_wr_set_id(wr, CCWR_NSMR_REGISTER); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + + flags = (acf | MEM_VA_BASED | MEM_REMOTE); + + /* + * compute how many pbes can fit in the message + */ + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64); + + if (pbl_depth <= pbe_count) { + flags |= MEM_PBL_COMPLETE; + } + wr->flags = cpu_to_be16(flags); + wr->stag_key = 0; //stag_key; + wr->va = cpu_to_be64(*va); + wr->pd_id = mr->pd->pd_id; + wr->pbe_size = cpu_to_be32(page_size); + wr->length = cpu_to_be32(length); + wr->pbl_depth = cpu_to_be32(pbl_depth); + wr->fbo = cpu_to_be32(offset); + count = min(pbl_depth, pbe_count); + wr->addrs_length = cpu_to_be32(count); + + /* + * fill out the PBL for this message + */ + for (i = 0; i < count; i++) { + wr->paddrs[i] = cpu_to_be64(addr_list[i]); + } + + /* + * regerence the request struct + */ + vq_req_get(c2dev, vq_req); + + /* + * send the WR to the adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + /* + * wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail1; + } + + /* + * process reply + */ + reply = + (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + if ((err = c2_errno(reply))) { + goto bail2; + } + //*p_pb_entries = be32_to_cpu(reply->pbl_depth); + mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index); + vq_repbuf_free(c2dev, reply); + + /* + * if there are still more PBEs we need to send them to + * the adapter and wait for a reply on the final one. + * reuse vq_req for this purpose. + */ + pbl_depth -= count; + if (pbl_depth) { + + vq_req->reply_msg = (unsigned long) NULL; + atomic_set(&vq_req->reply_ready, 0); + err = send_pbl_messages(c2dev, + cpu_to_be32(mr->ibmr.lkey), + (unsigned long) &addr_list[i], + pbl_depth, vq_req, PBL_PHYS); + if (err) { + goto bail1; + } + } + + vq_req_free(c2dev, vq_req); + kfree(wr); + + return err; + + bail2: + vq_repbuf_free(c2dev, reply); + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index) +{ + struct c2_vq_req *vq_req; /* verbs request object */ + struct c2wr_stag_dealloc_req wr; /* work request */ + struct c2wr_stag_dealloc_rep *reply; /* WR reply */ + int err; + + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_STAG_DEALLOC); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.stag_index = cpu_to_be32(stag_index); + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c new file mode 100644 index 00000000000..b88a7559210 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include "c2_mq.h" + +void *c2_mq_alloc(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (c2_mq_full(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = + (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC)); + m->magic = cpu_to_be32(CCWR_MAGIC); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_produce(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (!c2_mq_full(q)) { + q->priv = (q->priv + 1) % q->q_size; + q->hint_count++; + /* Update peer's offset. */ + __raw_writew(cpu_to_be16(q->priv), &q->peer->shared); + } +} + +void *c2_mq_consume(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (c2_mq_empty(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = (struct c2wr_hdr *) + (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC)); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_free(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (!c2_mq_empty(q)) { + +#ifdef CCMSGMAGIC + { + struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *) + (q->msg_pool.adapter + q->priv * q->msg_size); + __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic); + } +#endif + q->priv = (q->priv + 1) % q->q_size; + /* Update peer's offset. */ + __raw_writew(cpu_to_be16(q->priv), &q->peer->shared); + } +} + + +void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + while (wqe_count--) { + BUG_ON(c2_mq_empty(q)); + *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size); + } +} + +#if 0 +u32 c2_mq_count(struct c2_mq *q) +{ + s32 count; + + if (q->type == C2_MQ_HOST_TARGET) + count = be16_to_cpu(*q->shared) - q->priv; + else + count = q->priv - be16_to_cpu(*q->shared); + + if (count < 0) + count += q->q_size; + + return (u32) count; +} +#endif /* 0 */ + +void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.adapter = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} +void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.host = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h new file mode 100644 index 00000000000..9185bbb2165 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C2_MQ_H_ +#define _C2_MQ_H_ +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include "c2_wr.h" + +enum c2_shared_regs { + + C2_SHARED_ARMED = 0x10, + C2_SHARED_NOTIFY = 0x18, + C2_SHARED_SHARED = 0x40, +}; + +struct c2_mq_shared { + u16 unused1; + u8 armed; + u8 notification_type; + u32 unused2; + u16 shared; + /* Pad to 64 bytes. */ + u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)]; +}; + +enum c2_mq_type { + C2_MQ_HOST_TARGET = 1, + C2_MQ_ADAPTER_TARGET = 2, +}; + +/* + * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ. + * c2_user_mq_t (which is the same format) is for user-mode MQs... + */ +#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */ +struct c2_mq { + u32 magic; + union { + u8 *host; + u8 __iomem *adapter; + } msg_pool; + dma_addr_t host_dma; + DECLARE_PCI_UNMAP_ADDR(mapping); + u16 hint_count; + u16 priv; + struct c2_mq_shared __iomem *peer; + u16 *shared; + dma_addr_t shared_dma; + u32 q_size; + u32 msg_size; + u32 index; + enum c2_mq_type type; +}; + +static __inline__ int c2_mq_empty(struct c2_mq *q) +{ + return q->priv == be16_to_cpu(*q->shared); +} + +static __inline__ int c2_mq_full(struct c2_mq *q) +{ + return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size; +} + +extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count); +extern void *c2_mq_alloc(struct c2_mq *q); +extern void c2_mq_produce(struct c2_mq *q); +extern void *c2_mq_consume(struct c2_mq *q); +extern void c2_mq_free(struct c2_mq *q); +extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type); +extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type); + +#endif /* _C2_MQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c new file mode 100644 index 00000000000..00c709926c8 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_pd.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/init.h> +#include <linux/errno.h> + +#include "c2.h" +#include "c2_provider.h" + +int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd) +{ + u32 obj; + int ret = 0; + + spin_lock(&c2dev->pd_table.lock); + obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max, + c2dev->pd_table.last); + if (obj >= c2dev->pd_table.max) + obj = find_first_zero_bit(c2dev->pd_table.table, + c2dev->pd_table.max); + if (obj < c2dev->pd_table.max) { + pd->pd_id = obj; + __set_bit(obj, c2dev->pd_table.table); + c2dev->pd_table.last = obj+1; + if (c2dev->pd_table.last >= c2dev->pd_table.max) + c2dev->pd_table.last = 0; + } else + ret = -ENOMEM; + spin_unlock(&c2dev->pd_table.lock); + return ret; +} + +void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd) +{ + spin_lock(&c2dev->pd_table.lock); + __clear_bit(pd->pd_id, c2dev->pd_table.table); + spin_unlock(&c2dev->pd_table.lock); +} + +int __devinit c2_init_pd_table(struct c2_dev *c2dev) +{ + + c2dev->pd_table.last = 0; + c2dev->pd_table.max = c2dev->props.max_pd; + spin_lock_init(&c2dev->pd_table.lock); + c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) * + sizeof(long), GFP_KERNEL); + if (!c2dev->pd_table.table) + return -ENOMEM; + bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd); + return 0; +} + +void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev) +{ + kfree(c2dev->pd_table.table); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c new file mode 100644 index 00000000000..dd6af551108 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -0,0 +1,870 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> +#include <linux/dma-mapping.h> +#include <linux/if_arp.h> +#include <linux/vmalloc.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/byteorder.h> + +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include "c2.h" +#include "c2_provider.h" +#include "c2_user.h" + +static int c2_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + *props = c2dev->props; + return 0; +} + +static int c2_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + props->max_mtu = IB_MTU_4096; + props->lid = 0; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = IB_PORT_ACTIVE; + props->phys_state = 0; + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = 1; + props->active_speed = 1; + + return 0; +} + +static int c2_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return 0; +} + +static int c2_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 * pkey) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + *pkey = 0; + return 0; +} + +static int c2_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6); + + return 0; +} + +/* Allocate the user context data structure. This keeps track + * of all objects associated with a particular user-mode client. + */ +static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct c2_ucontext *context; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + return &context->ibucontext; +} + +static int c2_dealloc_ucontext(struct ib_ucontext *context) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + kfree(context); + return 0; +} + +static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_pd *pd; + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = c2_pd_alloc(to_c2dev(ibdev), !context, pd); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) { + c2_pd_free(to_c2dev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +static int c2_dealloc_pd(struct ib_pd *pd) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + c2_pd_free(to_c2dev(pd->device), to_c2pd(pd)); + kfree(pd); + + return 0; +} + +static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return ERR_PTR(-ENOSYS); +} + +static int c2_ah_destroy(struct ib_ah *ah) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static void c2_add_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + atomic_inc(&qp->refcount); +} + +static void c2_rem_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +struct ib_qp *c2_get_qp(struct ib_device *device, int qpn) +{ + struct c2_dev* c2dev = to_c2dev(device); + struct c2_qp *qp; + + qp = c2_find_qpn(c2dev, qpn); + pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n", + __FUNCTION__, qp, qpn, device, + (qp?atomic_read(&qp->refcount):0)); + + return (qp?&qp->ibqp:NULL); +} + +static struct ib_qp *c2_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct c2_qp *qp; + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + switch (init_attr->qp_type) { + case IB_QPT_RC: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + pr_debug("%s: Unable to allocate QP\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + spin_lock_init(&qp->lock); + if (pd->uobject) { + /* userspace specific */ + } + + err = c2_alloc_qp(to_c2dev(pd->device), + to_c2pd(pd), init_attr, qp); + + if (err && pd->uobject) { + /* userspace specific */ + } + + break; + default: + pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__, + init_attr->qp_type); + return ERR_PTR(-EINVAL); + break; + } + + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + return &qp->ibqp; +} + +static int c2_destroy_qp(struct ib_qp *ib_qp) +{ + struct c2_qp *qp = to_c2qp(ib_qp); + + pr_debug("%s:%u qp=%p,qp->state=%d\n", + __FUNCTION__, __LINE__,ib_qp,qp->state); + c2_free_qp(to_c2dev(ib_qp->device), qp); + kfree(qp); + return 0; +} + +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_cq *cq; + int err; + + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + + err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq); + if (err) { + pr_debug("%s: error initializing CQ\n", __FUNCTION__); + kfree(cq); + return ERR_PTR(err); + } + + return &cq->ibcq; +} + +static int c2_destroy_cq(struct ib_cq *ib_cq) +{ + struct c2_cq *cq = to_c2cq(ib_cq); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + c2_free_cq(to_c2dev(ib_cq->device), cq); + kfree(cq); + + return 0; +} + +static inline u32 c2_convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) | + C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND; +} + +static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 * iova_start) +{ + struct c2_mr *mr; + u64 *page_list; + u32 total_len; + int err, i, j, k, page_shift, pbl_depth; + + pbl_depth = 0; + total_len = 0; + + page_shift = PAGE_SHIFT; + /* + * If there is only 1 buffer we assume this could + * be a map of all phy mem...use a 32k page_shift. + */ + if (num_phys_buf == 1) + page_shift += 3; + + for (i = 0; i < num_phys_buf; i++) { + + if (buffer_list[i].addr & ~PAGE_MASK) { + pr_debug("Unaligned Memory Buffer: 0x%x\n", + (unsigned int) buffer_list[i].addr); + return ERR_PTR(-EINVAL); + } + + if (!buffer_list[i].size) { + pr_debug("Invalid Buffer Size\n"); + return ERR_PTR(-EINVAL); + } + + total_len += buffer_list[i].size; + pbl_depth += ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + } + + page_list = vmalloc(sizeof(u64) * pbl_depth); + if (!page_list) { + pr_debug("couldn't vmalloc page_list of size %zd\n", + (sizeof(u64) * pbl_depth)); + return ERR_PTR(-ENOMEM); + } + + for (i = 0, j = 0; i < num_phys_buf; i++) { + + int naddrs; + + naddrs = ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + for (k = 0; k < naddrs; k++) + page_list[j++] = (buffer_list[i].addr + + (k << page_shift)); + } + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->pd = to_c2pd(ib_pd); + pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " + "*iova_start %llx, first pa %llx, last pa %llx\n", + __FUNCTION__, page_shift, pbl_depth, total_len, + *iova_start, page_list[0], page_list[pbl_depth-1]); + err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, + (1 << page_shift), pbl_depth, + total_len, 0, iova_start, + c2_convert_access(acc), mr); + vfree(page_list); + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + return &mr->ibmr; +} + +static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva = 0; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* AMSO1100 limit */ + bl.size = 0xffffffff; + bl.addr = 0; + return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); +} + +static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + u64 *pages; + u64 kva = 0; + int shift, n, len; + int i, j, k; + int err = 0; + struct ib_umem_chunk *chunk; + struct c2_pd *c2pd = to_c2pd(pd); + struct c2_mr *c2mr; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + shift = ffs(region->page_size) - 1; + + c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); + if (!c2mr) + return ERR_PTR(-ENOMEM); + c2mr->pd = c2pd; + + n = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) + n += chunk->nents; + + pages = kmalloc(n * sizeof(u64), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err; + } + + i = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) { + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(&chunk->page_list[j]) + + (region->page_size * k); + } + } + } + + kva = (u64)region->virt_base; + err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), + pages, + region->page_size, + i, + region->length, + region->offset, + &kva, + c2_convert_access(acc), + c2mr); + kfree(pages); + if (err) { + kfree(c2mr); + return ERR_PTR(err); + } + return &c2mr->ibmr; + +err: + kfree(c2mr); + return ERR_PTR(err); +} + +static int c2_dereg_mr(struct ib_mr *ib_mr) +{ + struct c2_mr *mr = to_c2mr(ib_mr); + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); + if (err) + pr_debug("c2_stag_dealloc failed: %d\n", err); + else + kfree(mr); + + return err; +} + +static ssize_t show_rev(struct class_device *cdev, char *buf) +{ + struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev); + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%x\n", dev->props.hw_ver); +} + +static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +{ + struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev); + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%x.%x.%x\n", + (int) (dev->props.fw_ver >> 32), + (int) (dev->props.fw_ver >> 16) & 0xffff, + (int) (dev->props.fw_ver & 0xffff)); +} + +static ssize_t show_hca(struct class_device *cdev, char *buf) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "AMSO1100\n"); +} + +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID"); +} + +static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct class_device_attribute *c2_class_attributes[] = { + &class_device_attr_hw_rev, + &class_device_attr_fw_ver, + &class_device_attr_hca_type, + &class_device_attr_board_id +}; + +static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + int err; + + err = + c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr, + attr_mask); + + return err; +} + +static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* Request a connection */ + return c2_llp_connect(cm_id, iw_param); +} + +static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* Accept the new connection */ + return c2_llp_accept(cm_id, iw_param); +} + +static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_llp_reject(cm_id, pdata, pdata_len); + return err; +} + +static int c2_service_create(struct iw_cm_id *cm_id, int backlog) +{ + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + err = c2_llp_service_create(cm_id, backlog); + pr_debug("%s:%u err=%d\n", + __FUNCTION__, __LINE__, + err); + return err; +} + +static int c2_service_destroy(struct iw_cm_id *cm_id) +{ + int err; + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_llp_service_destroy(cm_id); + + return err; +} + +static int c2_pseudo_up(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("adding...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_down(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("deleting...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu) +{ + int ret = 0; + + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + /* TODO: Tell rnic about new rmda interface mtu */ + return ret; +} + +static void setup(struct net_device *netdev) +{ + SET_MODULE_OWNER(netdev); + netdev->open = c2_pseudo_up; + netdev->stop = c2_pseudo_down; + netdev->hard_start_xmit = c2_pseudo_xmit_frame; + netdev->get_stats = NULL; + netdev->tx_timeout = NULL; + netdev->set_mac_address = NULL; + netdev->change_mtu = c2_pseudo_change_mtu; + netdev->watchdog_timeo = 0; + netdev->type = ARPHRD_ETHER; + netdev->mtu = 1500; + netdev->hard_header_len = ETH_HLEN; + netdev->addr_len = ETH_ALEN; + netdev->tx_queue_len = 0; + netdev->flags |= IFF_NOARP; + return; +} + +static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) +{ + char name[IFNAMSIZ]; + struct net_device *netdev; + + /* change ethxxx to iwxxx */ + strcpy(name, "iw"); + strcat(name, &c2dev->netdev->name[3]); + netdev = alloc_netdev(sizeof(*netdev), name, setup); + if (!netdev) { + printk(KERN_ERR PFX "%s - etherdev alloc failed", + __FUNCTION__); + return NULL; + } + + netdev->priv = c2dev; + + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); + + /* Print out the MAC address */ + pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n", + netdev->name, + netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], + netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]); + +#if 0 + /* Disable network packets */ + netif_stop_queue(netdev); +#endif + return netdev; +} + +int c2_register_device(struct c2_dev *dev) +{ + int ret; + int i; + + /* Register pseudo network device */ + dev->pseudo_netdev = c2_pseudo_netdev_init(dev); + if (dev->pseudo_netdev) { + ret = register_netdev(dev->pseudo_netdev); + if (ret) { + printk(KERN_ERR PFX + "Unable to register netdev, ret = %d\n", ret); + free_netdev(dev->pseudo_netdev); + return ret; + } + } + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + + dev->ibdev.node_type = RDMA_NODE_RNIC; + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.dma_device = &dev->pcidev->dev; + dev->ibdev.class_dev.dev = &dev->pcidev->dev; + dev->ibdev.query_device = c2_query_device; + dev->ibdev.query_port = c2_query_port; + dev->ibdev.modify_port = c2_modify_port; + dev->ibdev.query_pkey = c2_query_pkey; + dev->ibdev.query_gid = c2_query_gid; + dev->ibdev.alloc_ucontext = c2_alloc_ucontext; + dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext; + dev->ibdev.mmap = c2_mmap_uar; + dev->ibdev.alloc_pd = c2_alloc_pd; + dev->ibdev.dealloc_pd = c2_dealloc_pd; + dev->ibdev.create_ah = c2_ah_create; + dev->ibdev.destroy_ah = c2_ah_destroy; + dev->ibdev.create_qp = c2_create_qp; + dev->ibdev.modify_qp = c2_modify_qp; + dev->ibdev.destroy_qp = c2_destroy_qp; + dev->ibdev.create_cq = c2_create_cq; + dev->ibdev.destroy_cq = c2_destroy_cq; + dev->ibdev.poll_cq = c2_poll_cq; + dev->ibdev.get_dma_mr = c2_get_dma_mr; + dev->ibdev.reg_phys_mr = c2_reg_phys_mr; + dev->ibdev.reg_user_mr = c2_reg_user_mr; + dev->ibdev.dereg_mr = c2_dereg_mr; + + dev->ibdev.alloc_fmr = NULL; + dev->ibdev.unmap_fmr = NULL; + dev->ibdev.dealloc_fmr = NULL; + dev->ibdev.map_phys_fmr = NULL; + + dev->ibdev.attach_mcast = c2_multicast_attach; + dev->ibdev.detach_mcast = c2_multicast_detach; + dev->ibdev.process_mad = c2_process_mad; + + dev->ibdev.req_notify_cq = c2_arm_cq; + dev->ibdev.post_send = c2_post_send; + dev->ibdev.post_recv = c2_post_receive; + + dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); + dev->ibdev.iwcm->add_ref = c2_add_ref; + dev->ibdev.iwcm->rem_ref = c2_rem_ref; + dev->ibdev.iwcm->get_qp = c2_get_qp; + dev->ibdev.iwcm->connect = c2_connect; + dev->ibdev.iwcm->accept = c2_accept; + dev->ibdev.iwcm->reject = c2_reject; + dev->ibdev.iwcm->create_listen = c2_service_create; + dev->ibdev.iwcm->destroy_listen = c2_service_destroy; + + ret = ib_register_device(&dev->ibdev); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) { + ret = class_device_create_file(&dev->ibdev.class_dev, + c2_class_attributes[i]); + if (ret) { + unregister_netdev(dev->pseudo_netdev); + free_netdev(dev->pseudo_netdev); + ib_unregister_device(&dev->ibdev); + return ret; + } + } + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return 0; +} + +void c2_unregister_device(struct c2_dev *dev) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + unregister_netdev(dev->pseudo_netdev); + free_netdev(dev->pseudo_netdev); + ib_unregister_device(&dev->ibdev); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h new file mode 100644 index 00000000000..fc906223220 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_PROVIDER_H +#define C2_PROVIDER_H +#include <linux/inetdevice.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> + +#include "c2_mq.h" +#include <rdma/iw_cm.h> + +#define C2_MPT_FLAG_ATOMIC (1 << 14) +#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define C2_MPT_FLAG_REMOTE_READ (1 << 12) +#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define C2_MPT_FLAG_LOCAL_READ (1 << 10) + +struct c2_buf_list { + void *buf; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + + +/* The user context keeps track of objects allocated for a + * particular user-mode client. */ +struct c2_ucontext { + struct ib_ucontext ibucontext; +}; + +struct c2_mtt; + +/* All objects associated with a PD are kept in the + * associated user context if present. + */ +struct c2_pd { + struct ib_pd ibpd; + u32 pd_id; +}; + +struct c2_mr { + struct ib_mr ibmr; + struct c2_pd *pd; +}; + +struct c2_av; + +enum c2_ah_type { + C2_AH_ON_HCA, + C2_AH_PCI_POOL, + C2_AH_KMALLOC +}; + +struct c2_ah { + struct ib_ah ibah; +}; + +struct c2_cq { + struct ib_cq ibcq; + spinlock_t lock; + atomic_t refcount; + int cqn; + int is_kernel; + wait_queue_head_t wait; + + u32 adapter_handle; + struct c2_mq mq; +}; + +struct c2_wq { + spinlock_t lock; +}; +struct iw_cm_id; +struct c2_qp { + struct ib_qp ibqp; + struct iw_cm_id *cm_id; + spinlock_t lock; + atomic_t refcount; + wait_queue_head_t wait; + int qpn; + + u32 adapter_handle; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u8 state; + + struct c2_mq sq_mq; + struct c2_mq rq_mq; +}; + +struct c2_cr_query_attrs { + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +}; + +static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c2_pd, ibpd); +} + +static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct c2_ucontext, ibucontext); +} + +static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c2_mr, ibmr); +} + + +static inline struct c2_ah *to_c2ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct c2_ah, ibah); +} + +static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c2_cq, ibcq); +} + +static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct c2_qp, ibqp); +} + +static inline int is_rnic_addr(struct net_device *netdev, u32 addr) +{ + struct in_device *ind; + int ret = 0; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + for_ifa(ind) { + if (ifa->ifa_address == addr) { + ret = 1; + break; + } + } + endfor_ifa(ind); + in_dev_put(ind); + return ret; +} +#endif /* C2_PROVIDER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c new file mode 100644 index 00000000000..12261132b07 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_MAX_ORD_PER_QP 128 +#define C2_MAX_IRD_PER_QP 128 + +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + +#define NO_SUPPORT -1 +static const u8 c2_opcode[] = { + [IB_WR_SEND] = C2_WR_TYPE_SEND, + [IB_WR_SEND_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT, + [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT, +}; + +static int to_c2_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + return C2_QP_STATE_IDLE; + case IB_QPS_RTS: + return C2_QP_STATE_RTS; + case IB_QPS_SQD: + return C2_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C2_QP_STATE_CLOSING; + case IB_QPS_ERR: + return C2_QP_STATE_ERROR; + default: + return -1; + } +} + +static int to_ib_state(enum c2_qp_state c2_state) +{ + switch (c2_state) { + case C2_QP_STATE_IDLE: + return IB_QPS_RESET; + case C2_QP_STATE_CONNECTING: + return IB_QPS_RTR; + case C2_QP_STATE_RTS: + return IB_QPS_RTS; + case C2_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C2_QP_STATE_ERROR: + return IB_QPS_ERR; + case C2_QP_STATE_TERMINATE: + return IB_QPS_SQE; + default: + return -1; + } +} + +static const char *to_ib_state_str(int ib_state) +{ + static const char *state_str[] = { + "IB_QPS_RESET", + "IB_QPS_INIT", + "IB_QPS_RTR", + "IB_QPS_RTS", + "IB_QPS_SQD", + "IB_QPS_SQE", + "IB_QPS_ERR" + }; + if (ib_state < IB_QPS_RESET || + ib_state > IB_QPS_ERR) + return "<invalid IB QP state>"; + + ib_state -= IB_QPS_RESET; + return state_str[ib_state]; +} + +void c2_set_qp_state(struct c2_qp *qp, int c2_state) +{ + int new_state = to_ib_state(c2_state); + + pr_debug("%s: qp[%p] state modify %s --> %s\n", + __FUNCTION__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(new_state)); + qp->state = new_state; +} + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + unsigned long flags; + u8 next_state; + int err; + + pr_debug("%s:%d qp=%p, %s --> %s\n", + __FUNCTION__, __LINE__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(attr->qp_state)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + if (attr_mask & IB_QP_STATE) { + /* Ensure the state is valid */ + if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) + return -EINVAL; + + wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state)); + + if (attr->qp_state == IB_QPS_ERR) { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("Generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + } + next_state = attr->qp_state; + + } else if (attr_mask & IB_QP_CUR_STATE) { + + if (attr->cur_qp_state != IB_QPS_RTR && + attr->cur_qp_state != IB_QPS_RTS && + attr->cur_qp_state != IB_QPS_SQD && + attr->cur_qp_state != IB_QPS_SQE) + return -EINVAL; + else + wr.next_qp_state = + cpu_to_be32(to_c2_state(attr->cur_qp_state)); + + next_state = attr->cur_qp_state; + + } else { + err = 0; + goto bail0; + } + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + if (!err) + qp->state = next_state; +#ifdef DEBUG + else + pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err); +#endif + /* + * If we're going to error and generating the event here, then + * we need to remove the reference because there will be no + * close event generated by the adapter + */ + spin_lock_irqsave(&qp->lock, flags); + if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + + pr_debug("%s:%d qp=%p, cur_state=%s\n", + __FUNCTION__, __LINE__, + qp, + to_ib_state_str(qp->state)); + return err; +} + +int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(ord); + wr.ird = cpu_to_be32(ird); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_vq_req *vq_req; + struct c2wr_qp_destroy_req wr; + struct c2wr_qp_destroy_rep *reply; + unsigned long flags; + int err; + + /* + * Allocate a verb request message + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Initialize the WR + */ + c2_wr_set_id(&wr, CCWR_QP_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->qp = qp; + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) +{ + int ret; + + do { + spin_lock_irq(&c2dev->qp_table.lock); + ret = idr_get_new_above(&c2dev->qp_table.idr, qp, + c2dev->qp_table.last++, &qp->qpn); + spin_unlock_irq(&c2dev->qp_table.lock); + } while ((ret == -EAGAIN) && + idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL)); + return ret; +} + +static void c2_free_qpn(struct c2_dev *c2dev, int qpn) +{ + spin_lock_irq(&c2dev->qp_table.lock); + idr_remove(&c2dev->qp_table.idr, qpn); + spin_unlock_irq(&c2dev->qp_table.lock); +} + +struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn) +{ + unsigned long flags; + struct c2_qp *qp; + + spin_lock_irqsave(&c2dev->qp_table.lock, flags); + qp = idr_find(&c2dev->qp_table.idr, qpn); + spin_unlock_irqrestore(&c2dev->qp_table.lock, flags); + return qp; +} + +int c2_alloc_qp(struct c2_dev *c2dev, + struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp) +{ + struct c2wr_qp_create_req wr; + struct c2wr_qp_create_rep *reply; + struct c2_vq_req *vq_req; + struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq); + struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq); + unsigned long peer_pa; + u32 q_size, msg_size, mmap_size; + void __iomem *mmap; + int err; + + err = c2_alloc_qpn(c2dev, qp); + if (err) + return err; + qp->ibqp.qp_num = qp->qpn; + qp->ibqp.qp_type = IB_QPT_RC; + + /* Allocate the SQ and RQ shared pointers */ + qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->sq_mq.shared_dma, GFP_KERNEL); + if (!qp->sq_mq.shared) { + err = -ENOMEM; + goto bail0; + } + + qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->rq_mq.shared_dma, GFP_KERNEL); + if (!qp->rq_mq.shared) { + err = -ENOMEM; + goto bail1; + } + + /* Allocate the verbs request */ + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + err = -ENOMEM; + goto bail2; + } + + /* Initialize the work request */ + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_QP_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.sq_cq_handle = send_cq->adapter_handle; + wr.rq_cq_handle = recv_cq->adapter_handle; + wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1); + wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1); + wr.srq_handle = 0; + wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND | + QP_ZERO_STAG | QP_RDMA_READ_RESPONSE); + wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge); + wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma); + wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma); + wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP); + wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP); + wr.pd_id = pd->pd_id; + wr.user_context = (unsigned long) qp; + + vq_req_get(c2dev, vq_req); + + /* Send the WR to the adapter */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail3; + } + + /* Wait for the verb reply */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail3; + } + + /* Process the reply */ + reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail3; + } + + if ((err = c2_wr_get_result(reply)) != 0) { + goto bail4; + } + + /* Fill in the kernel QP struct */ + atomic_set(&qp->refcount, 1); + qp->adapter_handle = reply->qp_handle; + qp->state = IB_QPS_RESET; + qp->send_sgl_depth = qp_attrs->cap.max_send_sge; + qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge; + qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge; + + /* Initialize the SQ MQ */ + q_size = be32_to_cpu(reply->sq_depth); + msg_size = be32_to_cpu(reply->sq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail5; + } + + c2_mq_req_init(&qp->sq_mq, + be32_to_cpu(reply->sq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + /* Initialize the RQ mq */ + q_size = be32_to_cpu(reply->rq_depth); + msg_size = be32_to_cpu(reply->rq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail6; + } + + c2_mq_req_init(&qp->rq_mq, + be32_to_cpu(reply->rq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail6: + iounmap(qp->sq_mq.peer); + bail5: + destroy_qp(c2dev, qp); + bail4: + vq_repbuf_free(c2dev, reply); + bail3: + vq_req_free(c2dev, vq_req); + bail2: + c2_free_mqsp(qp->rq_mq.shared); + bail1: + c2_free_mqsp(qp->sq_mq.shared); + bail0: + c2_free_qpn(c2dev, qp->qpn); + return err; +} + +void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_cq *send_cq; + struct c2_cq *recv_cq; + + send_cq = to_c2cq(qp->ibqp.send_cq); + recv_cq = to_c2cq(qp->ibqp.recv_cq); + + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + spin_lock_irq(&send_cq->lock); + if (send_cq != recv_cq) + spin_lock(&recv_cq->lock); + + c2_free_qpn(c2dev, qp->qpn); + + if (send_cq != recv_cq) + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + + /* + * Destory qp in the rnic... + */ + destroy_qp(c2dev, qp); + + /* + * Mark any unreaped CQEs as null and void. + */ + c2_cq_clean(c2dev, qp, send_cq->cqn); + if (send_cq != recv_cq) + c2_cq_clean(c2dev, qp, recv_cq->cqn); + /* + * Unmap the MQs and return the shared pointers + * to the message pool. + */ + iounmap(qp->sq_mq.peer); + iounmap(qp->rq_mq.peer); + c2_free_mqsp(qp->sq_mq.shared); + c2_free_mqsp(qp->rq_mq.shared); + + atomic_dec(&qp->refcount); + wait_event(qp->wait, !atomic_read(&qp->refcount)); +} + +/* + * Function: move_sgl + * + * Description: + * Move an SGL from the user's work request struct into a CCIL Work Request + * message, swapping to WR byte order and ensure the total length doesn't + * overflow. + * + * IN: + * dst - ptr to CCIL Work Request message SGL memory. + * src - ptr to the consumers SGL memory. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int +move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len, + u8 * actual_count) +{ + u32 tot = 0; /* running total */ + u8 acount = 0; /* running total non-0 len sge's */ + + while (count > 0) { + /* + * If the addition of this SGE causes the + * total SGL length to exceed 2^32-1, then + * fail-n-bail. + * + * If the current total plus the next element length + * wraps, then it will go negative and be less than the + * current total... + */ + if ((tot + src->length) < tot) { + return -EINVAL; + } + /* + * Bug: 1456 (as well as 1498 & 1643) + * Skip over any sge's supplied with len=0 + */ + if (src->length) { + tot += src->length; + dst->stag = cpu_to_be32(src->lkey); + dst->to = cpu_to_be64(src->addr); + dst->length = cpu_to_be32(src->length); + dst++; + acount++; + } + src++; + count--; + } + + if (acount == 0) { + /* + * Bug: 1476 (as well as 1498, 1456 and 1643) + * Setup the SGL in the WR to make it easier for the RNIC. + * This way, the FW doesn't have to deal with special cases. + * Setting length=0 should be sufficient. + */ + dst->stag = 0; + dst->to = 0; + dst->length = 0; + } + + *p_len = tot; + *actual_count = acount; + return 0; +} + +/* + * Function: c2_activity (private function) + * + * Description: + * Post an mq index to the host->adapter activity fifo. + * + * IN: + * c2dev - ptr to c2dev structure + * mq_index - mq index to post + * shared - value most recently written to shared + * + * OUT: + * + * Return: + * none + */ +static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) +{ + /* + * First read the register to see if the FIFO is full, and if so, + * spin until it's not. This isn't perfect -- there is no + * synchronization among the clients of the register, but in + * practice it prevents multiple CPU from hammering the bus + * with PCI RETRY. Note that when this does happen, the card + * cannot get on the bus and the card and system hang in a + * deadlock -- thus the need for this code. [TOT] + */ + while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(0); + } + + __raw_writel(C2_HINT_MAKE(mq_index, shared), + c2dev->regs + PCI_BAR0_ADAPTER_HINT); +} + +/* + * Function: qp_wr_post + * + * Description: + * This in-line function allocates a MQ msg, then moves the host-copy of + * the completed WR into msg. Then it posts the message. + * + * IN: + * q - ptr to user MQ. + * wr - ptr to host-copy of the WR. + * qp - ptr to user qp + * size - Number of bytes to post. Assumed to be divisible by 4. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size) +{ + union c2wr *msg; + + msg = c2_mq_alloc(q); + if (msg == NULL) { + return -EINVAL; + } +#ifdef CCMSGMAGIC + ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC); +#endif + + /* + * Since all header fields in the WR are the same as the + * CQE, set the following so the adapter need not. + */ + c2_wr_set_result(wr, CCERR_PENDING); + + /* + * Copy the wr down to the adapter + */ + memcpy((void *) msg, (void *) wr, size); + + c2_mq_produce(q); + return 0; +} + + +int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + int err = 0; + + u32 flags; + u32 tot_len; + u8 actual_sge_count; + u32 msg_size; + + if (qp->state > IB_QPS_RTS) + return -EINVAL; + + while (ib_wr) { + + flags = 0; + wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + if (ib_wr->send_flags & IB_SEND_SIGNALED) { + flags |= SQ_SIGNALED; + } + + switch (ib_wr->opcode) { + case IB_WR_SEND: + if (ib_wr->send_flags & IB_SEND_SOLICITED) { + c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE); + msg_size = sizeof(struct c2wr_send_req); + } else { + c2_wr_set_id(&wr, C2_WR_TYPE_SEND); + msg_size = sizeof(struct c2wr_send_req); + } + + wr.sqwr.send.remote_stag = 0; + msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge; + if (ib_wr->num_sge > qp->send_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.send.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_WRITE: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE); + msg_size = sizeof(struct c2wr_rdma_write_req) + + (sizeof(struct c2_data_addr) * ib_wr->num_sge); + if (ib_wr->num_sge > qp->rdma_write_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + wr.sqwr.rdma_write.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_write.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + err = move_sgl((struct c2_data_addr *) + & (wr.sqwr.rdma_write.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_READ: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ); + msg_size = sizeof(struct c2wr_rdma_read_req); + + /* IWarp only suppots 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + err = -EINVAL; + break; + } + + /* + * Move the local and remote stag/to/len into the WR. + */ + wr.sqwr.rdma_read.local_stag = + cpu_to_be32(ib_wr->sg_list->lkey); + wr.sqwr.rdma_read.local_to = + cpu_to_be64(ib_wr->sg_list->addr); + wr.sqwr.rdma_read.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_read.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + wr.sqwr.rdma_read.length = + cpu_to_be32(ib_wr->sg_list->length); + break; + default: + /* error */ + msg_size = 0; + err = -EINVAL; + break; + } + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + /* + * Store flags + */ + c2_wr_set_flags(&wr, flags); + + /* + * Post the puppy! + */ + err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); + if (err) { + break; + } + + /* + * Enqueue mq index to activity FIFO. + */ + c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + return err; +} + +int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + int err = 0; + + if (qp->state > IB_QPS_RTS) + return -EINVAL; + + /* + * Try and post each work request + */ + while (ib_wr) { + u32 tot_len; + u8 actual_sge_count; + + if (ib_wr->num_sge > qp->recv_sgl_depth) { + err = -EINVAL; + break; + } + + /* + * Create local host-copy of the WR + */ + wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + c2_wr_set_id(&wr, CCWR_RECV); + c2_wr_set_flags(&wr, 0); + + /* sge_count is limited to eight bits. */ + BUG_ON(ib_wr->num_sge >= 256); + err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data), + ib_wr->sg_list, + ib_wr->num_sge, &tot_len, &actual_sge_count); + c2_wr_set_sge_count(&wr, actual_sge_count); + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); + if (err) { + break; + } + + /* + * Enqueue mq index to activity FIFO + */ + c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + return err; +} + +void __devinit c2_init_qp_table(struct c2_dev *c2dev) +{ + spin_lock_init(&c2dev->qp_table.lock); + idr_init(&c2dev->qp_table.idr); +} + +void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev) +{ + idr_destroy(&c2dev->qp_table.idr); +} diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c new file mode 100644 index 00000000000..f49a32b7a8f --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -0,0 +1,664 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/if_vlan.h> +#include <linux/crc32.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> +#include <linux/dma-mapping.h> +#include <linux/mm.h> +#include <linux/inet.h> +#include <linux/vmalloc.h> + +#include <linux/route.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/byteorder.h> +#include <rdma/ib_smi.h> +#include "c2.h" +#include "c2_vq.h" + +/* Device capabilities */ +#define C2_MIN_PAGESIZE 1024 + +#define C2_MAX_MRS 32768 +#define C2_MAX_QPS 16000 +#define C2_MAX_WQE_SZ 256 +#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ) +#define C2_MAX_SGES 4 +#define C2_MAX_SGE_RD 1 +#define C2_MAX_CQS 32768 +#define C2_MAX_CQES 4096 +#define C2_MAX_PDS 16384 + +/* + * Send the adapter INIT message to the amso1100 + */ +static int c2_adapter_init(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + int err; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_INIT); + wr.hdr.context = 0; + wr.hint_count = cpu_to_be64(c2dev->hint_count_dma); + wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma); + wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma); + wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma); + wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma); + wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma); + + /* Post the init message */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + + return err; +} + +/* + * Send the adapter TERM message to the amso1100 + */ +static void c2_adapter_term(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_TERM); + wr.hdr.context = 0; + + /* Post the init message */ + vq_send_wr(c2dev, (union c2wr *) & wr); + c2dev->init = 0; + + return; +} + +/* + * Query the adapter + */ +static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_query_req wr; + struct c2wr_rnic_query_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_RNIC_QUERY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) + err = -ENOMEM; + + err = c2_errno(reply); + if (err) + goto bail2; + + props->fw_ver = + ((u64)be32_to_cpu(reply->fw_ver_major) << 32) | + ((be32_to_cpu(reply->fw_ver_minor) && 0xFFFF) << 16) | + (be32_to_cpu(reply->fw_ver_patch) && 0xFFFF); + memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6); + props->max_mr_size = 0xFFFFFFFF; + props->page_size_cap = ~(C2_MIN_PAGESIZE-1); + props->vendor_id = be32_to_cpu(reply->vendor_id); + props->vendor_part_id = be32_to_cpu(reply->part_number); + props->hw_ver = be32_to_cpu(reply->hw_version); + props->max_qp = be32_to_cpu(reply->max_qps); + props->max_qp_wr = be32_to_cpu(reply->max_qp_depth); + props->device_cap_flags = c2dev->device_cap_flags; + props->max_sge = C2_MAX_SGES; + props->max_sge_rd = C2_MAX_SGE_RD; + props->max_cq = be32_to_cpu(reply->max_cqs); + props->max_cqe = be32_to_cpu(reply->max_cq_depth); + props->max_mr = be32_to_cpu(reply->max_mrs); + props->max_pd = be32_to_cpu(reply->max_pds); + props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird); + props->max_ee_rd_atom = 0; + props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird); + props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord); + props->max_ee_init_rd_atom = 0; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_ee = 0; + props->max_rdd = 0; + props->max_mw = be32_to_cpu(reply->max_mws); + props->max_raw_ipv6_qp = 0; + props->max_raw_ethy_qp = 0; + props->max_mcast_grp = 0; + props->max_mcast_qp_attach = 0; + props->max_total_mcast_qp_attach = 0; + props->max_ah = 0; + props->max_fmr = 0; + props->max_map_per_fmr = 0; + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 0; + props->local_ca_ack_delay = 0; + + bail2: + vq_repbuf_free(c2dev, reply); + + bail1: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Add an IP address to the RNIC interface + */ +int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_ADD_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Delete an IP address from the RNIC interface + */ +int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_DEL_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Open a single RNIC instance to use with all + * low level openib calls + */ +static int c2_rnic_open(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_open_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_OPEN); + wr.rnic_open.req.hdr.context = (unsigned long) (vq_req); + wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE); + wr.rnic_open.req.port_num = cpu_to_be16(0); + wr.rnic_open.req.user_context = (unsigned long) c2dev; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = reply->rnic_handle; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Close the RNIC instance + */ +static int c2_rnic_close(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_close_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_CLOSE); + wr.rnic_close.req.hdr.context = (unsigned long) vq_req; + wr.rnic_close.req.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Called by c2_probe to initialize the RNIC. This principally + * involves initalizing the various limits and resouce pools that + * comprise the RNIC instance. + */ +int c2_rnic_init(struct c2_dev *c2dev) +{ + int err; + u32 qsize, msgsize; + void *q1_pages; + void *q2_pages; + void __iomem *mmio_regs; + + /* Device capabilities */ + c2dev->device_cap_flags = + (IB_DEVICE_RESIZE_MAX_WR | + IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_ZERO_STAG | + IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW); + + /* Allocate the qptr_array */ + c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); + if (!c2dev->qptr_array) { + return -ENOMEM; + } + + /* Inialize the qptr_array */ + memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *)); + c2dev->qptr_array[0] = (void *) &c2dev->req_vq; + c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; + c2dev->qptr_array[2] = (void *) &c2dev->aeq; + + /* Initialize data structures */ + init_waitqueue_head(&c2dev->req_vq_wo); + spin_lock_init(&c2dev->vqlock); + spin_lock_init(&c2dev->lock); + + /* Allocate MQ shared pointer pool for kernel clients. User + * mode client pools are hung off the user context + */ + err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool); + if (err) { + goto bail0; + } + + /* Allocate shared pointers for Q0, Q1, and Q2 from + * the shared pointer pool. + */ + + c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->hint_count_dma, + GFP_KERNEL); + c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->req_vq.shared_dma, + GFP_KERNEL); + c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->rep_vq.shared_dma, + GFP_KERNEL); + c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->aeq.shared_dma, GFP_KERNEL); + if (!c2dev->hint_count || !c2dev->req_vq.shared || + !c2dev->rep_vq.shared || !c2dev->aeq.shared) { + err = -ENOMEM; + goto bail1; + } + + mmio_regs = c2dev->kva; + /* Initialize the Verbs Request Queue */ + c2_mq_req_init(&c2dev->req_vq, 0, + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)), + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)), + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)), + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)), + C2_MQ_ADAPTER_TARGET); + + /* Initialize the Verbs Reply Queue */ + qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE)); + msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); + q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + if (!q1_pages) { + err = -ENOMEM; + goto bail1; + } + c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)q1_pages, qsize * msgsize, + DMA_FROM_DEVICE); + pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); + pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages, + (u64)c2dev->rep_vq.host_dma); + c2_mq_rep_init(&c2dev->rep_vq, + 1, + qsize, + msgsize, + q1_pages, + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the Asynchronus Event Queue */ + qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE)); + msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); + q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + if (!q2_pages) { + err = -ENOMEM; + goto bail2; + } + c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)q2_pages, qsize * msgsize, + DMA_FROM_DEVICE); + pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); + pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages, + (u64)c2dev->rep_vq.host_dma); + c2_mq_rep_init(&c2dev->aeq, + 2, + qsize, + msgsize, + q2_pages, + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the verbs request allocator */ + err = vq_init(c2dev); + if (err) + goto bail3; + + /* Enable interrupts on the adapter */ + writel(0, c2dev->regs + C2_IDIS); + + /* create the WR init message */ + err = c2_adapter_init(c2dev); + if (err) + goto bail4; + c2dev->init++; + + /* open an adapter instance */ + err = c2_rnic_open(c2dev); + if (err) + goto bail4; + + /* Initialize cached the adapter limits */ + if (c2_rnic_query(c2dev, &c2dev->props)) + goto bail5; + + /* Initialize the PD pool */ + err = c2_init_pd_table(c2dev); + if (err) + goto bail5; + + /* Initialize the QP pool */ + c2_init_qp_table(c2dev); + return 0; + + bail5: + c2_rnic_close(c2dev); + bail4: + vq_term(c2dev); + bail3: + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->aeq, mapping), + c2dev->aeq.q_size * c2dev->aeq.msg_size, + DMA_FROM_DEVICE); + kfree(q2_pages); + bail2: + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->rep_vq, mapping), + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + DMA_FROM_DEVICE); + kfree(q1_pages); + bail1: + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + bail0: + vfree(c2dev->qptr_array); + + return err; +} + +/* + * Called by c2_remove to cleanup the RNIC resources. + */ +void c2_rnic_term(struct c2_dev *c2dev) +{ + + /* Close the open adapter instance */ + c2_rnic_close(c2dev); + + /* Send the TERM message to the adapter */ + c2_adapter_term(c2dev); + + /* Disable interrupts on the adapter */ + writel(1, c2dev->regs + C2_IDIS); + + /* Free the QP pool */ + c2_cleanup_qp_table(c2dev); + + /* Free the PD pool */ + c2_cleanup_pd_table(c2dev); + + /* Free the verbs request allocator */ + vq_term(c2dev); + + /* Unmap and free the asynchronus event queue */ + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->aeq, mapping), + c2dev->aeq.q_size * c2dev->aeq.msg_size, + DMA_FROM_DEVICE); + kfree(c2dev->aeq.msg_pool.host); + + /* Unmap and free the verbs reply queue */ + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->rep_vq, mapping), + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + DMA_FROM_DEVICE); + kfree(c2dev->rep_vq.msg_pool.host); + + /* Free the MQ shared pointer pool */ + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + + /* Free the qptr_array */ + vfree(c2dev->qptr_array); + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h new file mode 100644 index 00000000000..6ee4aa92d87 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_status.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_STATUS_H_ +#define _C2_STATUS_H_ + +/* + * Verbs Status Codes + */ +enum c2_status { + C2_OK = 0, /* This must be zero */ + CCERR_INSUFFICIENT_RESOURCES = 1, + CCERR_INVALID_MODIFIER = 2, + CCERR_INVALID_MODE = 3, + CCERR_IN_USE = 4, + CCERR_INVALID_RNIC = 5, + CCERR_INTERRUPTED_OPERATION = 6, + CCERR_INVALID_EH = 7, + CCERR_INVALID_CQ = 8, + CCERR_CQ_EMPTY = 9, + CCERR_NOT_IMPLEMENTED = 10, + CCERR_CQ_DEPTH_TOO_SMALL = 11, + CCERR_PD_IN_USE = 12, + CCERR_INVALID_PD = 13, + CCERR_INVALID_SRQ = 14, + CCERR_INVALID_ADDRESS = 15, + CCERR_INVALID_NETMASK = 16, + CCERR_INVALID_QP = 17, + CCERR_INVALID_QP_STATE = 18, + CCERR_TOO_MANY_WRS_POSTED = 19, + CCERR_INVALID_WR_TYPE = 20, + CCERR_INVALID_SGL_LENGTH = 21, + CCERR_INVALID_SQ_DEPTH = 22, + CCERR_INVALID_RQ_DEPTH = 23, + CCERR_INVALID_ORD = 24, + CCERR_INVALID_IRD = 25, + CCERR_QP_ATTR_CANNOT_CHANGE = 26, + CCERR_INVALID_STAG = 27, + CCERR_QP_IN_USE = 28, + CCERR_OUTSTANDING_WRS = 29, + CCERR_STAG_IN_USE = 30, + CCERR_INVALID_STAG_INDEX = 31, + CCERR_INVALID_SGL_FORMAT = 32, + CCERR_ADAPTER_TIMEOUT = 33, + CCERR_INVALID_CQ_DEPTH = 34, + CCERR_INVALID_PRIVATE_DATA_LENGTH = 35, + CCERR_INVALID_EP = 36, + CCERR_MR_IN_USE = CCERR_STAG_IN_USE, + CCERR_FLUSHED = 38, + CCERR_INVALID_WQE = 39, + CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40, + CCERR_REMOTE_TERMINATION_ERROR = 41, + CCERR_BASE_AND_BOUNDS_VIOLATION = 42, + CCERR_ACCESS_VIOLATION = 43, + CCERR_INVALID_PD_ID = 44, + CCERR_WRAP_ERROR = 45, + CCERR_INV_STAG_ACCESS_ERROR = 46, + CCERR_ZERO_RDMA_READ_RESOURCES = 47, + CCERR_QP_NOT_PRIVILEGED = 48, + CCERR_STAG_STATE_NOT_INVALID = 49, + CCERR_INVALID_PAGE_SIZE = 50, + CCERR_INVALID_BUFFER_SIZE = 51, + CCERR_INVALID_PBE = 52, + CCERR_INVALID_FBO = 53, + CCERR_INVALID_LENGTH = 54, + CCERR_INVALID_ACCESS_RIGHTS = 55, + CCERR_PBL_TOO_BIG = 56, + CCERR_INVALID_VA = 57, + CCERR_INVALID_REGION = 58, + CCERR_INVALID_WINDOW = 59, + CCERR_TOTAL_LENGTH_TOO_BIG = 60, + CCERR_INVALID_QP_ID = 61, + CCERR_ADDR_IN_USE = 62, + CCERR_ADDR_NOT_AVAIL = 63, + CCERR_NET_DOWN = 64, + CCERR_NET_UNREACHABLE = 65, + CCERR_CONN_ABORTED = 66, + CCERR_CONN_RESET = 67, + CCERR_NO_BUFS = 68, + CCERR_CONN_TIMEDOUT = 69, + CCERR_CONN_REFUSED = 70, + CCERR_HOST_UNREACHABLE = 71, + CCERR_INVALID_SEND_SGL_DEPTH = 72, + CCERR_INVALID_RECV_SGL_DEPTH = 73, + CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74, + CCERR_INSUFFICIENT_PRIVILEGES = 75, + CCERR_STACK_ERROR = 76, + CCERR_INVALID_VERSION = 77, + CCERR_INVALID_MTU = 78, + CCERR_INVALID_IMAGE = 79, + CCERR_PENDING = 98, /* not an error; user internally by adapter */ + CCERR_DEFER = 99, /* not an error; used internally by adapter */ + CCERR_FAILED_WRITE = 100, + CCERR_FAILED_ERASE = 101, + CCERR_FAILED_VERIFICATION = 102, + CCERR_NOT_FOUND = 103, + +}; + +/* + * CCAE_ACTIVE_CONNECT_RESULTS status result codes. + */ +enum c2_connect_status { + C2_CONN_STATUS_SUCCESS = C2_OK, + C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES, + C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT, + C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED, + C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE, + C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE, + C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC, + C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP, + C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE, + C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET, + C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL, +}; + +/* + * Flash programming status codes. + */ +enum c2_flash_status { + C2_FLASH_STATUS_SUCCESS = 0x0000, + C2_FLASH_STATUS_VERIFY_ERR = 0x0002, + C2_FLASH_STATUS_IMAGE_ERR = 0x0004, + C2_FLASH_STATUS_ECLBS = 0x0400, + C2_FLASH_STATUS_PSLBS = 0x0800, + C2_FLASH_STATUS_VPENS = 0x1000, +}; + +#endif /* _C2_STATUS_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h new file mode 100644 index 00000000000..7e9e7ad6546 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_user.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_USER_H +#define C2_USER_H + +#include <linux/types.h> + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct c2_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct c2_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct c2_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct c2_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct c2_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* C2_USER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c new file mode 100644 index 00000000000..40caeb5f41b --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "c2_vq.h" +#include "c2_provider.h" + +/* + * Verbs Request Objects: + * + * VQ Request Objects are allocated by the kernel verbs handlers. + * They contain a wait object, a refcnt, an atomic bool indicating that the + * adapter has replied, and a copy of the verb reply work request. + * A pointer to the VQ Request Object is passed down in the context + * field of the work request message, and reflected back by the adapter + * in the verbs reply message. The function handle_vq() in the interrupt + * path will use this pointer to: + * 1) append a copy of the verbs reply message + * 2) mark that the reply is ready + * 3) wake up the kernel verbs handler blocked awaiting the reply. + * + * + * The kernel verbs handlers do a "get" to put a 2nd reference on the + * VQ Request object. If the kernel verbs handler exits before the adapter + * can respond, this extra reference will keep the VQ Request object around + * until the adapter's reply can be processed. The reason we need this is + * because a pointer to this object is stuffed into the context field of + * the verbs work request message, and reflected back in the reply message. + * It is used in the interrupt handler (handle_vq()) to wake up the appropriate + * kernel verb handler that is blocked awaiting the verb reply. + * So handle_vq() will do a "put" on the object when it's done accessing it. + * NOTE: If we guarantee that the kernel verb handler will never bail before + * getting the reply, then we don't need these refcnts. + * + * + * VQ Request objects are freed by the kernel verbs handlers only + * after the verb has been processed, or when the adapter fails and + * does not reply. + * + * + * Verbs Reply Buffers: + * + * VQ Reply bufs are local host memory copies of a + * outstanding Verb Request reply + * message. The are always allocated by the kernel verbs handlers, and _may_ be + * freed by either the kernel verbs handler -or- the interrupt handler. The + * kernel verbs handler _must_ free the repbuf, then free the vq request object + * in that order. + */ + +int vq_init(struct c2_dev *c2dev) +{ + sprintf(c2dev->vq_cache_name, "c2-vq:dev%c", + (char) ('0' + c2dev->devnum)); + c2dev->host_msg_cache = + kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (c2dev->host_msg_cache == NULL) { + return -ENOMEM; + } + return 0; +} + +void vq_term(struct c2_dev *c2dev) +{ + kmem_cache_destroy(c2dev->host_msg_cache); +} + +/* vq_req_alloc - allocate a VQ Request Object and initialize it. + * The refcnt is set to 1. + */ +struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev) +{ + struct c2_vq_req *r; + + r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL); + if (r) { + init_waitqueue_head(&r->wait_object); + r->reply_msg = (u64) NULL; + r->event = 0; + r->cm_id = NULL; + r->qp = NULL; + atomic_set(&r->refcnt, 1); + atomic_set(&r->reply_ready, 0); + } + return r; +} + + +/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler + * has already free the VQ Reply Buffer if it existed. + */ +void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + r->reply_msg = (u64) NULL; + if (atomic_dec_and_test(&r->refcnt)) { + kfree(r); + } +} + +/* vq_req_get - reference a VQ Request Object. Done + * only in the kernel verbs handlers. + */ +void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + atomic_inc(&r->refcnt); +} + + +/* vq_req_put - dereference and potentially free a VQ Request Object. + * + * This is only called by handle_vq() on the + * interrupt when it is done processing + * a verb reply message. If the associated + * kernel verbs handler has already bailed, + * then this put will actually free the VQ + * Request object _and_ the VQ Reply Buffer + * if it exists. + */ +void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + if (atomic_dec_and_test(&r->refcnt)) { + if (r->reply_msg != (u64) NULL) + vq_repbuf_free(c2dev, + (void *) (unsigned long) r->reply_msg); + kfree(r); + } +} + + +/* + * vq_repbuf_alloc - allocate a VQ Reply Buffer. + */ +void *vq_repbuf_alloc(struct c2_dev *c2dev) +{ + return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC); +} + +/* + * vq_send_wr - post a verbs request message to the Verbs Request Queue. + * If a message is not available in the MQ, then block until one is available. + * NOTE: handle_mq() on the interrupt context will wake up threads blocked here. + * When the adapter drains the Verbs Request Queue, + * it inserts MQ index 0 in to the + * adapter->host activity fifo and interrupts the host. + */ +int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr) +{ + void *msg; + wait_queue_t __wait; + + /* + * grab adapter vq lock + */ + spin_lock(&c2dev->vqlock); + + /* + * allocate msg + */ + msg = c2_mq_alloc(&c2dev->req_vq); + + /* + * If we cannot get a msg, then we'll wait + * When a messages are available, the int handler will wake_up() + * any waiters. + */ + while (msg == NULL) { + pr_debug("%s:%d no available msg in VQ, waiting...\n", + __FUNCTION__, __LINE__); + init_waitqueue_entry(&__wait, current); + add_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_unlock(&c2dev->vqlock); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!c2_mq_full(&c2dev->req_vq)) { + break; + } + if (!signal_pending(current)) { + schedule_timeout(1 * HZ); /* 1 second... */ + continue; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + return -EINTR; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_lock(&c2dev->vqlock); + msg = c2_mq_alloc(&c2dev->req_vq); + } + + /* + * copy wr into adapter msg + */ + memcpy(msg, wr, c2dev->req_vq.msg_size); + + /* + * post msg + */ + c2_mq_produce(&c2dev->req_vq); + + /* + * release adapter vq lock + */ + spin_unlock(&c2dev->vqlock); + return 0; +} + + +/* + * vq_wait_for_reply - block until the adapter posts a Verb Reply Message. + */ +int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req) +{ + if (!wait_event_timeout(req->wait_object, + atomic_read(&req->reply_ready), + 60*HZ)) + return -ETIMEDOUT; + + return 0; +} + +/* + * vq_repbuf_free - Free a Verbs Reply Buffer. + */ +void vq_repbuf_free(struct c2_dev *c2dev, void *reply) +{ + kmem_cache_free(c2dev->host_msg_cache, reply); +} diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h new file mode 100644 index 00000000000..33805627a60 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_VQ_H_ +#define _C2_VQ_H_ +#include <linux/sched.h> +#include "c2.h" +#include "c2_wr.h" +#include "c2_provider.h" + +struct c2_vq_req { + u64 reply_msg; /* ptr to reply msg */ + wait_queue_head_t wait_object; /* wait object for vq reqs */ + atomic_t reply_ready; /* set when reply is ready */ + atomic_t refcnt; /* used to cancel WRs... */ + int event; + struct iw_cm_id *cm_id; + struct c2_qp *qp; +}; + +extern int vq_init(struct c2_dev *c2dev); +extern void vq_term(struct c2_dev *c2dev); + +extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev); +extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req); +extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr); + +extern void *vq_repbuf_alloc(struct c2_dev *c2dev); +extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply); + +extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req); +#endif /* _C2_VQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h new file mode 100644 index 00000000000..3ec6c43bb0e --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_wr.h @@ -0,0 +1,1520 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_WR_H_ +#define _C2_WR_H_ + +#ifdef CCDEBUG +#define CCWR_MAGIC 0xb07700b0 +#endif + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +/* Maximum allowed size in bytes of private_data exchange + * on connect. + */ +#define C2_MAX_PRIVATE_DATA_SIZE 200 + +/* + * These types are shared among the adapter, host, and CCIL consumer. + */ +enum c2_cq_notification_type { + C2_CQ_NOTIFICATION_TYPE_NONE = 1, + C2_CQ_NOTIFICATION_TYPE_NEXT, + C2_CQ_NOTIFICATION_TYPE_NEXT_SE +}; + +enum c2_setconfig_cmd { + C2_CFG_ADD_ADDR = 1, + C2_CFG_DEL_ADDR = 2, + C2_CFG_ADD_ROUTE = 3, + C2_CFG_DEL_ROUTE = 4 +}; + +enum c2_getconfig_cmd { + C2_GETCONFIG_ROUTES = 1, + C2_GETCONFIG_ADDRS +}; + +/* + * CCIL Work Request Identifiers + */ +enum c2wr_ids { + CCWR_RNIC_OPEN = 1, + CCWR_RNIC_QUERY, + CCWR_RNIC_SETCONFIG, + CCWR_RNIC_GETCONFIG, + CCWR_RNIC_CLOSE, + CCWR_CQ_CREATE, + CCWR_CQ_QUERY, + CCWR_CQ_MODIFY, + CCWR_CQ_DESTROY, + CCWR_QP_CONNECT, + CCWR_PD_ALLOC, + CCWR_PD_DEALLOC, + CCWR_SRQ_CREATE, + CCWR_SRQ_QUERY, + CCWR_SRQ_MODIFY, + CCWR_SRQ_DESTROY, + CCWR_QP_CREATE, + CCWR_QP_QUERY, + CCWR_QP_MODIFY, + CCWR_QP_DESTROY, + CCWR_NSMR_STAG_ALLOC, + CCWR_NSMR_REGISTER, + CCWR_NSMR_PBL, + CCWR_STAG_DEALLOC, + CCWR_NSMR_REREGISTER, + CCWR_SMR_REGISTER, + CCWR_MR_QUERY, + CCWR_MW_ALLOC, + CCWR_MW_QUERY, + CCWR_EP_CREATE, + CCWR_EP_GETOPT, + CCWR_EP_SETOPT, + CCWR_EP_DESTROY, + CCWR_EP_BIND, + CCWR_EP_CONNECT, + CCWR_EP_LISTEN, + CCWR_EP_SHUTDOWN, + CCWR_EP_LISTEN_CREATE, + CCWR_EP_LISTEN_DESTROY, + CCWR_EP_QUERY, + CCWR_CR_ACCEPT, + CCWR_CR_REJECT, + CCWR_CONSOLE, + CCWR_TERM, + CCWR_FLASH_INIT, + CCWR_FLASH, + CCWR_BUF_ALLOC, + CCWR_BUF_FREE, + CCWR_FLASH_WRITE, + CCWR_INIT, /* WARNING: Don't move this ever again! */ + + + + /* Add new IDs here */ + + + + /* + * WARNING: CCWR_LAST must always be the last verbs id defined! + * All the preceding IDs are fixed, and must not change. + * You can add new IDs, but must not remove or reorder + * any IDs. If you do, YOU will ruin any hope of + * compatability between versions. + */ + CCWR_LAST, + + /* + * Start over at 1 so that arrays indexed by user wr id's + * begin at 1. This is OK since the verbs and user wr id's + * are always used on disjoint sets of queues. + */ + /* + * The order of the CCWR_SEND_XX verbs must + * match the order of the RDMA_OPs + */ + CCWR_SEND = 1, + CCWR_SEND_INV, + CCWR_SEND_SE, + CCWR_SEND_SE_INV, + CCWR_RDMA_WRITE, + CCWR_RDMA_READ, + CCWR_RDMA_READ_INV, + CCWR_MW_BIND, + CCWR_NSMR_FASTREG, + CCWR_STAG_INVALIDATE, + CCWR_RECV, + CCWR_NOP, + CCWR_UNIMPL, +/* WARNING: This must always be the last user wr id defined! */ +}; +#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2) + +/* + * SQ/RQ Work Request Types + */ +enum c2_wr_type { + C2_WR_TYPE_SEND = CCWR_SEND, + C2_WR_TYPE_SEND_SE = CCWR_SEND_SE, + C2_WR_TYPE_SEND_INV = CCWR_SEND_INV, + C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV, + C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE, + C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ, + C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV, + C2_WR_TYPE_BIND_MW = CCWR_MW_BIND, + C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG, + C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE, + C2_WR_TYPE_RECV = CCWR_RECV, + C2_WR_TYPE_NOP = CCWR_NOP, +}; + +struct c2_netaddr { + u32 ip_addr; + u32 netmask; + u32 mtu; +}; + +struct c2_route { + u32 ip_addr; /* 0 indicates the default route */ + u32 netmask; /* netmask associated with dst */ + u32 flags; + union { + u32 ipaddr; /* address of the nexthop interface */ + u8 enaddr[6]; + } nexthop; +}; + +/* + * A Scatter Gather Entry. + */ +struct c2_data_addr { + u32 stag; + u32 length; + u64 to; +}; + +/* + * MR and MW flags used by the consumer, RI, and RNIC. + */ +enum c2_mm_flags { + MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */ + MEM_VA_BASED = 0x0002, /* Not Zero-based */ + MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */ + MEM_LOCAL_READ = 0x0008, /* allow local reads */ + MEM_LOCAL_WRITE = 0x0010, /* allow local writes */ + MEM_REMOTE_READ = 0x0020, /* allow remote reads */ + MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */ + MEM_WINDOW_BIND = 0x0080, /* binds allowed */ + MEM_SHARED = 0x0100, /* set if MR is shared */ + MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */ +}; + +/* + * CCIL API ACF flags defined in terms of the low level mem flags. + * This minimizes translation needed in the user API + */ +enum c2_acf { + C2_ACF_LOCAL_READ = MEM_LOCAL_READ, + C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE, + C2_ACF_REMOTE_READ = MEM_REMOTE_READ, + C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE, + C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND +}; + +/* + * Image types of objects written to flash + */ +#define C2_FLASH_IMG_BITFILE 1 +#define C2_FLASH_IMG_OPTION_ROM 2 +#define C2_FLASH_IMG_VPD 3 + +/* + * to fix bug 1815 we define the max size allowable of the + * terminate message (per the IETF spec).Refer to the IETF + * protocal specification, section 12.1.6, page 64) + * The message is prefixed by 20 types of DDP info. + * + * Then the message has 6 bytes for the terminate control + * and DDP segment length info plus a DDP header (either + * 14 or 18 byts) plus 28 bytes for the RDMA header. + * Thus the max size in: + * 20 + (6 + 18 + 28) = 72 + */ +#define C2_MAX_TERMINATE_MESSAGE_SIZE (72) + +/* + * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h + */ +#define WR_BUILD_STR_LEN 64 + +/* + * WARNING: All of these structs need to align any 64bit types on + * 64 bit boundaries! 64bit types include u64 and u64. + */ + +/* + * Clustercore Work Request Header. Be sensitive to field layout + * and alignment. + */ +struct c2wr_hdr { + /* wqe_count is part of the cqe. It is put here so the + * adapter can write to it while the wr is pending without + * clobbering part of the wr. This word need not be dma'd + * from the host to adapter by libccil, but we copy it anyway + * to make the memcpy to the adapter better aligned. + */ + u32 wqe_count; + + /* Put these fields next so that later 32- and 64-bit + * quantities are naturally aligned. + */ + u8 id; + u8 result; /* adapter -> host */ + u8 sge_count; /* host -> adapter */ + u8 flags; /* host -> adapter */ + + u64 context; +#ifdef CCMSGMAGIC + u32 magic; + u32 pad; +#endif +} __attribute__((packed)); + +/* + *------------------------ RNIC ------------------------ + */ + +/* + * WR_RNIC_OPEN + */ + +/* + * Flags for the RNIC WRs + */ +enum c2_rnic_flags { + RNIC_IRD_STATIC = 0x0001, + RNIC_ORD_STATIC = 0x0002, + RNIC_QP_STATIC = 0x0004, + RNIC_SRQ_SUPPORTED = 0x0008, + RNIC_PBL_BLOCK_MODE = 0x0010, + RNIC_SRQ_MODEL_ARRIVAL = 0x0020, + RNIC_CQ_OVF_DETECTED = 0x0040, + RNIC_PRIV_MODE = 0x0080 +}; + +struct c2wr_rnic_open_req { + struct c2wr_hdr hdr; + u64 user_context; + u16 flags; /* See enum c2_rnic_flags */ + u16 port_num; +} __attribute__((packed)); + +struct c2wr_rnic_open_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +union c2wr_rnic_open { + struct c2wr_rnic_open_req req; + struct c2wr_rnic_open_rep rep; +} __attribute__((packed)); + +struct c2wr_rnic_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +/* + * WR_RNIC_QUERY + */ +struct c2wr_rnic_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + u32 vendor_id; + u32 part_number; + u32 hw_version; + u32 fw_ver_major; + u32 fw_ver_minor; + u32 fw_ver_patch; + char fw_ver_build_str[WR_BUILD_STR_LEN]; + u32 max_qps; + u32 max_qp_depth; + u32 max_srq_depth; + u32 max_send_sgl_depth; + u32 max_rdma_sgl_depth; + u32 max_cqs; + u32 max_cq_depth; + u32 max_cq_event_handlers; + u32 max_mrs; + u32 max_pbl_depth; + u32 max_pds; + u32 max_global_ird; + u32 max_global_ord; + u32 max_qp_ird; + u32 max_qp_ord; + u32 flags; + u32 max_mws; + u32 pbe_range_low; + u32 pbe_range_high; + u32 max_srqs; + u32 page_size; +} __attribute__((packed)); + +union c2wr_rnic_query { + struct c2wr_rnic_query_req req; + struct c2wr_rnic_query_rep rep; +} __attribute__((packed)); + +/* + * WR_RNIC_GETCONFIG + */ + +struct c2wr_rnic_getconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 option; /* see c2_getconfig_cmd_t */ + u64 reply_buf; + u32 reply_buf_len; +} __attribute__((packed)) ; + +struct c2wr_rnic_getconfig_rep { + struct c2wr_hdr hdr; + u32 option; /* see c2_getconfig_cmd_t */ + u32 count_len; /* length of the number of addresses configured */ +} __attribute__((packed)) ; + +union c2wr_rnic_getconfig { + struct c2wr_rnic_getconfig_req req; + struct c2wr_rnic_getconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_SETCONFIG + */ +struct c2wr_rnic_setconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 option; /* See c2_setconfig_cmd_t */ + /* variable data and pad. See c2_netaddr and c2_route */ + u8 data[0]; +} __attribute__((packed)) ; + +struct c2wr_rnic_setconfig_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_setconfig { + struct c2wr_rnic_setconfig_req req; + struct c2wr_rnic_setconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_CLOSE + */ +struct c2wr_rnic_close_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)) ; + +struct c2wr_rnic_close_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_close { + struct c2wr_rnic_close_req req; + struct c2wr_rnic_close_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ CQ ------------------------ + */ +struct c2wr_cq_create_req { + struct c2wr_hdr hdr; + u64 shared_ht; + u64 user_context; + u64 msg_pool; + u32 rnic_handle; + u32 msg_size; + u32 depth; +} __attribute__((packed)) ; + +struct c2wr_cq_create_rep { + struct c2wr_hdr hdr; + u32 mq_index; + u32 adapter_shared; + u32 cq_handle; +} __attribute__((packed)) ; + +union c2wr_cq_create { + struct c2wr_cq_create_req req; + struct c2wr_cq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; + u32 new_depth; + u64 new_msg_pool; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_modify { + struct c2wr_cq_modify_req req; + struct c2wr_cq_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_destroy { + struct c2wr_cq_destroy_req req; + struct c2wr_cq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ PD ------------------------ + */ +struct c2wr_pd_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_alloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_alloc { + struct c2wr_pd_alloc_req req; + struct c2wr_pd_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_dealloc { + struct c2wr_pd_dealloc_req req; + struct c2wr_pd_dealloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ SRQ ------------------------ + */ +struct c2wr_srq_create_req { + struct c2wr_hdr hdr; + u64 shared_ht; + u64 user_context; + u32 rnic_handle; + u32 srq_depth; + u32 srq_limit; + u32 sgl_depth; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_srq_create_rep { + struct c2wr_hdr hdr; + u32 srq_depth; + u32 sgl_depth; + u32 msg_size; + u32 mq_index; + u32 mq_start; + u32 srq_handle; +} __attribute__((packed)) ; + +union c2wr_srq_create { + struct c2wr_srq_create_req req; + struct c2wr_srq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 srq_handle; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_srq_destroy { + struct c2wr_srq_destroy_req req; + struct c2wr_srq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ QP ------------------------ + */ +enum c2wr_qp_flags { + QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */ + QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */ + QP_MW_BIND = 0x00000004, /* MWs enabled */ + QP_ZERO_STAG = 0x00000008, /* enabled? */ + QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */ + QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */ + /* enabled? */ +}; + +struct c2wr_qp_create_req { + struct c2wr_hdr hdr; + u64 shared_sq_ht; + u64 shared_rq_ht; + u64 user_context; + u32 rnic_handle; + u32 sq_cq_handle; + u32 rq_cq_handle; + u32 sq_depth; + u32 rq_depth; + u32 srq_handle; + u32 srq_limit; + u32 flags; /* see enum c2wr_qp_flags */ + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u32 ord; + u32 ird; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_qp_create_rep { + struct c2wr_hdr hdr; + u32 sq_depth; + u32 rq_depth; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u32 ord; + u32 ird; + u32 sq_msg_size; + u32 sq_mq_index; + u32 sq_mq_start; + u32 rq_msg_size; + u32 rq_mq_index; + u32 rq_mq_start; + u32 qp_handle; +} __attribute__((packed)) ; + +union c2wr_qp_create { + struct c2wr_qp_create_req req; + struct c2wr_qp_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + u32 rnic_handle; + u32 sq_depth; + u32 rq_depth; + u32 send_sgl_depth; + u32 rdma_write_sgl_depth; + u32 recv_sgl_depth; + u32 ord; + u32 ird; + u16 qp_state; + u16 flags; /* see c2wr_qp_flags_t */ + u32 qp_id; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; + u32 terminate_msg_length; /* 0 if not present */ + u8 data[0]; + /* Terminate Message in-line here. */ +} __attribute__((packed)) ; + +union c2wr_qp_query { + struct c2wr_qp_query_req req; + struct c2wr_qp_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_req { + struct c2wr_hdr hdr; + u64 stream_msg; + u32 stream_msg_length; + u32 rnic_handle; + u32 qp_handle; + u32 next_qp_state; + u32 ord; + u32 ird; + u32 sq_depth; + u32 rq_depth; + u32 llp_ep_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_rep { + struct c2wr_hdr hdr; + u32 ord; + u32 ird; + u32 sq_depth; + u32 rq_depth; + u32 sq_msg_size; + u32 sq_mq_index; + u32 sq_mq_start; + u32 rq_msg_size; + u32 rq_mq_index; + u32 rq_mq_start; +} __attribute__((packed)) ; + +union c2wr_qp_modify { + struct c2wr_qp_modify_req req; + struct c2wr_qp_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_qp_destroy { + struct c2wr_qp_destroy_req req; + struct c2wr_qp_destroy_rep rep; +} __attribute__((packed)) ; + +/* + * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can + * only be posted when a QP is in IDLE state. After the connect request is + * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state. + * No synchronous reply from adapter to this WR. The results of + * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS + * See c2wr_ae_active_connect_results_t + */ +struct c2wr_qp_connect_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; + u32 remote_addr; + u16 remote_port; + u16 pad; + u32 private_data_length; + u8 private_data[0]; /* Private data in-line. */ +} __attribute__((packed)) ; + +struct c2wr_qp_connect { + struct c2wr_qp_connect_req req; + /* no synchronous reply. */ +} __attribute__((packed)) ; + + +/* + *------------------------ MM ------------------------ + */ + +struct c2wr_nsmr_stag_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pbl_depth; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +struct c2wr_nsmr_stag_alloc_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_stag_alloc { + struct c2wr_nsmr_stag_alloc_req req; + struct c2wr_nsmr_stag_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 pd_id; + u32 pbl_depth; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_register { + struct c2wr_nsmr_register_req req; + struct c2wr_nsmr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 flags; + u32 stag_index; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_nsmr_pbl { + struct c2wr_nsmr_pbl_req req; + struct c2wr_nsmr_pbl_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mr_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mr_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; + u32 pbl_depth; +} __attribute__((packed)) ; + +union c2wr_mr_query { + struct c2wr_mr_query_req req; + struct c2wr_mr_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mw_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +union c2wr_mw_query { + struct c2wr_mw_query_req req; + struct c2wr_mw_query_rep rep; +} __attribute__((packed)) ; + + +struct c2wr_stag_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_stag_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_stag_dealloc { + struct c2wr_stag_dealloc_req req; + struct c2wr_stag_dealloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; + u32 pbl_depth; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + u32 pad1; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_reregister { + struct c2wr_nsmr_reregister_req req; + struct c2wr_nsmr_reregister_rep rep; +} __attribute__((packed)) ; + +struct c2wr_smr_register_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_smr_register_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_smr_register { + struct c2wr_smr_register_req req; + struct c2wr_smr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_mw_alloc { + struct c2wr_mw_alloc_req req; + struct c2wr_mw_alloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ WRs ----------------------- + */ + +struct c2wr_user_hdr { + struct c2wr_hdr hdr; /* Has status and WR Type */ +} __attribute__((packed)) ; + +enum c2_qp_state { + C2_QP_STATE_IDLE = 0x01, + C2_QP_STATE_CONNECTING = 0x02, + C2_QP_STATE_RTS = 0x04, + C2_QP_STATE_CLOSING = 0x08, + C2_QP_STATE_TERMINATE = 0x10, + C2_QP_STATE_ERROR = 0x20, +}; + +/* Completion queue entry. */ +struct c2wr_ce { + struct c2wr_hdr hdr; /* Has status and WR Type */ + u64 qp_user_context; /* c2_user_qp_t * */ + u32 qp_state; /* Current QP State */ + u32 handle; /* QPID or EP Handle */ + u32 bytes_rcvd; /* valid for RECV WCs */ + u32 stag; +} __attribute__((packed)) ; + + +/* + * Flags used for all post-sq WRs. These must fit in the flags + * field of the struct c2wr_hdr (eight bits). + */ +enum { + SQ_SIGNALED = 0x01, + SQ_READ_FENCE = 0x02, + SQ_FENCE = 0x04, +}; + +/* + * Common fields for all post-sq WRs. Namely the standard header and a + * secondary header with fields common to all post-sq WRs. + */ +struct c2_sq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * Same as above but for post-rq WRs. + */ +struct c2_rq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * use the same struct for all sends. + */ +struct c2wr_send_req { + struct c2_sq_hdr sq_hdr; + u32 sge_len; + u32 remote_stag; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_send { + struct c2wr_send_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_write_req { + struct c2_sq_hdr sq_hdr; + u64 remote_to; + u32 remote_stag; + u32 sge_len; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_rdma_write { + struct c2wr_rdma_write_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_read_req { + struct c2_sq_hdr sq_hdr; + u64 local_to; + u64 remote_to; + u32 local_stag; + u32 remote_stag; + u32 length; +} __attribute__((packed)); + +union c2wr_rdma_read { + struct c2wr_rdma_read_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_mw_bind_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 mw_stag_index; + u32 mr_stag_index; + u32 length; + u32 flags; +} __attribute__((packed)); + +union c2wr_mw_bind { + struct c2wr_mw_bind_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_nsmr_fastreg_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 stag_index; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)); + +union c2wr_nsmr_fastreg { + struct c2wr_nsmr_fastreg_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_stag_invalidate_req { + struct c2_sq_hdr sq_hdr; + u8 stag_key; + u8 pad[3]; + u32 stag_index; +} __attribute__((packed)); + +union c2wr_stag_invalidate { + struct c2wr_stag_invalidate_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +union c2wr_sqwr { + struct c2_sq_hdr sq_hdr; + struct c2wr_send_req send; + struct c2wr_send_req send_se; + struct c2wr_send_req send_inv; + struct c2wr_send_req send_se_inv; + struct c2wr_rdma_write_req rdma_write; + struct c2wr_rdma_read_req rdma_read; + struct c2wr_mw_bind_req mw_bind; + struct c2wr_nsmr_fastreg_req nsmr_fastreg; + struct c2wr_stag_invalidate_req stag_inv; +} __attribute__((packed)); + + +/* + * RQ WRs + */ +struct c2wr_rqwr { + struct c2_rq_hdr rq_hdr; + u8 data[0]; /* array of SGEs */ +} __attribute__((packed)); + +union c2wr_recv { + struct c2wr_rqwr req; + struct c2wr_ce rep; +} __attribute__((packed)); + +/* + * All AEs start with this header. Most AEs only need to convey the + * information in the header. Some, like LLP connection events, need + * more info. The union typdef c2wr_ae_t has all the possible AEs. + * + * hdr.context is the user_context from the rnic_open WR. NULL If this + * is not affiliated with an rnic + * + * hdr.id is the AE identifier (eg; CCAE_REMOTE_SHUTDOWN, + * CCAE_LLP_CLOSE_COMPLETE) + * + * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ + * + * user_context is the context passed down when the host created the resource. + */ +struct c2wr_ae_hdr { + struct c2wr_hdr hdr; + u64 user_context; /* user context for this res. */ + u32 resource_type; /* see enum c2_resource_indicator */ + u32 resource; /* handle for resource */ + u32 qp_state; /* current QP State */ +} __attribute__((packed)); + +/* + * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ, + * the adapter moves the QP into RTS state + */ +struct c2wr_ae_active_connect_results { + struct c2wr_ae_hdr ae_hdr; + u32 laddr; + u32 raddr; + u16 lport; + u16 rport; + u32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +/* + * When connections are established by the stack (and the private data + * MPA frame is received), the adapter will generate an event to the host. + * The details of the connection, any private data, and the new connection + * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the + * AE queue: + */ +struct c2wr_ae_connection_request { + struct c2wr_ae_hdr ae_hdr; + u32 cr_handle; /* connreq handle (sock ptr) */ + u32 laddr; + u32 raddr; + u16 lport; + u16 rport; + u32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +union c2wr_ae { + struct c2wr_ae_hdr ae_generic; + struct c2wr_ae_active_connect_results ae_active_connect_results; + struct c2wr_ae_connection_request ae_connection_request; +} __attribute__((packed)); + +struct c2wr_init_req { + struct c2wr_hdr hdr; + u64 hint_count; + u64 q0_host_shared; + u64 q1_host_shared; + u64 q1_host_msg_pool; + u64 q2_host_shared; + u64 q2_host_msg_pool; +} __attribute__((packed)); + +struct c2wr_init_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_init { + struct c2wr_init_req req; + struct c2wr_init_rep rep; +} __attribute__((packed)); + +/* + * For upgrading flash. + */ + +struct c2wr_flash_init_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +struct c2wr_flash_init_rep { + struct c2wr_hdr hdr; + u32 adapter_flash_buf_offset; + u32 adapter_flash_len; +} __attribute__((packed)); + +union c2wr_flash_init { + struct c2wr_flash_init_req req; + struct c2wr_flash_init_rep rep; +} __attribute__((packed)); + +struct c2wr_flash_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 len; +} __attribute__((packed)); + +struct c2wr_flash_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash { + struct c2wr_flash_req req; + struct c2wr_flash_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 size; +} __attribute__((packed)); + +struct c2wr_buf_alloc_rep { + struct c2wr_hdr hdr; + u32 offset; /* 0 if mem not available */ + u32 size; /* 0 if mem not available */ +} __attribute__((packed)); + +union c2wr_buf_alloc { + struct c2wr_buf_alloc_req req; + struct c2wr_buf_alloc_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_free_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; /* Must match value from alloc */ + u32 size; /* Must match value from alloc */ +} __attribute__((packed)); + +struct c2wr_buf_free_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_buf_free { + struct c2wr_buf_free_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_flash_write_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; + u32 size; + u32 type; + u32 flags; +} __attribute__((packed)); + +struct c2wr_flash_write_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash_write { + struct c2wr_flash_write_req req; + struct c2wr_flash_write_rep rep; +} __attribute__((packed)); + +/* + * Messages for LLP connection setup. + */ + +/* + * Listen Request. This allocates a listening endpoint to allow passive + * connection setup. Newly established LLP connections are passed up + * via an AE. See c2wr_ae_connection_request_t + */ +struct c2wr_ep_listen_create_req { + struct c2wr_hdr hdr; + u64 user_context; /* returned in AEs. */ + u32 rnic_handle; + u32 local_addr; /* local addr, or 0 */ + u16 local_port; /* 0 means "pick one" */ + u16 pad; + u32 backlog; /* tradional tcp listen bl */ +} __attribute__((packed)); + +struct c2wr_ep_listen_create_rep { + struct c2wr_hdr hdr; + u32 ep_handle; /* handle to new listening ep */ + u16 local_port; /* resulting port... */ + u16 pad; +} __attribute__((packed)); + +union c2wr_ep_listen_create { + struct c2wr_ep_listen_create_req req; + struct c2wr_ep_listen_create_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_ep_listen_destroy { + struct c2wr_ep_listen_destroy_req req; + struct c2wr_ep_listen_destroy_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_query_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +} __attribute__((packed)); + +union c2wr_ep_query { + struct c2wr_ep_query_req req; + struct c2wr_ep_query_rep rep; +} __attribute__((packed)); + + +/* + * The host passes this down to indicate acceptance of a pending iWARP + * connection. The cr_handle was obtained from the CONNECTION_REQUEST + * AE passed up by the adapter. See c2wr_ae_connection_request_t. + */ +struct c2wr_cr_accept_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; /* QP to bind to this LLP conn */ + u32 ep_handle; /* LLP handle to accept */ + u32 private_data_length; + u8 private_data[0]; /* data in-line in msg. */ +} __attribute__((packed)); + +/* + * adapter sends reply when private data is successfully submitted to + * the LLP. + */ +struct c2wr_cr_accept_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_accept { + struct c2wr_cr_accept_req req; + struct c2wr_cr_accept_rep rep; +} __attribute__((packed)); + +/* + * The host sends this down if a given iWARP connection request was + * rejected by the consumer. The cr_handle was obtained from a + * previous c2wr_ae_connection_request_t AE sent by the adapter. + */ +struct c2wr_cr_reject_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; /* LLP handle to reject */ +} __attribute__((packed)); + +/* + * Dunno if this is needed, but we'll add it for now. The adapter will + * send the reject_reply after the LLP endpoint has been destroyed. + */ +struct c2wr_cr_reject_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_reject { + struct c2wr_cr_reject_req req; + struct c2wr_cr_reject_rep rep; +} __attribute__((packed)); + +/* + * console command. Used to implement a debug console over the verbs + * request and reply queues. + */ + +/* + * Console request message. It contains: + * - message hdr with id = CCWR_CONSOLE + * - the physaddr/len of host memory to be used for the reply. + * - the command string. eg: "netstat -s" or "zoneinfo" + */ +struct c2wr_console_req { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u64 reply_buf; /* pinned host buf for reply */ + u32 reply_buf_len; /* length of reply buffer */ + u8 command[0]; /* NUL terminated ascii string */ + /* containing the command req */ +} __attribute__((packed)); + +/* + * flags used in the console reply. + */ +enum c2_console_flags { + CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */ +} __attribute__((packed)); + +/* + * Console reply message. + * hdr.result contains the c2_status_t error if the reply was _not_ generated, + * or C2_OK if the reply was generated. + */ +struct c2wr_console_rep { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u32 flags; +} __attribute__((packed)); + +union c2wr_console { + struct c2wr_console_req req; + struct c2wr_console_rep rep; +} __attribute__((packed)); + + +/* + * Giant union with all WRs. Makes life easier... + */ +union c2wr { + struct c2wr_hdr hdr; + struct c2wr_user_hdr user_hdr; + union c2wr_rnic_open rnic_open; + union c2wr_rnic_query rnic_query; + union c2wr_rnic_getconfig rnic_getconfig; + union c2wr_rnic_setconfig rnic_setconfig; + union c2wr_rnic_close rnic_close; + union c2wr_cq_create cq_create; + union c2wr_cq_modify cq_modify; + union c2wr_cq_destroy cq_destroy; + union c2wr_pd_alloc pd_alloc; + union c2wr_pd_dealloc pd_dealloc; + union c2wr_srq_create srq_create; + union c2wr_srq_destroy srq_destroy; + union c2wr_qp_create qp_create; + union c2wr_qp_query qp_query; + union c2wr_qp_modify qp_modify; + union c2wr_qp_destroy qp_destroy; + struct c2wr_qp_connect qp_connect; + union c2wr_nsmr_stag_alloc nsmr_stag_alloc; + union c2wr_nsmr_register nsmr_register; + union c2wr_nsmr_pbl nsmr_pbl; + union c2wr_mr_query mr_query; + union c2wr_mw_query mw_query; + union c2wr_stag_dealloc stag_dealloc; + union c2wr_sqwr sqwr; + struct c2wr_rqwr rqwr; + struct c2wr_ce ce; + union c2wr_ae ae; + union c2wr_init init; + union c2wr_ep_listen_create ep_listen_create; + union c2wr_ep_listen_destroy ep_listen_destroy; + union c2wr_cr_accept cr_accept; + union c2wr_cr_reject cr_reject; + union c2wr_console console; + union c2wr_flash_init flash_init; + union c2wr_flash flash; + union c2wr_buf_alloc buf_alloc; + union c2wr_buf_free buf_free; + union c2wr_flash_write flash_write; +} __attribute__((packed)); + + +/* + * Accessors for the wr fields that are packed together tightly to + * reduce the wr message size. The wr arguments are void* so that + * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types + * in the struct c2wr union can be passed in. + */ +static __inline__ u8 c2_wr_get_id(void *wr) +{ + return ((struct c2wr_hdr *) wr)->id; +} +static __inline__ void c2_wr_set_id(void *wr, u8 id) +{ + ((struct c2wr_hdr *) wr)->id = id; +} +static __inline__ u8 c2_wr_get_result(void *wr) +{ + return ((struct c2wr_hdr *) wr)->result; +} +static __inline__ void c2_wr_set_result(void *wr, u8 result) +{ + ((struct c2wr_hdr *) wr)->result = result; +} +static __inline__ u8 c2_wr_get_flags(void *wr) +{ + return ((struct c2wr_hdr *) wr)->flags; +} +static __inline__ void c2_wr_set_flags(void *wr, u8 flags) +{ + ((struct c2wr_hdr *) wr)->flags = flags; +} +static __inline__ u8 c2_wr_get_sge_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->sge_count; +} +static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count) +{ + ((struct c2wr_hdr *) wr)->sge_count = sge_count; +} +static __inline__ u32 c2_wr_get_wqe_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->wqe_count; +} +static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count) +{ + ((struct c2wr_hdr *) wr)->wqe_count = wqe_count; +} + +#endif /* _C2_WR_H_ */ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig new file mode 100644 index 00000000000..922389b6439 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -0,0 +1,16 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS && INFINIBAND + ---help--- + This driver supports the IBM pSeries eHCA InfiniBand adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. + +config INFINIBAND_EHCA_SCALING + bool "Scaling support (EXPERIMENTAL)" + depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL + ---help--- + eHCA scaling support schedules the CQ callbacks to different CPUs. + + To enable this feature choose Y here. diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile new file mode 100644 index 00000000000..74d284e46a4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick <schickhj@de.ibm.com> +# Christoph Raisch <raisch@de.ibm.com> +# Joachim Fenkes <fenkes@de.ibm.com> +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c new file mode 100644 index 00000000000..3bac197f901 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -0,0 +1,271 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * adress vector functions + * + * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Khadija Souissi <souissik@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <asm/current.h> + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, SLAB_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + int ah_mult = ib_rate_to_mult(ah_attr->static_rate); + int ehca_mult = + ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); + + if (ah_mult >= ehca_mult) + av->av.ipd = 0; + else + av->av.ipd = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + /* for the time being we use a hard coded PMTU of 2048 Bytes */ + av->av.pmtu = 4; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = 4; /* see also comment in create_ah() */ + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!av_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h new file mode 100644 index 00000000000..1c722032319 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -0,0 +1,346 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +#include "ehca_classes.h" +#include "ipz_pt_fn.h" + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#include "ehca_irq.h" + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_aqp1; + enum ib_rate rate; + enum ib_port_state port_state; +}; + +struct ehca_shca { + struct ib_device ib_device; + struct ibmebus_dev *ibmebus_dev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + u32 ownpid; +}; + +struct ehca_qp { + struct ib_qp ib_qp; + u32 qp_type; + struct ipz_queue ipz_squeue; + struct ipz_queue ipz_rqueue; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + u64 uspace_squeue; + u64 uspace_rqueue; + u64 uspace_fwh; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; +}; + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + u64 uspace_queue; + u64 uspace_fwh; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; + spinlock_t task_lock; + u32 ownpid; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_pages; /* number of MR pages */ + u32 num_4k; /* number of 4k "page" portions to form MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; + /* data for userspace bridge */ + u32 nr_of_pages; + void *pagearray; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_pages; + u64 page_cnt; + u64 num_4k; /* number of 4k "page" portions */ + u64 page_4k_cnt; /* counter for 4k "page" portions */ + u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ + + /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + + /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + + /* type EHCA_MR_PGI_FMR section */ + u64 *page_list; + u64 next_listelem; + /* next_4k also used within EHCA_MR_PGI_FMR */ +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +struct ehca_module *ehca_module_new(void); + +int ehca_module_delete(struct ehca_module *me); + +int ehca_eq_ctor(struct ehca_eq *eq); + +int ehca_eq_dtor(struct ehca_eq *eq); + +struct ehca_shca *ehca_shca_new(void); + +int ehca_shca_delete(struct ehca_shca *me); + +struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); + +extern spinlock_t ehca_qp_idr_lock; +extern spinlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern int ehca_use_hp_mr; + +struct ipzu_queue_resp { + u64 queue; /* points to first queue entry */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 dummy; /* padding for 8 byte alignment */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + struct h_galpas galpas; +}; + +struct ehca_create_qp_resp { + u32 qp_num; + u32 token; + u32 qp_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 dummy; /* padding for 8 byte alignment */ + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; + struct h_galpas galpas; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +struct ehca_alloc_qp_parms { + int servicetype; + int sigtype; + int daqp_ctrl; + int max_send_sge; + int max_recv_sge; + int ud_av_l_key_ctl; + + u16 act_nr_send_wqes; + u16 act_nr_recv_wqes; + u8 act_nr_recv_sges; + u8 act_nr_send_sges; + + u32 nr_rq_pages; + u32 nr_sq_pages; + + struct ipz_eq_handle ipz_eq_handle; + struct ipz_pd pd; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h new file mode 100644 index 00000000000..5665f213b81 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -0,0 +1,236 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) +#define MQPCB_DLID EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) +#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c new file mode 100644 index 00000000000..458fe19648a --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -0,0 +1,427 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Khadija Souissi <souissi@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/current.h> + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long spl_flags; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long spl_flags; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq *my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, ret, i; + unsigned long flags; + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + return ERR_PTR(-ENOMEM); + } + + memset(my_cq, 0, sizeof(struct ehca_cq)); + memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + my_cq->ownpid = current->tgid; + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + do { + if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't reserve idr nr. device=%p", + device); + goto create_cq_exit1; + } + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. + */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lx device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lx", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lx" + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + struct vm_area_struct *vma; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, + ipz_queue->queue_length, + (void**)&resp.ipz_queue.queue, + &vma); + if (ret) { + ehca_err(device, "Could not mmap queue pages"); + cq = ERR_PTR(ret); + goto create_cq_exit4; + } + my_cq->uspace_queue = resp.ipz_queue.queue; + resp.galpas = my_cq->galpas; + ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + ehca_err(device, "Could not mmap fw_handle"); + cq = ERR_PTR(ret); + goto create_cq_exit5; + } + my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + goto create_cq_exit6; + } + } + + return cq; + +create_cq_exit6: + ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + +create_cq_exit5: + ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); + +create_cq_exit4: + ipz_queue_dtor(&my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + int ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + u32 cur_pid = current->tgid; + unsigned long flags; + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + while (my_cq->nr_callbacks) + yield(); + + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* un-mmap if vma alloc */ + if (my_cq->uspace_queue ) { + ret = ehca_munmap(my_cq->uspace_queue, + my_cq->ipz_queue.queue_length); + if (ret) + ehca_err(device, "Could not munmap queue ehca_cq=%p " + "cq_num=%x", my_cq, cq_num); + ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + if (ret) + ehca_err(device, "Could not munmap fwh ehca_cq=%p " + "cq_num=%x", my_cq, cq_num); + } + + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(&my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + u32 cur_pid = current->tgid; + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + kmem_cache_destroy(cq_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c new file mode 100644 index 00000000000..5281dec66f1 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -0,0 +1,185 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Khadija Souissi <souissi@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + u64 ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + ret = H_RESOURCE; + goto create_eq_exit2; + } + + rpage = virt_to_abs(vpage); + ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (ret != H_PAGE_REGISTERED || !vpage) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq, + SA_INTERRUPT, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + } else if (type == EHCA_NEQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq, + SA_INTERRUPT, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(&eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + spin_lock_irqsave(&eq->spinlock, flags); + ibmebus_free_irq(NULL, eq->ist, (void *)shca); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + spin_unlock_irqrestore(&eq->spinlock, flags); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(&eq->ipz_queue); + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c new file mode 100644 index 00000000000..5eae6ac4842 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -0,0 +1,241 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_tools.h" +#include "hcp_if.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + + memset(props, 0, sizeof(struct ib_device_attr)); + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = min_t(int, rblock->max_qp, INT_MAX); + props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX); + props->max_sge = min_t(int, rblock->max_sge, INT_MAX); + props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX); + props->max_cq = min_t(int, rblock->max_cq, INT_MAX); + props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX); + props->max_mr = min_t(int, rblock->max_mr, INT_MAX); + props->max_mw = min_t(int, rblock->max_mw, INT_MAX); + props->max_pd = min_t(int, rblock->max_pd, INT_MAX); + props->max_ah = min_t(int, rblock->max_ah, INT_MAX); + props->max_fmr = min_t(int, rblock->max_mr, INT_MAX); + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 16; + props->local_ca_ack_delay + = rblock->local_ca_ack_delay; + props->max_raw_ipv6_qp + = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX); + props->max_raw_ethy_qp + = min_t(int, rblock->max_raw_ethy_qp, INT_MAX); + props->max_mcast_grp + = min_t(int, rblock->max_mcast_grp, INT_MAX); + props->max_mcast_qp_attach + = min_t(int, rblock->max_mcast_qp_attach, INT_MAX); + props->max_total_mcast_qp_attach + = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + +query_device1: + kfree(rblock); + + return ret; +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + props->state = rblock->state; + + switch (rblock->max_mtu) { + case 0x1: + props->active_mtu = props->max_mtu = IB_MTU_256; + break; + case 0x2: + props->active_mtu = props->max_mtu = IB_MTU_512; + break; + case 0x3: + props->active_mtu = props->max_mtu = IB_MTU_1024; + break; + case 0x4: + props->active_mtu = props->max_mtu = IB_MTU_2048; + break; + case 0x5: + props->active_mtu = props->max_mtu = IB_MTU_4096; + break; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + rblock->max_mtu); + break; + } + + props->gid_tbl_len = rblock->gid_tbl_len; + props->max_msg_sz = rblock->max_msg_sz; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + +query_port1: + kfree(rblock); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct hipz_query_port *rblock; + + if (index > 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + kfree(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + kfree(rblock); + + return ret; +} + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + /* Not implemented yet */ + return -EFAULT; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c new file mode 100644 index 00000000000..2a65b5be197 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -0,0 +1,762 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Khadija Souissi <souissi@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) + +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool* pool; +static struct notifier_block comp_pool_callback_nb; + +#endif + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca * shca, void* data, + u64* rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp*)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%lx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq*)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%lx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown errror type: %lx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %lx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%lx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) { + ehca_err(&shca->ib_device, + "No error data is available: %lx.", resource); + } + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > PAGE_SIZE) + length = PAGE_SIZE; + + print_error_data(shca, data, rblock, length); + } + else { + ehca_err(&shca->ib_device, + "Error data could not be fetched: %lx", resource); + } + + kfree(rblock); + +error_data1: + return ret; + +} + +static void qp_event_callback(struct ehca_shca *shca, + u64 eqe, + enum ib_event_type event_type) +{ + struct ib_event event; + struct ehca_qp *qp; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + + if (!qp) + return; + + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + if (!qp->ib_qp.event_handler) + return; + + event.device = &shca->ib_device; + event.event = event_type; + event.element.qp = &qp->ib_qp; + + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating error */ + ehca_err(&shca->ib_device, "Path migration error."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + struct ib_event event; + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + ehca_info(&shca->ib_device, + "port %x is active.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + } else { + ehca_info(&shca->ib_device, + "port %x is inactive.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + ehca_warn(&shca->ib_device, + "disruptive port %x configuration change", port); + + ehca_info(&shca->ib_device, + "port %x is inactive.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + + ehca_info(&shca->ib_device, + "port %x is active.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_eq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + int int_state; + int query_cnt = 0; + + do { + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + if ((shca->hw_level >= 2) && eqe) + int_state = 1; + else + int_state = 0; + + while ((int_state == 1) || eqe) { + while (eqe) { + u64 eqe_value = eqe->entry; + + ehca_dbg(&shca->ib_device, + "eqe_value=%lx", eqe_value); + + /* TODO: better structure */ + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, + eqe_value)) { + unsigned long flags; + u32 token; + struct ehca_cq *cq; + + ehca_dbg(&shca->ib_device, + "... completion event"); + token = + EHCA_BMASK_GET(EQE_CQ_TOKEN, + eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, + flags); + cq = idr_find(&ehca_cq_idr, token); + + if (cq == NULL) { + spin_unlock(&ehca_cq_idr_lock); + break; + } + + reset_eq_pending(cq); +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + queue_comp_task(cq); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); +#else + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); + comp_event_callback(cq); +#endif + } else { + ehca_dbg(&shca->ib_device, + "... non completion event"); + parse_identifier(shca, eqe_value); + } + eqe = + (struct ehca_eqe *)ehca_poll_eq(shca, + &shca->eq); + } + + if (shca->hw_level >= 2) { + int_state = + hipz_h_query_int_state(shca->ipz_hca_handle, + shca->eq.ist); + query_cnt++; + iosync(); + if (query_cnt >= 100) { + query_cnt = 0; + int_state = 0; + } + } + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + } + } while (int_state != 0); + + return; +} + +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + +static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +{ + unsigned long flags_last_cpu; + + if (ehca_debug_level) + ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); + pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); + if (pool->last_cpu == NR_CPUS) + pool->last_cpu = first_cpu(cpu_online_map); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); + + return pool->last_cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct) +{ + unsigned long flags_cct; + unsigned long flags_cq; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + spin_lock_irqsave(&__cq->task_lock, flags_cq); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up(&cct->wait_queue); + } + else + __cq->nr_callbacks++; + + spin_unlock_irqrestore(&__cq->task_lock, flags_cq); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu; + int cpu_id; + struct ehca_cpu_comp_task *cct; + + cpu = get_cpu(); + cpu_id = find_next_online_cpu(pool); + + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + + if (cct->cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + } + + __queue_comp_task(__cq, cct); + + put_cpu(); + + return; +} + +static void run_comp_task(struct ehca_cpu_comp_task* cct) +{ + struct ehca_cq *cq; + unsigned long flags_cct; + unsigned long flags_cq; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + comp_event_callback(cq); + spin_lock_irqsave(&cct->task_lock, flags_cct); + + spin_lock_irqsave(&cq->task_lock, flags_cq); + cq->nr_callbacks--; + if (cq->nr_callbacks == 0) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock_irqrestore(&cq->task_lock, flags_cq); + + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + return; +} + +static int comp_task(void *__cct) +{ + struct ehca_cpu_comp_task* cct = __cct; + DECLARE_WAITQUEUE(wait, current); + + set_current_state(TASK_INTERRUPTIBLE); + while(!kthread_should_stop()) { + add_wait_queue(&cct->wait_queue, &wait); + + if (list_empty(&cct->cq_list)) + schedule(); + else + __set_current_state(TASK_RUNNING); + + remove_wait_queue(&cct->wait_queue, &wait); + + if (!list_empty(&cct->cq_list)) + run_comp_task(__cct); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + return 0; +} + +static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + init_waitqueue_head(&cct->wait_queue); + cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); + + return cct->task; +} + +static void destroy_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + struct task_struct *task; + unsigned long flags_cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + task = cct->task; + cct->task = NULL; + cct->cq_jobs = 0; + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + if (task) + kthread_stop(task); + + return; +} + +static void take_over_work(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + LIST_HEAD(list); + struct ehca_cq *cq; + unsigned long flags_cct; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + list_splice_init(&cct->cq_list, &list); + + while(!list_empty(&list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + + list_del(&cq->entry); + __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, + smp_processor_id())); + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + +} + +static int comp_pool_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct ehca_cpu_comp_task *cct; + + switch (action) { + case CPU_UP_PREPARE: + ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); + if(!create_comp_task(pool, cpu)) { + ehca_gen_err("Can't create comp_task for cpu: %x", cpu); + return NOTIFY_BAD; + } + break; + case CPU_UP_CANCELED: + ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, any_online_cpu(cpu_online_map)); + destroy_comp_task(pool, cpu); + break; + case CPU_ONLINE: + ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, cpu); + wake_up_process(cct->task); + break; + case CPU_DOWN_PREPARE: + ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); + break; + case CPU_DOWN_FAILED: + ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); + break; + case CPU_DEAD: + ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); + destroy_comp_task(pool, cpu); + take_over_work(pool, cpu); + break; + } + + return NOTIFY_OK; +} + +#endif + +int ehca_create_comp_pool(void) +{ +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + int cpu; + struct task_struct *task; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = any_online_cpu(cpu_online_map); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (pool->cpu_comp_tasks == NULL) { + kfree(pool); + return -EINVAL; + } + + for_each_online_cpu(cpu) { + task = create_comp_task(pool, cpu); + if (task) { + kthread_bind(task, cpu); + wake_up_process(task); + } + } + + comp_pool_callback_nb.notifier_call = comp_pool_callback; + comp_pool_callback_nb.priority =0; + register_cpu_notifier(&comp_pool_callback_nb); +#endif + + return 0; +} + +void ehca_destroy_comp_pool(void) +{ +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + int i; + + unregister_cpu_notifier(&comp_pool_callback_nb); + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_online(i)) + destroy_comp_task(pool, i); + } +#endif + + return; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h new file mode 100644 index 00000000000..85bf1fe16fe --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Khadija Souissi <souissi@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/atomic.h> + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_eq(unsigned long data); + +struct ehca_cpu_comp_task { + wait_queue_head_t wait_queue; + struct list_head cq_list; + struct task_struct *task; + spinlock_t task_lock; + int cq_jobs; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task *cpu_comp_tasks; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h new file mode 100644 index 00000000000..319c39d47f3 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -0,0 +1,182 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Dietmar Decker <ddecker@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +void ehca_poll_eqs(unsigned long data); + +int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, + struct vm_area_struct **vma); + +int ehca_mmap_register(u64 physical,void **mapped, + struct vm_area_struct **vma); + +int ehca_munmap(unsigned long addr, size_t len); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c new file mode 100644 index 00000000000..2380994418a --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -0,0 +1,818 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION("SVNEHCA_0016"); + +int ehca_open_aqp1 = 0; +int ehca_debug_level = 0; +int ehca_hw_level = 0; +int ehca_nr_ports = 2; +int ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_poll_all_eqs = 1; +int ehca_static_rate = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, int, 0); +module_param_named(debug_level, ehca_debug_level, int, 0); +module_param_named(hw_level, ehca_hw_level, int, 0); +module_param_named(nr_ports, ehca_nr_ports, int, 0); +module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); +module_param_named(port_act_time, ehca_port_act_time, int, 0); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); +module_param_named(static_rate, ehca_static_rate, int, 0); + +MODULE_PARM_DESC(open_aqp1, + "AQP1 on startup (0: no (default), 1: yes)"); +MODULE_PARM_DESC(debug_level, + "debug level" + " (0: no debug traces (default), 1: with debug traces)"); +MODULE_PARM_DESC(hw_level, + "hardware level" + " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (default: 2)"); +MODULE_PARM_DESC(use_hp_mr, + "high performance MRs (0: no (default), 1: yes)"); +MODULE_PARM_DESC(port_act_time, + "time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "polls all event queues periodically" + " (0: no, 1: yes (default))"); +MODULE_PARM_DESC(static_rate, + "set permanent static rate (default: disabled)"); + +spinlock_t ehca_qp_idr_lock; +spinlock_t ehca_cq_idr_lock; +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +static struct list_head shca_list; /* list of all registered ehcas */ +static spinlock_t shca_list_lock; + +static struct timer_list poll_eqs_timer; + +static int ehca_create_slab_caches(void) +{ + int ret; + + ret = ehca_init_pd_cache(); + if (ret) { + ehca_gen_err("Cannot create PD SLAB cache."); + return ret; + } + + ret = ehca_init_cq_cache(); + if (ret) { + ehca_gen_err("Cannot create CQ SLAB cache."); + goto create_slab_caches2; + } + + ret = ehca_init_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create QP SLAB cache."); + goto create_slab_caches3; + } + + ret = ehca_init_av_cache(); + if (ret) { + ehca_gen_err("Cannot create AV SLAB cache."); + goto create_slab_caches4; + } + + ret = ehca_init_mrmw_cache(); + if (ret) { + ehca_gen_err("Cannot create MR&MW SLAB cache."); + goto create_slab_caches5; + } + + return 0; + +create_slab_caches5: + ehca_cleanup_av_cache(); + +create_slab_caches4: + ehca_cleanup_qp_cache(); + +create_slab_caches3: + ehca_cleanup_cq_cache(); + +create_slab_caches2: + ehca_cleanup_pd_cache(); + + return ret; +} + +static void ehca_destroy_slab_caches(void) +{ + ehca_cleanup_mrmw_cache(); + ehca_cleanup_av_cache(); + ehca_cleanup_qp_cache(); + ehca_cleanup_cq_cache(); + ehca_cleanup_pd_cache(); +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) +#define EHCA_REVID EHCA_BMASK_IBM(40,63) + +int ehca_sense_attributes(struct ehca_shca *shca) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_gen_err("Cannot allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query device properties. h_ret=%lx", + h_ret); + ret = -EPERM; + goto num_ports1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + ehca_gen_dbg(" ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); + + if ((hcaaver == 1) && (revid == 0)) + shca->hw_level = 0; + else if ((hcaaver == 1) && (revid == 1)) + shca->hw_level = 1; + else if ((hcaaver == 1) && (revid == 2)) + shca->hw_level = 2; + } + ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); + + shca->sport[0].rate = IB_RATE_30_GBPS; + shca->sport[1].rate = IB_RATE_30_GBPS; + +num_ports1: + kfree(rblock); + return ret; +} + +static int init_node_guid(struct ehca_shca *shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); + +init_node_guid1: + kfree(rblock); + return ret; +} + +int ehca_register_device(struct ehca_shca *shca) +{ + int ret; + + ret = init_node_guid(shca); + if (ret) + return ret; + + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = RDMA_NODE_IB_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + /* shca->ib_device.process_mad = ehca_process_mad; */ + shca->ib_device.mmap = ehca_mmap; + + ret = ib_register_device(&shca->ib_device); + if (ret) + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%x", ret); + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_aqp1) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_aqp1 = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = ib_destroy_qp(sport->ibqp_aqp1); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +void ehca_create_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_create_file(&drv->driver, &driver_attr_debug_level); +} + +void ehca_remove_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_debug_level); +} + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev->driver_data; \ + \ + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory."); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties"); \ + kfree(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + kfree(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); +EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev->driver_data; + + return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + + +void ehca_create_device_sysfs(struct ibmebus_dev *dev) +{ + device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_create_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_create_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_create_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +void ehca_remove_device_sysfs(struct ibmebus_dev *dev) +{ + device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +static int __devinit ehca_probe(struct ibmebus_dev *dev, + const struct of_device_id *id) +{ + struct ehca_shca *shca; + u64 *handle; + struct ib_pd *ibpd; + int ret; + + handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + shca->ibmebus_dev = dev; + shca->ipz_hca_handle.handle = *handle; + dev->ofdev.dev.driver_data = shca; + + ret = ehca_sense_attributes(shca); + if (ret < 0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_register_device(shca); + if (ret) { + ehca_gen_err("Cannot register Infiniband device"); + goto probe1; + } + + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe2; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x", + ret); + goto probe5; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe6; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe7; + } + } + + ehca_create_device_sysfs(dev); + + spin_lock(&shca_list_lock); + list_add(&shca->shca_list, &shca_list); + spin_unlock(&shca_list_lock); + + return 0; + +probe7: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%x", ret); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. ret=%x", ret); + +probe2: + ib_unregister_device(&shca->ib_device); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int __devexit ehca_remove(struct ibmebus_dev *dev) +{ + struct ehca_shca *shca = dev->ofdev.dev.driver_data; + int ret; + + ehca_remove_device_sysfs(dev); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%x", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock(&shca_list_lock); + list_del(&shca->shca_list); + spin_unlock(&shca_list_lock); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; + +static struct ibmebus_driver ehca_driver = { + .name = "ehca", + .id_table = ehca_device_table, + .probe = ehca_probe, + .remove = ehca_remove, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) + ehca_tasklet_eq((unsigned long)(void*)shca); + } + mod_timer(&poll_eqs_timer, jiffies + HZ); + spin_unlock(&shca_list_lock); +} + +int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Rel.: SVNEHCA_0016)\n"); + idr_init(&ehca_qp_idr); + idr_init(&ehca_cq_idr); + spin_lock_init(&ehca_qp_idr_lock); + spin_lock_init(&ehca_cq_idr_lock); + + INIT_LIST_HEAD(&shca_list); + spin_lock_init(&shca_list_lock); + + if ((ret = ehca_create_comp_pool())) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + if ((ret = ehca_create_slab_caches())) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + if ((ret = ibmebus_register_driver(&ehca_driver))) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init2; + } + + ehca_create_driver_sysfs(&ehca_driver); + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ehca_remove_driver_sysfs(&ehca_driver); + ibmebus_unregister_driver(&ehca_driver); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c new file mode 100644 index 00000000000..32a870660bf --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi <souissik@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/err.h> +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) \ + (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->pd->device, + struct ehca_shca, ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c new file mode 100644 index 00000000000..5ca65441e1d --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -0,0 +1,2261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker <ddecker@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/current.h> + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +static struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_alloc(mr_cache, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mr)); + spin_lock_init(&me->mrlock); + } else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_alloc(mw_cache, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mw)); + spin_lock_init(&me->mwlock); + } else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%lx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + int ret; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + if (!pd) { + ehca_gen_err("bad pd=%p", pd); + return ERR_PTR(-EFAULT); + } + if (!region) { + ehca_err(pd->device, "bad input values: region=%p", region); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + if (region->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "region->page_size=%x", region->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if ((region->length == 0) || + ((region->virt_base + region->length) < region->virt_base)) { + ehca_err(pd->device, "bad input values: length=%lx " + "virt_base=%lx", region->length, region->virt_base); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.region = region; + pginfo.next_4k = region->offset / EHCA_PAGESIZE; + pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, + (®ion->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, + region->length, mr_access_flags, e_pd, &pginfo, + &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit1; + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" + " udata=%p", + PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u64 new_size; + u64 *new_start; + u32 new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; /* new == old address */ + new_size = e_mr->size; /* new == old length */ + new_acl = e_mr->acl; /* new == old access control */ + new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%lx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto query_mr_exit0; + } + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p " + "hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_query_mr(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p " + "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto dereg_mr_exit0; + } + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p", ret, mr); + return ret; +} /* end ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout = {{0},0}; + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " + "shca=%p hca_hndl=%lx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p " + "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca_mrmw_map_hrc_free_mw(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) && + ((1 << fmr_attr->page_shift) != PAGE_SIZE)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { + ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + if (IS_ERR(ib_fmr)) + ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x " + "fmr_attr=%p", PTR_ERR(ib_fmr), pd, + mr_access_flags, fmr_attr); + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = list_len; + pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.page_list = page_list; + pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + + ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " + "iova=%lx", + ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + if (!ib_fmr) { + ehca_gen_err("bad fmr=%p in list", ib_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " + "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " + "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca_mrmw_map_hrc_alloc(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_pages, + pginfo->num_4k, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " + "num_pages=%lx num_4k=%lx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_pages, pginfo->num_4k); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max 512 pages per shot */ + for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + rnum = pginfo->num_4k % 512; /* last shot */ + if (rnum == 0) + rnum = 512; /* last shot is full */ + } else + rnum = 512; + + if (rnum > 1) { + ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%x rnum=%x kpage=%p", + ret, rnum, kpage); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else { /* rnum==1 */ + ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " + "bad rc, ret=%x i=%x", ret, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, + 0, /* pagesize 4k */ + 0, rpage, rnum); + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lx " + "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx " + "mr_hndl=%lx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + kfree(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " + "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, + pginfo->num_pages, pginfo->num_4k); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + u64 *kpage; + u64 rpage; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + if (ret) { + ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " + "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", + e_mr, pginfo, pginfo->type, pginfo->num_pages, + pginfo->num_4k,kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p", kpage); + ret = -EFAULT; + goto ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64*)hipzout.vaddr != iova_start) { + ehca_err(&shca->ib_device, "PHYP changed iova_start in " + "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " + "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + kfree(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " + "pginfo=%p num_pages=%lx num_4k=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_pages, + pginfo->num_4k); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine reregistration hCall(s) */ + if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || + (pginfo->num_4k > e_mr->num_4k)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " + "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " + "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + /* first check if reregistration hCall can be used for unmap */ + if (e_fmr->fmr_max_pages > 512) { + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (rereg_1_hcall) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + rereg_3_hcall = 1; + } else { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = 0; + pginfo.num_4k = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; + } + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + goto ehca_reg_smr_exit0; + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ib_phys_buf ib_pbuf; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = (u64)high_memory - PAGE_OFFSET; + iova_start = (u64*)KERNELBASE; + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = 1; + pginfo.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " + "num_pages_4k=%x", e_mr, iova_start, size_maxmr, + num_pages_mr, num_pages_4k); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " + "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + return ehca_mrmw_map_hrc_reg_smr(h_ret); + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%x e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + ehca_gen_err("bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%lx pbuf->size=%lx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx " + "pbuf->size=%lx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%lx " + "pbuf->size=%lx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x pbuf->size=%lx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + struct ib_phys_buf *pbuf; + u64 *fmrlist; + u64 num4k, pgaddr, offs4k; + u32 i = 0; + u32 j = 0; + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_4k < offs4k + num4k) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + ehca_gen_err("page_cnt >= num_pages, " + "page_cnt=%lx " + "num_pages=%lx " + "page_4k_cnt=%lx " + "num_4k=%lx i=%x", + pginfo->page_cnt, + pginfo->num_pages, + pginfo->page_4k_cnt, + pginfo->num_4k, i); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + *kpage = phys_to_abs( + (pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx " + "pbuf->size=%lx " + "next_4k=%lx", pbuf->addr, + pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + /* loop over desired chunk entries */ + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + for (i = pginfo->next_nmap; i < chunk->nmap; ) { + pgaddr = ( page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ); + *kpage = phys_to_abs(pgaddr + + (pginfo->next_4k * + EHCA_PAGESIZE)); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_4k=%lx mr=%p", + pgaddr, + (u64)sg_dma_address( + &chunk-> + page_list[i]), + i, pginfo->next_4k, e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + i++; + } + j++; + if (j >= number) break; + } + if ((pginfo->next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + /* loop over desired page_list entries */ + fmrlist = pginfo->page_list + pginfo->next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%lx fmrlist=%p " + "next_listelem=%lx next_4k=%lx", + *fmrlist, fmrlist, + pginfo->next_listelem, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + fmrlist++; + pginfo->next_4k = 0; + } + } + } else { + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + +ehca_set_pagebuf_exit0: + if (ret) + ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " + "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " + "next_listelem=%lx region=%p next_chunk=%p " + "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, + pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, number, kpage, + pginfo->page_cnt, pginfo->page_4k_cnt, i, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* setup 1 page from page info page buffer */ +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage) +{ + int ret = 0; + struct ib_phys_buf *tmp_pbuf; + u64 *fmrlist; + struct ib_umem_chunk *chunk; + struct ib_umem_chunk *prev_chunk; + u64 pgaddr, num4k, offs4k; + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " + "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", + pginfo->page_cnt, pginfo->num_pages, + pginfo->page_4k_cnt, pginfo->num_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*rpage) && tmp_pbuf->addr ) { + ehca_gen_err("tmp_pbuf->addr=%lx" + " tmp_pbuf->size=%lx next_4k=%lx", + tmp_pbuf->addr, tmp_pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + pgaddr = ( page_to_pfn(chunk->page_list[ + pginfo->next_nmap].page) + << PAGE_SHIFT); + *rpage = phys_to_abs(pgaddr + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*rpage) ) { + ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" + " next_nmap=%lx next_4k=%lx mr=%p", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[ + pginfo-> + next_nmap]), + pginfo->next_nmap, pginfo->next_4k, + e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + } + if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } + break; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + fmrlist = pginfo->page_list + pginfo->next_listelem; + *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*rpage) ) { + ehca_gen_err("*fmrlist=%lx fmrlist=%p " + "next_listelem=%lx next_4k=%lx", + *fmrlist, fmrlist, pginfo->next_listelem, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + pginfo->next_4k = 0; + } + } else { + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + +ehca_set_pagebuf_1_exit0: + if (ret) + ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " + "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " + "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, + pginfo, pginfo->type, pginfo->num_pages, + pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, + rpage, pginfo->page_cnt, pginfo->page_4k_cnt, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf_1() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ((u64)high_memory - PAGE_OFFSET)) && + (iova_start == (void*)KERNELBASE)) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. */ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/ +{ + return; /* HCA supports only 4k */ +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR/MW allocations + * Used for hipz_mr_reg_alloc and hipz_mw_alloc. + */ +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RT_PARM: /* invalid resource type */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_MLENGTH_PARM: /* invalid memory length */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_CONSTRAINED: /* resource constraint */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_alloc() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering last page + */ +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* registration complete */ + return 0; + case H_PAGE_REGISTERED: /* page registered */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_last() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering one page, but not last page + */ +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_PAGE_REGISTERED: /* page registered */ + return 0; + case H_SUCCESS: /* registration complete */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ + +/*----------------------------------------------------------------------*/ + +/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_query_mr() */ + +/*----------------------------------------------------------------------*/ +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MR resource + * Used for hipz_h_free_resource_mr + */ +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mr() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MW resource + * Used for hipz_h_free_resource_mw + */ +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mw() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for SMR registrations + * Used for hipz_h_register_smr. + */ +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_MEM_PARM: /* invalid MR virtual address */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_pages = 0; + mr->num_4k = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); + mr->nr_of_pages = 0; + mr->pagearray = NULL; +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h new file mode 100644 index 00000000000..d936e40a574 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -0,0 +1,140 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker <ddecker@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c new file mode 100644 index 00000000000..2c3cdc6f7b3 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -0,0 +1,114 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/current.h> + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + + pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + memset(pd, 0, sizeof(struct ehca_pd)); + pd->ownpid = current->tgid; + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + u32 cur_pid = current->tgid; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + kmem_cache_free(pd_cache, + container_of(pd, struct ehca_pd, ib_pd)); + + return 0; +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h new file mode 100644 index 00000000000..8707d297ce4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -0,0 +1,259 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote adresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_adress; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) +#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c new file mode 100644 index 00000000000..4394123cdbd --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -0,0 +1,1507 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <asm/current.h> + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ + IB_QP_MAX_QP_RD_ATOMIC | \ + IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! */ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3 +}; + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETY: + return -EINVAL; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init_qp_queues initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queues(struct ehca_shca *shca, + struct ehca_qp *my_qp, + int nr_sq_pages, + int nr_rq_pages, + int swqe_size, + int rwqe_size, + int nr_send_sges, int nr_receive_sges) +{ + int ret, cnt, ipz_rc; + void *vpage; + u64 rpage, h_ret; + struct ib_device *ib_dev = &shca->ib_device; + struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, + nr_sq_pages, + EHCA_PAGESIZE, swqe_size, nr_send_sges); + if (!ipz_rc) { + ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", + ipz_rc); + return -EBUSY; + } + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, + nr_rq_pages, + EHCA_PAGESIZE, rwqe_size, nr_receive_sges); + if (!ipz_rc) { + ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x", + ipz_rc); + ret = -EBUSY; + goto init_qp_queues0; + } + /* register SQ pages */ + for (cnt = 0; cnt < nr_sq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + if (!vpage) { + ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 0, + rpage, 1, + my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" + " failed rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + + ipz_qeit_reset(&my_qp->ipz_squeue); + + /* register RQ pages */ + for (cnt = 0; cnt < nr_rq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (!vpage) { + ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() " + "failed p_vpage = %p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 1, + rpage, 1,my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed " + "rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + if (cnt == (nr_rq_pages - 1)) { /* last page! */ + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "should not succeed vpage=%p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + } + + ipz_qeit_reset(&my_qp->ipz_rqueue); + + return 0; + +init_qp_queues1: + ipz_queue_dtor(&my_qp->ipz_rqueue); +init_qp_queues0: + ipz_queue_dtor(&my_qp->ipz_squeue); + return ret; +} + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; + static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; + struct ehca_qp *my_qp; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct ib_ucontext *context = NULL; + u64 h_ret; + int max_send_sge, max_recv_sge, ret; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 swqe_size = 0, rwqe_size = 0; + u8 daqp_completion, isdaqp; + unsigned long flags; + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); + return ERR_PTR(-EINVAL); + } + + /* save daqp completion bits */ + daqp_completion = init_attr->qp_type & 0x60; + /* save daqp bit */ + isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; + init_attr->qp_type = init_attr->qp_type & 0x1F; + + if (init_attr->qp_type != IB_QPT_UD && + init_attr->qp_type != IB_QPT_SMI && + init_attr->qp_type != IB_QPT_GSI && + init_attr->qp_type != IB_QPT_UC && + init_attr->qp_type != IB_QPT_RC) { + ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) + && isdaqp) { + ehca_err(pd->device, "unsupported LL QP Type=%x", + init_attr->qp_type); + return ERR_PTR(-EINVAL); + } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && + (init_attr->cap.max_send_wr > 255 || + init_attr->cap.max_recv_wr > 255 )) { + ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " + "or max_rq_wr=%x for QP Type=%x", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr,init_attr->qp_type); + return ERR_PTR(-EINVAL); + } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && + init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (pd->uobject && udata) + context = pd->uobject->context; + + my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL); + if (!my_qp) { + ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + return ERR_PTR(-ENOMEM); + } + + memset(my_qp, 0, sizeof(struct ehca_qp)); + memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + my_qp->init_attr = *init_attr; + + do { + if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't reserve idr resources."); + goto create_qp_exit0; + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + goto create_qp_exit0; + } + + parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); + goto create_qp_exit0; + } + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + parms.sigtype = HCALL_SIGT_EVERY; + else + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (IB_QPT_UD == init_attr->qp_type || + IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + max_send_sge += 2; + max_recv_sge += 2; + } + + parms.ipz_eq_handle = shca->eq.ipz_eq_handle; + parms.daqp_ctrl = isdaqp | daqp_completion; + parms.pd = my_pd->fw_pd; + parms.max_recv_sge = max_recv_sge; + parms.max_send_sge = max_send_sge; + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); + + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + switch (init_attr->qp_type) { + case IB_QPT_RC: + if (isdaqp == 0) { + swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_send_sges)]); + rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_recv_sges)]); + } else { /* for daqp we need to use msg size, not wqe size */ + swqe_size = da_rc_msg_size[max_send_sge]; + rwqe_size = da_rc_msg_size[max_recv_sge]; + parms.act_nr_send_sges = 1; + parms.act_nr_recv_sges = 1; + } + break; + case IB_QPT_UC: + swqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_recv_sges]); + break; + + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + parms.act_nr_recv_sges -= 2; + parms.act_nr_send_sges -= 2; + if (isdaqp) { + swqe_size = da_ud_sq_msg_size[max_send_sge]; + rwqe_size = da_rc_msg_size[max_recv_sge]; + parms.act_nr_send_sges = 1; + parms.act_nr_recv_sges = 1; + } else { + swqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_recv_sges]); + } + + if (IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + parms.act_nr_send_wqes = init_attr->cap.max_send_wr; + parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; + parms.act_nr_send_sges = init_attr->cap.max_send_sge; + parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; + my_qp->real_qp_num = + (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initializes r/squeue and registers queue pages */ + ret = init_qp_queues(shca, my_qp, + parms.nr_sq_pages, parms.nr_rq_pages, + swqe_size, rwqe_size, + parms.act_nr_send_sges, parms.act_nr_recv_sges); + if (ret) { + ehca_err(pd->device, + "Couldn't initialize r/squeue and pages ret=%x", ret); + goto create_qp_exit2; + } + + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; + + my_qp->ib_qp.qp_num = my_qp->real_qp_num; + my_qp->ib_qp.qp_type = init_attr->qp_type; + + my_qp->qp_type = init_attr->qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; + init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; + init_attr->cap.max_send_sge = parms.act_nr_send_sges; + init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + + /* NOTE: define_apq0() not supported yet */ + if (init_attr->qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit3; + } + } + if (init_attr->send_cq) { + struct ehca_cq *cq = container_of(init_attr->send_cq, + struct ehca_cq, ib_cq); + ret = ehca_cq_assign_qp(cq, my_qp); + if (ret) { + ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", + ret); + goto create_qp_exit3; + } + my_qp->send_cq = cq; + } + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; + struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; + struct ehca_create_qp_resp resp; + struct vm_area_struct * vma; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + /* rqueue properties */ + resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; + resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; + resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; + resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; + resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, + ipz_rqueue->queue_length, + (void**)&resp.ipz_rqueue.queue, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap rqueue pages"); + goto create_qp_exit3; + } + my_qp->uspace_rqueue = resp.ipz_rqueue.queue; + /* squeue properties */ + resp.ipz_squeue.qe_size = ipz_squeue->qe_size; + resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; + resp.ipz_squeue.queue_length = ipz_squeue->queue_length; + resp.ipz_squeue.pagesize = ipz_squeue->pagesize; + resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, + ipz_squeue->queue_length, + (void**)&resp.ipz_squeue.queue, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap squeue pages"); + goto create_qp_exit4; + } + my_qp->uspace_squeue = resp.ipz_squeue.queue; + /* fw_handle */ + resp.galpas = my_qp->galpas; + ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap fw_handle"); + goto create_qp_exit5; + } + my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + ehca_err(pd->device, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit6; + } + } + + return &my_qp->ib_qp; + +create_qp_exit6: + ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + +create_qp_exit5: + ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); + +create_qp_exit4: + ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); + +create_qp_exit3: + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(qp_cache, my_qp); + return ERR_PTR(ret); +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + u64 h_ret; + struct ipz_queue *squeue; + void *bad_send_wqe_p, *bad_send_wqe_v; + void *squeue_start_p, *squeue_end_p; + void *squeue_start_v, *squeue_end_v; + struct ehca_wqe *wqe; + int qp_num = my_qp->ib_qp.qp_num; + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" + " ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", + qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + if (ehca_debug_level) + ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L)); + squeue_end_p = squeue_start_p+squeue->queue_length; + squeue_start_v = abs_to_virt((u64)squeue_start_p); + squeue_end_v = abs_to_virt((u64)squeue_end_p); + ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p", + qp_num, squeue_start_v, squeue_end_v); + + /* loop sets wqe's purge bit */ + wqe = (struct ehca_wqe*)bad_send_wqe_v; + *bad_wqe_cnt = 0; + while (wqe->optype != 0xff && wqe->wqef != 0xff) { + if (ehca_debug_level) + ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); + wqe->nr_of_data_seg = 0; /* suppress data access */ + wqe->wqef = WQEF_PURGE; /* WQE to be purged */ + wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size); + *bad_wqe_cnt = (*bad_wqe_cnt)+1; + if ((void*)wqe >= squeue_end_v) { + wqe = squeue_start_v; + } + } + /* + * bad wqe will be reprocessed and ignored when pol_cq() is called, + * i.e. nr of wqes with flush error status is one less + */ + ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", + qp_num, (*bad_wqe_cnt)-1); + wqe->wqef = 0; + + return 0; +} + +/* + * internal_modify_qp with circumvention to handle aqp0 properly + * smi_reset2init indicates if this is an internal reset-to-init-call for + * smi. This flag must always be zero if called from ehca_modify_qp()! + * This internal func was intorduced to avoid recursion of ehca_modify_qp()! + */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state, qp_new_state; + int cnt, qp_attr_idx, ret = 0; + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb; + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = + container_of(ibqp->pd->device, struct ehca_shca, ib_device); + u64 update_mask; + u64 h_ret; + int bad_wqe_cnt = 0; + int squeue_locked = 0; + unsigned long spl_flags = 0; + + /* do query_qp to obtain current attr values */ + mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (mqpcb == NULL) { + ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%x", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (my_qp->uspace_squeue == 0) { + struct ehca_wqe *wqe; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe*) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + ehca_err(ibqp->device, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%x", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + } + if (attr_mask & IB_QP_PORT) { + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> + init_attr.port_num].rate); + + mqpcb->dlid = attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate = 0; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? + attr->max_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + } + if (attr_mask & IB_QP_ALT_PATH) { + int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult( + shca->sport[my_qp->init_attr.port_num].rate); + + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1); + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1); + mqpcb->service_level_al = attr->alt_ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate_al = 0; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1); + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1); + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1); + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + mqpcb->path_migration_state = attr->path_mig_state; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! Maybe you didn't get " + "a LID h_ret=%lx ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + kfree(mqpcb); + + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + return internal_modify_qp(ibqp, attr, attr_mask, 0); +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + u32 cur_pid = current->tgid; + int cnt, ret = 0; + u64 h_ret; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device,"Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL ); + if (!qpcb) { + ehca_err(qp->device,"Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device,"hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); + + qp_attr->port_num = + EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); + + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); + + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + kfree(qpcb); + + return ret; +} + +int ehca_destroy_qp(struct ib_qp *ibqp) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + u32 cur_pid = current->tgid; + u32 qp_num = ibqp->qp_num; + int ret; + u64 h_ret; + u8 port_num; + enum ib_qp_type qp_type; + unsigned long flags; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, + my_qp->real_qp_num); + if (ret) { + ehca_err(ibqp->device, "Couldn't unassign qp from " + "send_cq ret=%x qp_num=%x cq_num=%x", ret, + my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* un-mmap if vma alloc */ + if (my_qp->uspace_rqueue) { + ret = ehca_munmap(my_qp->uspace_rqueue, + my_qp->ipz_rqueue.queue_length); + if (ret) + ehca_err(ibqp->device, "Could not munmap rqueue " + "qp_num=%x", qp_num); + ret = ehca_munmap(my_qp->uspace_squeue, + my_qp->ipz_squeue.queue_length); + if (ret) + ehca_err(ibqp->device, "Could not munmap squeue " + "qp_num=%x", qp_num); + ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + if (ret) + ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", + qp_num); + } + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(ibqp->device, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + kmem_cache_free(qp_cache, my_qp); + return 0; +} + +int ehca_init_qp_cache(void) +{ + qp_cache = kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c new file mode 100644 index 00000000000..b46bda1bf85 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -0,0 +1,653 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <asm-powerpc/system.h> +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include <rdma/ib_mad.h> + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x",idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = (u8 *) abs_to_virt(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = send_wr->wr_id; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if (send_wr->send_flags & IB_SEND_SIGNALED) + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch (qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (!send_wr->wr.ud.ah) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_adress = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! + */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ib_send_wr *cur_send_wr; + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags; + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + + /* loop processes list of send reqs */ + for (cur_send_wr = send_wr; cur_send_wr != NULL; + cur_send_wr = cur_send_wr->next) { + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + ehca_err(qp->device, "Too many posted WQEs " + "qp_num=%x", qp->qp_num); + } + goto post_send_exit0; + } + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + ehca_err(qp->device, "Could not write WQE " + "qp_num=%x", qp->qp_num); + } + goto post_send_exit0; + } + wqe_cnt++; + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_send_wr */ + +post_send_exit0: + /* UNLOCK the QUEUE */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ib_recv_wr *cur_recv_wr; + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags; + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + + /* loop processes list of send reqs */ + for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; + cur_recv_wr = cur_recv_wr->next) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_recv_wr) + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + ehca_err(qp->device, "Too many posted WQEs " + "qp_num=%x", qp->qp_num); + } + goto post_recv_exit0; + } + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + ehca_err(qp->device, "Could not write WQE " + "qp_num=%x", qp->qp_num); + } + goto post_recv_exit0; + } + wqe_cnt++; + ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_recv_wr */ + +post_recv_exit0: + spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + return ret; +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! + */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + int cqe_count = 0; + +poll_cq_one_read_cqe: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " + "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + int purgeflag; + unsigned long spl_flags; + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto poll_cq_one_read_cqe; + } + spin_lock_irqsave(&qp->spinlock_s, spl_flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + + if (purgeflag) { + ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " + "src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto poll_cq_one_read_cqe; + } + } + + /* tracing cqe */ + if (ehca_debug_level) { + ehca_dbg(cq->device, + "Received COMPLETION ehca_cq=%p cq_num=%x -----", + my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + /* we got a completion! */ + wc->wr_id = cqe->work_request_id; + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto poll_cq_one_exit0; + } + /* eval ib_wc_status */ + if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->qp_num = cqe->local_qp_number; + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + wc->wc_flags = cqe->w_completion_flags; + wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + + if (wc->status != IB_WC_SUCCESS) + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " + "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " + "cqe=%p", my_cq, my_cq->cq_number, cqe->optype, + cqe->status, cqe->local_qp_number, + cqe->remote_qp_number, cqe->work_request_id, cqe); + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long spl_flags; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + for (nr = 0; nr < num_entries; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + if (ret == -EAGAIN || !ret) + ret = nr; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + + switch (cq_notify) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c new file mode 100644 index 00000000000..9f16e9c7939 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -0,0 +1,111 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi <souissi@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include <linux/module.h> +#include <linux/err.h> +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. rc=%lx", + port, ret); + return ret; + } + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... wait until port %x is active", + port); + msleep_interruptible(1000); + } + + if (counter == ehca_port_act_time) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h new file mode 100644 index 00000000000..9f56bb846d9 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -0,0 +1,172 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Khadija Souissi <souissik@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/vmalloc.h> +#include <linux/version.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/device.h> + +#include <asm/abs_addr.h> +#include <asm/ibmebus.h> +#include <asm/io.h> +#include <asm/pgtable.h> + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) \ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + get_paca()->paca_index, __FUNCTION__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + get_paca()->paca_index, __FUNCTION__, ## arg); \ + } while (0) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) \ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char*)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk("EHCA_DMP:%s" format \ + " adr=%p ofs=%04x %016lx %016lx\n", \ + __FUNCTION__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask,value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) + +/** + * EHCA_BMASK_GET - extract a parameter from value by mask + */ +#define EHCA_BMASK_GET(mask,value) \ + (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) + + +/* Converts ehca to ib return code */ +static inline int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_BUSY: + return -EBUSY; + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c new file mode 100644 index 00000000000..e08764e4aef --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -0,0 +1,392 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/current.h> + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +struct page *ehca_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct page *mypage = NULL; + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 cur_pid = current->tgid; + unsigned long flags; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ehca_pd *pd; + u64 offset; + void *vaddr; + + switch (q_type) { + case 1: /* CQ */ + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) { + ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + + if (cq->ownpid != cur_pid) { + ehca_err(cq->ib_cq.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return NOPAGE_SIGBUS; + } + + if (rsrc_type == 2) { + ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); + ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } + break; + + case 2: /* QP */ + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) { + ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + ehca_err(qp->ib_qp.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return NOPAGE_SIGBUS; + } + + if (rsrc_type == 2) { /* rqueue */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); + ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } else if (rsrc_type == 3) { /* squeue */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); + ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return NOPAGE_SIGBUS; + } + + if (!mypage) { + ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + get_page(mypage); + + return mypage; +} + +static struct vm_operations_struct ehcau_vm_ops = { + .nopage = ehca_nopage, +}; + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 cur_pid = current->tgid; + u32 ret; + u64 vsize, physical; + unsigned long flags; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ehca_pd *pd; + + switch (q_type) { + case 1: /* CQ */ + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + + if (cq->ownpid != cur_pid) { + ehca_err(cq->ib_cq.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return -ENOMEM; + } + + if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) + return -EINVAL; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_err(cq->ib_cq.device, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = cq->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + ehca_dbg(cq->ib_cq.device, + "vsize=%lx physical=%lx", vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + ehca_err(cq->ib_cq.device, + "remap_pfn_range() failed ret=%x", + ret); + return -ENOMEM; + } + break; + + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type %x", + rsrc_type); + return -EINVAL; + } + break; + + case 2: /* QP */ + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) + return -EINVAL; + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + ehca_err(qp->ib_qp.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return -ENOMEM; + } + + if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) + return -EINVAL; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_err(qp->ib_qp.device, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = qp->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", + vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x", + ret); + return -ENOMEM; + } + break; + + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type %x", + rsrc_type); + return -EINVAL; + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return -EINVAL; + } + + return 0; +} + +int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, + struct vm_area_struct **vma) +{ + down_write(¤t->mm->mmap_sem); + *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + foffset); + up_write(¤t->mm->mmap_sem); + if (!(*mapped)) { + ehca_gen_err("couldn't mmap foffset=%lx length=%lx", + foffset, length); + return -EINVAL; + } + + *vma = find_vma(current->mm, (u64)*mapped); + if (!(*vma)) { + down_write(¤t->mm->mmap_sem); + do_munmap(current->mm, 0, length); + up_write(¤t->mm->mmap_sem); + ehca_gen_err("couldn't find vma queue=%p", *mapped); + return -EINVAL; + } + (*vma)->vm_flags |= VM_RESERVED; + (*vma)->vm_ops = &ehcau_vm_ops; + + return 0; +} + +int ehca_mmap_register(u64 physical, void **mapped, + struct vm_area_struct **vma) +{ + int ret; + unsigned long vsize; + /* ehca hw supports only 4k page */ + ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); + if (ret) { + ehca_gen_err("could'nt mmap physical=%lx", physical); + return ret; + } + + (*vma)->vm_flags |= VM_RESERVED; + vsize = (*vma)->vm_end - (*vma)->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", + (*vma)->vm_end - (*vma)->vm_start); + return -EINVAL; + } + + (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); + (*vma)->vm_flags |= VM_IO | VM_RESERVED; + + ret = remap_pfn_range((*vma), (*vma)->vm_start, + physical >> PAGE_SHIFT, vsize, + (*vma)->vm_page_prot); + if (ret) { + ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + return 0; + +} + +int ehca_munmap(unsigned long addr, size_t len) { + int ret = 0; + struct mm_struct *mm = current->mm; + if (mm) { + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + } + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c new file mode 100644 index 00000000000..3fb46e67df8 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -0,0 +1,874 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Gerd Bayer <gerd.bayer@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <asm/hvcall.h> +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) + +/* direct access qp controls */ +#define DAQP_CTRL_ENABLE 0x01 +#define DAQP_CTRL_SEND_COMP 0x20 +#define DAQP_CTRL_RECV_COMP 0x40 + +static u32 get_longbusy_msecs(int longbusy_rc) +{ + switch (longbusy_rc) { + case H_LONG_BUSY_ORDER_1_MSEC: + return 1; + case H_LONG_BUSY_ORDER_10_MSEC: + return 10; + case H_LONG_BUSY_ORDER_100_MSEC: + return 100; + case H_LONG_BUSY_ORDER_1_SEC: + return 1000; + case H_LONG_BUSY_ORDER_10_SEC: + return 10000; + case H_LONG_BUSY_ORDER_100_SEC: + return 100000; + default: + return 1; + } +} + +static long ehca_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + + ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " + "arg5=%lx arg6=%lx arg7=%lx", + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx ", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7); + + ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret); + return ret; + + } + + return H_BUSY; +} + +static long ehca_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + + ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " + "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, + arg8, arg9); + + for (i = 0; i < 5; i++) { + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, + outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + + ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx " + "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx " + "out9=%lx", + opcode, ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], outs[8]); + return ret; + + } + + return H_BUSY; +} +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 *act_nr_of_entries, + u32 *act_pages, + u32 *eq_ist) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + u64 allocate_controls; + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, 0, 0); + eq_handle->handle = outs[0]; + *act_nr_of_entries = (u32)outs[3]; + *act_pages = (u32)outs[4]; + *eq_ist = (u32)outs[5]; + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resource - ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + return ehca_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, 0, 0); + cq->ipz_cq_handle.handle = outs[0]; + param->act_nr_of_entries = (u32)outs[3]; + param->act_pages = (u32)outs[4]; + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms) +{ + u64 ret; + u64 allocate_controls; + u64 max_r10_reg; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; + u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; + int daqp_ctrl = parms->daqp_ctrl; + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, + (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + max_nr_send_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + max_nr_receive_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->max_send_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->max_recv_sge); + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + qp->send_cq->ipz_cq_handle.handle, + qp->recv_cq->ipz_cq_handle.handle, + parms->ipz_eq_handle.handle, + ((u64)qp->token << 32) | parms->pd.value, + max_r10_reg, /* r10 */ + parms->ud_av_l_key_ctl, /* r11 */ + 0); + qp->ipz_qp_handle.handle = outs[0]; + qp->real_qp_num = (u32)outs[1]; + parms->act_nr_send_sges = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); + parms->act_nr_recv_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); + parms->act_nr_send_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); + parms->act_nr_recv_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); + parms->nr_sq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); + parms->nr_rq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret; + u64 r_cb = virt_to_abs(query_port_response_block); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response block not page aligned"); + return H_PARAMETER; + } + + ret = ehca_plpar_hcall_norets(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0); + + if (ehca_debug_level) + ehca_dmp(query_port_response_block, 64, "response_block"); + + return ret; +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 r_cb = virt_to_abs(query_hca_rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + queue_type | pagesize << 8, /* r5 */ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0); +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + if (count != 1) { + ehca_gen_err("Ppage counter=%lx", count); + return H_PARAMETER; + } + return hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); +} + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u64 ret; + ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret != H_SUCCESS && ret != H_BUSY) + ehca_gen_err("Could not query interrupt state."); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + if (count != 1) { + ehca_gen_err("Page counter=%lx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + if (count != 1) { + ehca_gen_err("Page counter=%lx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, + qp_handle.handle,logical_address_of_page, + count); +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (log_addr_next_sq_wqe2processed) + *log_addr_next_sq_wqe2processed = (void*)outs[0]; + if (log_addr_next_rq_wqe2processed) + *log_addr_next_rq_wqe2processed = (void*)outs[1]; + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + virt_to_abs(mqpcb), /* r7 */ + 0, 0, 0, 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Insufficient resources ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + return ehca_plpar_hcall_norets(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + virt_to_abs(qqpcb), /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + ehca_gen_err("Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (ret == H_HARDWARE) + ehca_gen_err("HCA not operational. ret=%lx", ret); + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource still in use. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + return ehca_plpar_hcall_norets(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, 0, 0); + *pma_qp_nr = (u32)outs[0]; + *bma_qp_nr = (u32)outs[1]; + + if (ret == H_ALIAS_EXIST) + ehca_gen_err("AQP1 already exists. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret; + + ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + return ehca_plpar_hcall_norets(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret; + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + ehca_gen_err("Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret; + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + ehca_gen_err("Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource in use. ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret; + + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { + ehca_gen_err("logical_address_of_page not on a 4k boundary " + "adapter_handle=%lx mr=%p mr_handle=%lx " + "pagesize=%x queue_type=%x " + "logical_address_of_page=%lx count=%lx", + adapter_handle.handle, mr, + mr->ipz_mr_handle.handle, pagesize, queue_type, + logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->len = outs[0]; + outparms->vaddr = outs[1]; + outparms->acl = outs[4] >> 32; + outparms->lkey = (u32)(outs[5] >> 32); + outparms->rkey = (u32)(outs[5] & (0xffffffff)); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, 0, 0); + outparms->vaddr = outs[1]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MW, outs, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 r_cb = virt_to_abs(rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("rblock not page aligned."); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h new file mode 100644 index 00000000000..587ebd47095 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -0,0 +1,261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Gerd Bayer <gerd.bayer@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * resources, create the empty EQPT (ring). + */ +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist); + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask); +/* + * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize + * resources, create the empty CQPT (ring). + */ +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param); + + +/* + * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, + * initialize resources, create empty QPPTs (2 rings). + */ +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms); + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block); + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock); + +/* + * hipz_h_register_rpage internal function in hcp_if.h for all + * hcp_H_REGISTER_RPAGE calls. + */ +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count); + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle + hcp_adapter_handle, + u32 ist); + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal); + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa); + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe_tb_processed, + void **log_addr_next_rq_wqe_tb_processed, + int dis_and_get_function_code); +enum hcall_sigt { + HCALL_SIGT_NO_CQE = 0, + HCALL_SIGT_BY_WQE = 1, + HCALL_SIGT_EVERY = 2 +}; + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal); + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal); + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp); + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c new file mode 100644 index 00000000000..0b1a4772c78 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -0,0 +1,80 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +int hcall_map_page(u64 physaddr, u64 *mapaddr) +{ + *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); + return 0; +} + +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem*)mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user) +{ + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + + galpas->user.fw_handle = paddr_user; + + return 0; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h new file mode 100644 index 00000000000..5305c2a3ed9 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Gerd Bayer <gerd.bayer@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +int hcall_map_page(u64 physaddr, u64 * mapaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h new file mode 100644 index 00000000000..9dac93d0214 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h new file mode 100644 index 00000000000..20898a15344 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -0,0 +1,100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch <raisch@de.ibm.com> + * Heiko J Schick <schickhj@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal,offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h new file mode 100644 index 00000000000..3fc92b031c5 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -0,0 +1,388 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48,63) +#define QPX_RQADDER EHCA_BMASK_IBM(48,63) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) + +/* CQ Table Entry Memory Map */ +struct hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* 0x1000 */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) +#define CQX_FECADDER EHCA_BMASK_IBM(32,63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32 max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; + u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u8 reserved2[1900]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c new file mode 100644 index 00000000000..e028ff1588c --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -0,0 +1,149 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % EHCA_PAGESIZE) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +int ipz_queue_ctor(struct ipz_queue *queue, + const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int f; + + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x is greater " + "than kernel page size", pagesize); + return 0; + } + if (!pages_per_kpage) { + ehca_gen_err("FATAL ERROR: invalid kernel page size. " + "pages_per_kpage=%x", pages_per_kpage); + return 0; + } + queue->queue_length = nr_of_pages * pagesize; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("ERROR!! didn't get the memory"); + return 0; + } + memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); + /* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ + f = 0; + while (f < nr_of_pages) { + u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + int k; + if (!kpage) + goto ipz_queue_ctor_exit0; /*NOMEM*/ + for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { + (queue->queue_pages)[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + + queue->current_q_offset = 0; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->pagesize = pagesize; + queue->toggle_state = 1; + return 1; + + ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", + queue, f, nr_of_pages); + for (f = 0; f < nr_of_pages; f += pages_per_kpage) { + if (!(queue->queue_pages)[f]) + break; + free_page((unsigned long)(queue->queue_pages)[f]); + } + return 0; +} + +int ipz_queue_dtor(struct ipz_queue *queue) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int g; + int nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue or queue_pages is NULL"); + return 0; + } + nr_pages = queue->queue_length / queue->pagesize; + for (g = 0; g < nr_pages; g += pages_per_kpage) + free_page((unsigned long)(queue->queue_pages)[g]); + vfree(queue->queue_pages); + + return 1; +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h new file mode 100644 index 00000000000..2f13509d525 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -0,0 +1,247 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin <fomin@de.ibm.com> + * Reinhard Ernst <rernst@de.ibm.com> + * Christoph Raisch <raisch@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 dummy3; /* 64 bit alignment */ +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + u32 cqe_flags = cqe->cqe_flags; + + if ((cqe_flags >> 7) != (queue->toggle_state & 1)) + return NULL; + + ipz_qeit_get_inc(queue); + return cqe; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, + const u32 nr_of_sg); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *) ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 1db9489f1e8..574a678e7fd 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,16 +1,9 @@ -config IPATH_CORE - tristate "QLogic InfiniPath Driver" - depends on 64BIT && PCI_MSI && NET - ---help--- - This is a low-level driver for QLogic InfiniPath host channel - adapters (HCAs) based on the HT-400 and PE-800 chips. - config INFINIBAND_IPATH - tristate "QLogic InfiniPath Verbs Driver" - depends on IPATH_CORE && INFINIBAND + tristate "QLogic InfiniPath Driver" + depends on PCI_MSI && 64BIT && INFINIBAND ---help--- - This is a driver that provides InfiniBand verbs support for - QLogic InfiniPath host channel adapters (HCAs). This - allows these devices to be used with both kernel upper level - protocols such as IP-over-InfiniBand as well as with userspace - applications (in conjunction with InfiniBand userspace access). + This is a driver for QLogic InfiniPath host channel adapters, + including InfiniBand verbs support. This driver allows these + devices to be used with both kernel upper level protocols such + as IP-over-InfiniBand as well as with userspace applications + (in conjunction with InfiniBand userspace access). diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index b0bf7286413..5e29cb0095e 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,36 +1,35 @@ EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 -obj-$(CONFIG_IPATH_CORE) += ipath_core.o obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o -ipath_core-y := \ +ib_ipath-y := \ + ipath_cq.o \ ipath_diag.o \ ipath_driver.o \ ipath_eeprom.o \ ipath_file_ops.o \ ipath_fs.o \ - ipath_ht400.o \ + ipath_iba6110.o \ + ipath_iba6120.o \ ipath_init_chip.o \ ipath_intr.o \ - ipath_layer.o \ - ipath_pe800.o \ - ipath_stats.o \ - ipath_sysfs.o \ - ipath_user_pages.o - -ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o - -ib_ipath-y := \ - ipath_cq.o \ ipath_keys.o \ + ipath_layer.o \ ipath_mad.o \ + ipath_mmap.o \ ipath_mr.o \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ ipath_srq.o \ + ipath_stats.o \ + ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ - ipath_verbs.o \ - ipath_verbs_mcast.o + ipath_user_pages.o \ + ipath_verbs_mcast.o \ + ipath_verbs.o + +ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 062bd392e7e..f577905e3ac 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -106,9 +106,9 @@ struct infinipath_stats { __u64 sps_ether_spkts; /* number of "ethernet" packets received by driver */ __u64 sps_ether_rpkts; - /* number of SMA packets sent by driver */ + /* number of SMA packets sent by driver. Obsolete. */ __u64 sps_sma_spkts; - /* number of SMA packets received by driver */ + /* number of SMA packets received by driver. Obsolete. */ __u64 sps_sma_rpkts; /* number of times all ports rcvhdrq was full and packet dropped */ __u64 sps_hdrqfull; @@ -138,7 +138,7 @@ struct infinipath_stats { __u64 sps_pageunlocks; /* * Number of packets dropped in kernel other than errors (ether - * packets if ipath not configured, sma/mad, etc.) + * packets if ipath not configured, etc.) */ __u64 sps_krdrops; /* pad for future growth */ @@ -153,8 +153,6 @@ struct infinipath_stats { #define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ /* Device has been disabled via admin request */ #define IPATH_STATUS_ADMIN_DISABLED 0x4 -#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */ -#define IPATH_STATUS_SMA 0x10 /* user SMA running */ /* Chip has been found and initted */ #define IPATH_STATUS_CHIP_PRESENT 0x20 /* IB link is at ACTIVE, usable for data traffic */ @@ -465,12 +463,11 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; -/* Passed into SMA special file's ->read and ->write methods. */ -struct ipath_sma_pkt -{ - __u32 unit; /* unit on which to send packet */ - __u64 data; /* address of payload in userspace */ - __u32 len; /* length of payload */ +/* Passed into diag data special file's ->write method. */ +struct ipath_diag_pkt { + __u32 unit; + __u64 data; + __u32 len; }; /* diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3efee341c9b..049221bc590 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -42,20 +42,28 @@ * @entry: work completion entry to add * @sig: true if @entry is a solicitated entry * - * This may be called with one of the qp->s_lock or qp->r_rq.lock held. + * This may be called with qp->s_lock held. */ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { + struct ipath_cq_wc *wc = cq->queue; unsigned long flags; + u32 head; u32 next; spin_lock_irqsave(&cq->lock, flags); - if (cq->head == cq->ibcq.cqe) + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. + */ + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; next = 0; - else - next = cq->head + 1; - if (unlikely(next == cq->tail)) { + } else + next = head + 1; + if (unlikely(next == wc->tail)) { spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; @@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - cq->queue[cq->head] = *entry; - cq->head = next; + wc->queue[head] = *entry; + wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && solicited)) { @@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); + struct ipath_cq_wc *wc = cq->queue; unsigned long flags; int npolled; spin_lock_irqsave(&cq->lock, flags); for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (cq->tail == cq->head) + if (wc->tail == wc->head) break; - *entry = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + *entry = wc->queue[wc->tail]; + if (wc->tail >= cq->ibcq.cqe) + wc->tail = 0; else - cq->tail++; + wc->tail++; } spin_unlock_irqrestore(&cq->lock, flags); @@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, { struct ipath_ibdev *dev = to_idev(ibdev); struct ipath_cq *cq; - struct ib_wc *wc; + struct ipath_cq_wc *wc; struct ib_cq *ret; - if (entries > ib_ipath_max_cqes) { + if (entries < 1 || entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } if (dev->n_cqs_allocated == ib_ipath_max_cqs) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } - /* - * Need to use vmalloc() if we want to support large #s of - * entries. - */ + /* Allocate the completion queue structure. */ cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* - * Need to use vmalloc() if we want to support large #s of entries. + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. */ - wc = vmalloc(sizeof(*wc) * (entries + 1)); + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries); if (!wc) { - kfree(cq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_cq; } + + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) wc; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wc; + } + + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + cq->ip = ip; + ip->context = context; + ip->obj = wc; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * entries); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + cq->ip = NULL; + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, cq->triggered = 0; spin_lock_init(&cq->lock); tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); - cq->head = 0; - cq->tail = 0; + wc->head = 0; + wc->tail = 0; cq->queue = wc; ret = &cq->ibcq; dev->n_cqs_allocated++; + goto done; -bail: +bail_wc: + vfree(wc); + +bail_cq: + kfree(cq); + +done: return ret; } @@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq) tasklet_kill(&cq->comptask); dev->n_cqs_allocated--; - vfree(cq->queue); + if (cq->ip) + kref_put(&cq->ip->ref, ipath_release_mmap_info); + else + vfree(cq->queue); kfree(cq); return 0; @@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) spin_lock_irqsave(&cq->lock, flags); /* * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions. + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). */ if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = notify; @@ -264,46 +319,86 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct ipath_cq *cq = to_icq(ibcq); - struct ib_wc *wc, *old_wc; - u32 n; + struct ipath_cq_wc *old_wc = cq->queue; + struct ipath_cq_wc *wc; + u32 head, tail, n; int ret; + if (cqe < 1 || cqe > ib_ipath_max_cqes) { + ret = -EINVAL; + goto bail; + } + /* * Need to use vmalloc() if we want to support large #s of entries. */ - wc = vmalloc(sizeof(*wc) * (cqe + 1)); + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe); if (!wc) { ret = -ENOMEM; goto bail; } + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = (__u64) wc; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail; + } + spin_lock_irq(&cq->lock); - if (cq->head < cq->tail) - n = cq->ibcq.cqe + 1 + cq->head - cq->tail; + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; else - n = cq->head - cq->tail; + n = head - tail; if (unlikely((u32)cqe < n)) { spin_unlock_irq(&cq->lock); vfree(wc); ret = -EOVERFLOW; goto bail; } - for (n = 0; cq->tail != cq->head; n++) { - wc[n] = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + for (n = 0; tail != head; n++) { + wc->queue[n] = old_wc->queue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; else - cq->tail++; + tail++; } cq->ibcq.cqe = cqe; - cq->head = n; - cq->tail = 0; - old_wc = cq->queue; + wc->head = n; + wc->tail = 0; cq->queue = wc; spin_unlock_irq(&cq->lock); vfree(old_wc); + if (cq->ip) { + struct ipath_ibdev *dev = to_idev(ibcq->device); + struct ipath_mmap_info *ip = cq->ip; + + ip->obj = wc; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * cqe); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + ret = 0; bail: diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index f415beda0d3..df69f0d80b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -60,7 +60,6 @@ #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x8000 /* sma packet debug */ #define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ #define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ #define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ @@ -84,7 +83,6 @@ /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x0 #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */ #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ #define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ #define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 147dd89e21c..29958b6e021 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -41,11 +41,12 @@ * through the /sys/bus/pci resource mmap interface. */ +#include <linux/io.h> #include <linux/pci.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" int ipath_diag_inuse; @@ -274,6 +275,158 @@ bail: return ret; } +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off); + +static struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diagpkt_write, +}; + +static struct cdev *diagpkt_cdev; +static struct class_device *diagpkt_class_dev; + +int __init ipath_diagpkt_add(void) +{ + return ipath_cdev_init(IPATH_DIAGPKT_MINOR, + "ipath_diagpkt", &diagpkt_file_ops, + &diagpkt_cdev, &diagpkt_class_dev); +} + +void __exit ipath_diagpkt_remove(void) +{ + ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev); +} + +/** + * ipath_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: ipath_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, clen, pbufn; + struct ipath_diag_pkt dp; + u32 *tmpbuf = NULL; + struct ipath_devdata *dd; + ssize_t ret = 0; + u64 val; + + if (count < sizeof(dp)) { + ret = -EINVAL; + goto bail; + } + + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + + clen = dp.len >> 2; + + dd = ipath_lookup(dp.unit); + if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || + !dd->ipath_kregbase) { + ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", + dp.unit); + ret = -ENODEV; + goto bail; + } + + if (ipath_diag_inuse && !diag_set_link && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + diag_set_link = 1; + ipath_cdbg(VERBOSE, "Trying to set to set link active for " + "diag pkt\n"); + ipath_set_linkstate(dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); + } + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); + ret = -ENODEV; + goto bail; + } + val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; + if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && + val != IPATH_IBSTATE_ACTIVE) { + ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", + dd->ipath_unit, (unsigned long long) val); + ret = -EINVAL; + goto bail; + } + + /* need total length before first word written */ + /* +1 word is for the qword padding */ + plen = sizeof(u32) + dp.len; + + if ((plen + 4) > dd->ipath_ibmaxlen) { + ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", + plen - 4, dd->ipath_ibmaxlen); + ret = -EINVAL; + goto bail; /* before writing pbc */ + } + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " + "failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + piobuf = ipath_getpiobuf(dd, &pbufn); + if (!piobuf) { + ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", + dd->ipath_unit); + ret = -EBUSY; + goto bail; + } + + plen >>= 2; /* in dwords */ + + if (ipath_debug & __IPATH_PKTDBG) + ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", + dd->ipath_unit, plen - 1, pbufn); + + /* we have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order */ + writeq(plen, piobuf); + ipath_flush_wc(); + /* copy all by the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. */ + __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + ipath_flush_wc(); + __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + ipath_flush_wc(); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + static int ipath_diag_release(struct inode *in, struct file *fp) { mutex_lock(&ipath_mutex); diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index f98518d912b..2108466c7e3 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -39,7 +39,7 @@ #include <linux/vmalloc.h> #include "ipath_kernel.h" -#include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" static void ipath_update_pio_bufs(struct ipath_devdata *); @@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit) return iname; } -EXPORT_SYMBOL_GPL(ipath_get_unit_name); - #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " #define PFX IPATH_DRV_NAME ": " @@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name); * The size has to be longer than this string, so we can append * board/chip information to it in the init code. */ -const char ipath_core_version[] = IPATH_IDSTR "\n"; +const char ib_ipath_version[] = IPATH_IDSTR "\n"; static struct idr unit_table; DEFINE_SPINLOCK(ipath_devs_lock); LIST_HEAD(ipath_dev_list); -wait_queue_head_t ipath_sma_state_wait; +wait_queue_head_t ipath_state_wait; unsigned ipath_debug = __IPATH_INFO; @@ -403,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { case PCI_DEVICE_ID_INFINIPATH_HT: - ipath_init_ht400_funcs(dd); + ipath_init_iba6110_funcs(dd); break; case PCI_DEVICE_ID_INFINIPATH_PE800: - ipath_init_pe800_funcs(dd); + ipath_init_iba6120_funcs(dd); break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -440,7 +438,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } dd->ipath_pcirev = rev; +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->ipath_kregbase = __ioremap(addr, len, + (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); +#else dd->ipath_kregbase = ioremap_nocache(addr, len); +#endif if (!dd->ipath_kregbase) { ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", @@ -503,7 +507,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipathfs_add_device(dd); ipath_user_add(dd); ipath_diag_add(dd); - ipath_layer_add(dd); + ipath_register_ib_device(dd); goto bail; @@ -532,7 +536,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) return; dd = pci_get_drvdata(pdev); - ipath_layer_remove(dd); + ipath_unregister_ib_device(dd->verbs_dev); ipath_diag_remove(dd); ipath_user_remove(dd); ipathfs_remove_device(dd); @@ -607,21 +611,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * * wait up to msecs milliseconds for IB link state change to occur for * now, take the easy polling route. Currently used only by - * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise + * ipath_set_linkstate. Returns 0 if state reached, otherwise * -ETIMEDOUT state can have multiple states set, for any of several * transitions. */ -int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) +static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, + int msecs) { - dd->ipath_sma_state_wanted = state; - wait_event_interruptible_timeout(ipath_sma_state_wait, + dd->ipath_state_wanted = state; + wait_event_interruptible_timeout(ipath_state_wait, (dd->ipath_flags & state), msecs_to_jiffies(msecs)); - dd->ipath_sma_state_wanted = 0; + dd->ipath_state_wanted = 0; if (!(dd->ipath_flags & state)) { u64 val; - ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n", + ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" + " ms\n", /* test INIT ahead of DOWN, both can be set */ (state & IPATH_LINKINIT) ? "INIT" : ((state & IPATH_LINKDOWN) ? "DOWN" : @@ -807,58 +813,6 @@ bail: return skb; } -/** - * ipath_rcv_layer - receive a packet for the layered (ethernet) driver - * @dd: the infinipath device - * @etail: the sk_buff number - * @tlen: the total packet length - * @hdr: the ethernet header - * - * Separate routine for better overall optimization - */ -static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail, - u32 tlen, struct ether_header *hdr) -{ - u32 elen; - u8 pad, *bthbytes; - struct sk_buff *skb, *nskb; - - if (dd->ipath_port0_skbs && - hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) { - /* - * Allocate a new sk_buff to replace the one we give - * to the network stack. - */ - nskb = ipath_alloc_skb(dd, GFP_ATOMIC); - if (!nskb) { - /* count OK packets that we drop */ - ipath_stats.sps_krdrops++; - return; - } - - bthbytes = (u8 *) hdr->bth; - pad = (bthbytes[1] >> 4) & 3; - /* +CRC32 */ - elen = tlen - (sizeof(*hdr) + pad + sizeof(u32)); - - skb = dd->ipath_port0_skbs[etail]; - dd->ipath_port0_skbs[etail] = nskb; - skb_put(skb, elen); - - dd->ipath_f_put_tid(dd, etail + (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase - + dd->ipath_rcvegrbase), 0, - virt_to_phys(nskb->data)); - - __ipath_layer_rcv(dd, hdr, skb); - - /* another ether packet received */ - ipath_stats.sps_ether_rpkts++; - } - else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP) - __ipath_layer_rcv_lid(dd, hdr); -} - static void ipath_rcv_hdrerr(struct ipath_devdata *dd, u32 eflags, u32 l, @@ -972,26 +926,17 @@ reloop: if (unlikely(eflags)) ipath_rcv_hdrerr(dd, eflags, l, etail, rc); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - int ret = __ipath_verbs_rcv(dd, rc + 1, - ebuf, tlen); - if (ret == -ENODEV) - ipath_cdbg(VERBOSE, - "received IB packet, " - "not SMA (QP=%x)\n", qp); - if (dd->ipath_lli_counter) - dd->ipath_lli_counter--; - - } else if (etype == RCVHQ_RCV_TYPE_EAGER) { - if (qp == IPATH_KD_QP && - bthbytes[0] == ipath_layer_rcv_opcode && - ebuf) - ipath_rcv_layer(dd, etail, tlen, - (struct ether_header *)hdr); - else - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, bthbytes[0], qp, tlen); + ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); } + else if (etype == RCVHQ_RCV_TYPE_EAGER) + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); else if (etype == RCVHQ_RCV_TYPE_EXPECTED) ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", be32_to_cpu(hdr->bth[0]) & 0xff); @@ -1024,7 +969,8 @@ reloop: */ if (l == hdrqtail || (i && !(i&0xf))) { u64 lval; - if (l == hdrqtail) /* PE-800 interrupt only on last */ + if (l == hdrqtail) + /* request IBA6120 interrupt only on last */ lval = dd->ipath_rhdrhead_intr_off | l; else lval = l; @@ -1038,7 +984,7 @@ reloop: } if (!dd->ipath_rhdrhead_intr_off && !reloop) { - /* HT-400 workaround; we can have a race clearing chip + /* IBA6110 workaround; we can have a race clearing chip * interrupt with another interrupt about to be delivered, * and can clear it before it is delivered on the GPIO * workaround. By doing the extra check here for the @@ -1211,7 +1157,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) * * do appropriate marking as busy, etc. * returns buffer number if one found (>=0), negative number is error. - * Used by ipath_sma_send_pkt and ipath_layer_send + * Used by ipath_layer_send */ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) { @@ -1317,13 +1263,6 @@ rescan: goto bail; } - if (updated) - /* - * ran out of bufs, now some (at least this one we just - * got) are now available, so tell the layered driver. - */ - __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - /* * set next starting place. Since it's just an optimization, * it doesn't matter who wins on this, so no locking @@ -1500,7 +1439,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) return ret; } -void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) +static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { [0] = "DOWN", @@ -1511,7 +1450,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & INFINIPATH_IBCC_LINKCMD_MASK; - ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " + ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " "is %s\n", dd->ipath_unit, what[linkcmd], ipath_ibcstatus_str[ @@ -1520,7 +1459,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); /* flush all queued sends when going to DOWN or INIT, to be sure that - * they don't block SMA and other MAD packets */ + * they don't block MAD packets */ if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, INFINIPATH_S_ABORT); @@ -1534,6 +1473,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) dd->ipath_ibcctrl | which); } +int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) +{ + u32 lstate; + int ret; + + switch (newstate) { + case IPATH_IB_LINKDOWN: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_SLEEP: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_DISABLE: + ipath_set_ib_lstate(dd, + INFINIPATH_IBCC_LINKINITCMD_DISABLE << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKINIT: + if (dd->ipath_flags & IPATH_LINKINIT) { + ret = 0; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKINIT; + break; + + case IPATH_IB_LINKARM: + if (dd->ipath_flags & IPATH_LINKARMED) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & + (IPATH_LINKINIT | IPATH_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << + INFINIPATH_IBCC_LINKCMD_SHIFT); + /* + * Since the port can transition to ACTIVE by receiving + * a non VL 15 packet, wait for either state. + */ + lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; + break; + + case IPATH_IB_LINKACTIVE: + if (dd->ipath_flags & IPATH_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & IPATH_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKACTIVE; + break; + + default: + ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); + ret = -EINVAL; + goto bail; + } + ret = ipath_wait_linkstate(dd, lstate, 2000); + +bail: + return ret; +} + +/** + * ipath_set_mtu - set the MTU + * @dd: the infinipath device + * @arg: the new MTU + * + * we can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link initialize (IPATH_IBSTATE_INIT) state... + */ +int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) +{ + u32 piosize; + int changed = 0; + int ret; + + /* + * mtu is IB data payload max. It's the largest power of 2 less + * than piosize (or even larger, since it only really controls the + * largest we can receive; we can send the max of the mtu and + * piosize). We check that it's one of the valid IB sizes. + */ + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + arg != 4096) { + ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); + ret = -EINVAL; + goto bail; + } + if (dd->ipath_ibmtu == arg) { + ret = 0; /* same as current */ + goto bail; + } + + piosize = dd->ipath_ibmaxlen; + dd->ipath_ibmtu = arg; + + if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != dd->ipath_init_ibmaxlen) { + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { + piosize = arg + IPATH_PIO_MAXIBHDR; + ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " + "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, + arg); + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + + if (changed) { + /* + * set the IBC maxpktlength to the size of our pio + * buffers in words + */ + u64 ibc = dd->ipath_ibcctrl; + ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << + INFINIPATH_IBCC_MAXPKTLEN_SHIFT); + + piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ + dd->ipath_ibmaxlen = piosize; + piosize /= sizeof(u32); /* in words */ + /* + * for ICRC, which we only send in diag test pkt mode, and + * we don't need to worry about that for mtu + */ + piosize += 1; + + ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + dd->ipath_ibcctrl = ibc; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + dd->ipath_f_tidtemplate(dd); + } + + ret = 0; + +bail: + return ret; +} + +int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) +{ + dd->ipath_lid = arg; + dd->ipath_lmc = lmc; + + return 0; +} + /** * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register * @dd: the infinipath device @@ -1637,13 +1750,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* - * we are shutting down, so tell the layered driver. We don't do - * this on just a link state change, much like ethernet, a cable - * unplug, etc. doesn't change driver state - */ - ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN); - /* disable IBC */ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, @@ -1743,7 +1849,7 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version); + ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with @@ -1776,8 +1882,18 @@ static int __init infinipath_init(void) goto bail_group; } + ret = ipath_diagpkt_add(); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " + "diag data device: error %d\n", -ret); + goto bail_ipathfs; + } + goto bail; +bail_ipathfs: + ipath_exit_ipathfs(); + bail_group: ipath_driver_remove_group(&ipath_driver.driver); @@ -1888,6 +2004,8 @@ static void __exit infinipath_cleanup(void) struct ipath_devdata *dd, *tmp; unsigned long flags; + ipath_diagpkt_remove(); + ipath_exit_ipathfs(); ipath_driver_remove_group(&ipath_driver.driver); @@ -1998,5 +2116,22 @@ bail: return ret; } +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) +{ + u64 val; + if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) { + return -1; + } + if ( dd->ipath_rx_pol_inv != new_pol_inv ) { + dd->ipath_rx_pol_inv = new_pol_inv; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= ((u64)dd->ipath_rx_pol_inv) << + INFINIPATH_XGXS_RX_POL_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + } + return 0; +} module_init(infinipath_init); module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index bbaa70e57db..29930e22318 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,7 +39,6 @@ #include <asm/pgtable.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" static int ipath_open(struct inode *, struct file *); @@ -985,15 +984,17 @@ static int mmap_piobufs(struct vm_area_struct *vma, * write combining behavior we want on the PIO buffers! */ - if (vma->vm_flags & VM_READ) { - dev_info(&dd->pcidev->dev, - "Can't map piobufs as readable (flags=%lx)\n", - vma->vm_flags); - ret = -EPERM; - goto bail; - } +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif - /* don't allow them to later change to readable with mprotect */ + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; @@ -1109,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ret = mmap_rcvegrbufs(vma, pd); else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { /* - * The rcvhdrq itself; readonly except on HT-400 (so have + * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. */ @@ -1149,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp, struct ipath_portdata *pd; u32 head, tail; int bit; + unsigned pollflag = 0; struct ipath_devdata *dd; pd = port_fp(fp); @@ -1185,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp, clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); pd->port_rcvwait_to++; } + else + pollflag = POLLIN | POLLRDNORM; } else { /* it's already happened; don't do wait_event overhead */ + pollflag = POLLIN | POLLRDNORM; pd->port_rcvnowait++; } @@ -1195,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); - return 0; + return pollflag; } static int try_alloc_port(struct ipath_devdata *dd, int port, @@ -1297,14 +1302,14 @@ static int find_best_unit(struct file *fp) * This code is present to allow a knowledgeable person to * specify the layout of processes to processors before opening * this driver, and then we'll assign the process to the "closest" - * HT-400 to that processor (we assume reasonable connectivity, + * InfiniPath chip to that processor (we assume reasonable connectivity, * for now). This code assumes that if affinity has been set * before this point, that at most one cpu is set; for now this * is reasonable. I check for both cpus_empty() and cpus_full(), * in case some kernel variant sets none of the bits when no * affinity is set. 2.6.11 and 12 kernels have all present * cpus set. Some day we'll have to fix it up further to handle - * a cpu subset. This algorithm fails for two HT-400's connected + * a cpu subset. This algorithm fails for two HT chips connected * in tunnel fashion. Eventually this needs real topology * information. There may be some issues with dual core numbering * as well. This needs more work prior to release. @@ -1815,7 +1820,7 @@ int ipath_user_add(struct ipath_devdata *dd) if (ret < 0) { ipath_dev_err(dd, "Could not create wildcard " "minor: error %d\n", -ret); - goto bail_sma; + goto bail_user; } atomic_set(&user_setup, 1); @@ -1831,7 +1836,7 @@ int ipath_user_add(struct ipath_devdata *dd) goto bail; -bail_sma: +bail_user: user_cleanup(); bail: return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 0936d8e8d70..c8a8af0fe47 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -61,10 +61,9 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.generic_ip = data; + inode->i_private = data; if ((mode & S_IFMT) == S_IFDIR) { inode->i_op = &simple_dir_inode_operations; inode->i_nlink++; @@ -119,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, u16 i; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; for (i = 0; i < NUM_COUNTERS; i++) counters[i] = ipath_snap_cntr(dd, i); @@ -139,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf, struct ipath_devdata *dd; u64 guid; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; guid = be64_to_cpu(dd->ipath_guid); @@ -178,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, u32 tmp, tmp2; struct ipath_devdata *dd; - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; /* so we only initialize non-zero fields. */ memset(portinfo, 0, sizeof portinfo); @@ -191,8 +190,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, portinfo[4] = (dd->ipath_lid << 16); /* - * Notimpl yet SMLID (should we store this in the driver, in case - * SMA dies?) CapabilityMask is 0, we don't support any of these + * Notimpl yet SMLID. + * CapabilityMask is 0, we don't support any of these * DiagCode is 0; we don't store any diag info for now Notimpl yet * M_KeyLeasePeriod (we don't support M_Key) */ @@ -325,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -381,7 +380,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_dentry->d_inode->u.generic_ip; + dd = file->f_dentry->d_inode->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 3db015da6e7..5c9b509e40e 100644 --- a/drivers/infiniband/hw/ipath/ipath_ht400.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -33,7 +33,7 @@ /* * This file contains all of the code that is specific to the InfiniPath - * HT-400 chip. + * HT chip. */ #include <linux/pci.h> @@ -43,7 +43,7 @@ #include "ipath_registers.h" /* - * This lists the InfiniPath HT400 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. * * The names are in InterCap form because they're taken straight from @@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * times. */ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; @@ -537,7 +538,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) strlcat(msg, "[HT core Misc7]", msgl); if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, HT-400 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; @@ -553,7 +554,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), HT-400 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -610,18 +611,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, break; case 5: /* - * HT-460 original production board; two production levels, with + * original production board; two production levels, with * different serial number ranges. See ipath_ht_early_init() for * case where we enable IPATH_GPIO_INTR for later serial # range. */ - n = "InfiniPath_HT-460"; + n = "InfiniPath_QHT7040"; break; case 6: n = "OEM_Board_3"; break; case 7: - /* HT-460 small form factor production board */ - n = "InfiniPath_HT-465"; + /* small form factor production board */ + n = "InfiniPath_QHT7140"; break; case 8: n = "LS/X-1"; @@ -633,7 +634,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, n = "OEM_Board_2"; break; case 11: - n = "InfiniPath_HT-470"; + n = "InfiniPath_HT-470"; /* obsoleted */ break; case 12: n = "OEM_Board_4"; @@ -641,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, default: /* don't know, just print the number */ ipath_dev_err(dd, "Don't yet know about board " "with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", boardrev); break; } @@ -650,11 +651,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { /* - * This version of the driver only supports the HT-400 - * Rev 3.2 + * This version of the driver only supports Rev 3.2 and 3.3 */ ipath_dev_err(dd, - "Unsupported HT-400 revision %u.%u!\n", + "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; goto bail; @@ -738,11 +738,10 @@ static void ipath_check_htlink(struct ipath_devdata *dd) static int ipath_setup_ht_reset(struct ipath_devdata *dd) { - ipath_dbg("No reset possible for HT-400\n"); + ipath_dbg("No reset possible for this InfiniPath hardware\n"); return 0; } -#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */ #define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ #define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ @@ -925,7 +924,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, /* * kernels with CONFIG_PCI_MSI set the vector in the irq field of - * struct pci_device, so we use that to program the HT-400 internal + * struct pci_device, so we use that to program the internal * interrupt register (not config space) with that value. The BIOS * must still have done the basic MSI setup. */ @@ -973,7 +972,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, * do this early, before we ever enable errors or hardware errors, * mostly to avoid causing the chip to enter freeze mode. */ - pos = pci_find_capability(pdev, HT_CAPABILITY_ID); + pos = pci_find_capability(pdev, PCI_CAP_ID_HT); if (!pos) { ipath_dev_err(dd, "Couldn't find HyperTransport " "capability; no interrupts\n"); @@ -996,7 +995,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd, else if (cap_type == HT_INTR_DISC_CONFIG) ihandler = set_int_handler(dd, pdev, pos); } while ((pos = pci_find_next_capability(pdev, pos, - HT_CAPABILITY_ID))); + PCI_CAP_ID_HT))); if (!ihandler) { ipath_dev_err(dd, "Couldn't find interrupt handler in " @@ -1013,7 +1012,7 @@ bail: * @dd: the infinipath device * * Called during driver unload. - * This is currently a nop for the HT-400, not for all chips + * This is currently a nop for the HT chip, not for all chips */ static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) { @@ -1290,6 +1289,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) val &= ~INFINIPATH_XGXS_RESET; change = 1; } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } if (change) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); @@ -1470,7 +1478,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; /* - * For HT-400, we allocate a somewhat overly large eager buffer, + * For HT, we allocate a somewhat overly large eager buffer, * such that we can guarantee that we can receive the largest * packet that we can send out. To truly support a 4KB MTU, * we need to bump this to a large value. To date, other than @@ -1531,7 +1539,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* - * Later production HT-460 has same changes as HT-465, so + * Later production QHT7040 has same changes as QHT7140, so * can use GPIO interrupts. They have serial #'s starting * with 128, rather than 112. */ @@ -1560,13 +1568,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) } /** - * ipath_init_ht400_funcs - set up the chip-specific function pointers + * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_ht400_funcs(struct ipath_devdata *dd) +void ipath_init_iba6110_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_ht_intconfig; dd->ipath_f_bus = ipath_setup_ht_config; diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index b83f66d8262..d86516d23df 100644 --- a/drivers/infiniband/hw/ipath/ipath_pe800.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -32,7 +32,7 @@ */ /* * This file contains all of the code that is specific to the - * InfiniPath PE-800 chip. + * InfiniPath PCIe chip. */ #include <linux/interrupt.h> @@ -45,9 +45,9 @@ /* * This file contains all the chip-specific register information and - * access functions for the QLogic InfiniPath PE800, the PCI-Express chip. + * access functions for the QLogic InfiniPath PCI-Express chip. * - * This lists the InfiniPath PE800 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. */ struct _infinipath_do_not_use_kernel_regs { @@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), - /* This group is pe-800-specific; and used only in this file */ /* The rcvpktled register controls one of the debug port signals, so * a packet activity LED can be connected to it. */ .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), @@ -364,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * and we get here multiple times */ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } /* @@ -388,7 +388,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, *msg = '\0'; if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, PE-800 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ *dd->ipath_statusp |= IPATH_STATUS_HWERROR; @@ -433,7 +433,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), PE-800 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) hwerrs & _IPATH_PLL_FAIL); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -511,22 +511,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, n = "InfiniPath_Emulation"; break; case 1: - n = "InfiniPath_PE-800-Bringup"; + n = "InfiniPath_QLE7140-Bringup"; break; case 2: - n = "InfiniPath_PE-880"; + n = "InfiniPath_QLE7140"; break; case 3: - n = "InfiniPath_PE-850"; + n = "InfiniPath_QMI7140"; break; case 4: - n = "InfiniPath_PE-860"; + n = "InfiniPath_QEM7140"; + break; + case 5: + n = "InfiniPath_QMH7140"; break; default: ipath_dev_err(dd, "Don't yet know about board with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u", boardrev); break; } @@ -534,7 +537,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, snprintf(name, namelen, "%s", n); if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) { - ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n", + ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; } else @@ -651,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val &= ~INFINIPATH_XGXS_RESET; change = 1; } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } if (change) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); @@ -705,7 +717,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -/* this is not yet needed on the PE800, so just return 0. */ +/* this is not yet needed on this chip, so just return 0. */ static int ipath_pe_intconfig(struct ipath_devdata *dd) { return 0; @@ -759,8 +771,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, * * This is called during driver unload. * We do the pci_disable_msi here, not in generic code, because it - * isn't used for the HT-400. If we do end up needing pci_enable_msi - * at some point in the future for HT-400, we'll move the call back + * isn't used for the HT chips. If we do end up needing pci_enable_msi + * at some point in the future for HT, we'll move the call back * into the main init_one code. */ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) @@ -780,10 +792,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) * late in 2.6.16). * All that can be done is to edit the kernel source to remove the quirk * check until that is fixed. - * We do not need to call enable_msi() for our HyperTransport chip (HT-400), - * even those it uses MSI, and we want to avoid the quirk warning, so - * So we call enable_msi only for the PE-800. If we do end up needing - * pci_enable_msi at some point in the future for HT-400, we'll move the + * We do not need to call enable_msi() for our HyperTransport chip, + * even though it uses MSI, and we want to avoid the quirk warning, so + * So we call enable_msi only for PCIe. If we do end up needing + * pci_enable_msi at some point in the future for HT, we'll move the * call back into the main init_one code. * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that @@ -971,8 +983,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd) int ret; /* Use ERROR so it shows up in logs, etc. */ - ipath_dev_err(dd, "Resetting PE-800 unit %u\n", - dd->ipath_unit); + ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit); /* keep chip from being accessed in a few places */ dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT); val = dd->ipath_control | INFINIPATH_C_RESET; @@ -1078,7 +1089,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, * @port: the port * * clear all TID entries for a port, expected and eager. - * Used from ipath_close(). On PE800, TIDs are only 32 bits, + * Used from ipath_close(). On this chip, TIDs are only 32 bits, * not 64, but they are still on 64 bit boundaries, so tidbase * is declared as u64 * for the pointer math, even though we write 32 bits */ @@ -1148,9 +1159,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_flags |= IPATH_4BYTE_TID; /* - * For openib, we need to be able to handle an IB header of 96 bytes - * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. The PE-800 does not + * For openfabrics, we need to be able to handle an IB header of + * 24 dwords. HT chip has arbitrary sized receive buffers, so we + * made them the same size as the PIO buffers. This chip does not * handle arbitrary size buffers, so we need the header large enough * to handle largest IB header, but still have room for a 2KB MTU * standard IB packet. @@ -1158,11 +1169,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrentsize = 24; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - /* For HT-400, we allocate a somewhat overly large eager buffer, - * such that we can guarantee that we can receive the largest packet - * that we can send out. To truly support a 4KB MTU, we need to - * bump this to a larger value. We'll do this when I get around to - * testing 4KB sends on the PE-800, which I have not yet done. + /* + * To truly support a 4KB MTU (for usermode), we need to + * bump this to a larger value. For now, we use them for + * the kernel only. */ dd->ipath_rcvegrbufsize = 2048; /* @@ -1175,9 +1185,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; /* - * For PE-800, we can request a receive interrupt for 1 or + * We can request a receive interrupt for 1 or * more packets from current offset. For now, we set this - * up for a single packet, to match the HT-400 behavior. + * up for a single packet. */ dd->ipath_rhdrhead_intr_off = 1ULL<<32; @@ -1216,13 +1226,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) } /** - * ipath_init_pe800_funcs - set up the chip-specific function pointers + * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_pe800_funcs(struct ipath_devdata *dd) +void ipath_init_iba6120_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_pe_intconfig; dd->ipath_f_bus = ipath_setup_pe_config; diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 414cdd1d80a..44669dc2e22 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); /* - * Number of buffers reserved for driver (layered drivers and SMA - * send). Reserved at end of buffer list. Initialized based on + * Number of buffers reserved for driver (verbs and layered drivers.) + * Reserved at end of buffer list. Initialized based on * number of PIO buffers if not set via module interface. * The problem with this is that it's global, but we'll use different * numbers for different chip types. So the default value is not @@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); * * Allocate the eager TID buffers and program them into infinipath. * We use the network layer alloc_skb() allocator to allocate the - * memory, and either use the buffers as is for things like SMA + * memory, and either use the buffers as is for things like verbs * packets, or pass the buffers up to the ipath layered driver and * thence the network layer, replacing them as we do so (see * ipath_rcv_layer()). @@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd, "only supports %u\n", ipath_cfgports, dd->ipath_portcnt); } - dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports, + /* + * Allocate full portcnt array, rather than just cfgports, because + * cleanup iterates across all possible ports. + */ + dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, GFP_KERNEL); if (!dd->ipath_pd) { @@ -446,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd, u32 val; int i; - if (!reinit) { - init_waitqueue_head(&ipath_sma_state_wait); - } + if (!reinit) + init_waitqueue_head(&ipath_state_wait); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); @@ -687,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); if (ipath_kpiobufs == 0) { - /* not set by user, or set explictly to default */ + /* not set by user (this is default) */ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) kpiobufs = 32; else @@ -946,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; } + ipath_kpiobufs = val; ret = 0; bail: spin_unlock_irqrestore(&ipath_devs_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 280e732660a..49bf7bb15b0 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -34,7 +34,7 @@ #include <linux/pci.h> #include "ipath_kernel.h" -#include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" /* These are all rcv-related errors which we want to count for stats */ @@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, ib_linkstate(lstate)); } else - ipath_cdbg(SMA, "Unit %u link state %s, last " + ipath_cdbg(VERBOSE, "Unit %u link state %s, last " "was %s\n", dd->ipath_unit, ib_linkstate(lstate), ib_linkstate((unsigned) @@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || lstate == IPATH_IBSTATE_ACTIVE) - ipath_cdbg(SMA, "Unit %u link state down" + ipath_cdbg(VERBOSE, "Unit %u link state down" " (state 0x%x), from %s\n", dd->ipath_unit, (u32)val & IPATH_IBSTATE_MASK, @@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, INFINIPATH_IBCS_LINKSTATE_MASK) == INFINIPATH_IBCS_L_STATE_ACTIVE) /* if from up to down be more vocal */ - ipath_cdbg(SMA, + ipath_cdbg(VERBOSE, "Unit %u link now down (%s)\n", dd->ipath_unit, ipath_ibcstatus_str[ltstate]); @@ -289,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, *dd->ipath_statusp |= IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; dd->ipath_f_setextled(dd, lstate, ltstate); - - __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP); } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) { /* * set INIT and DOWN. Down is checked by most of the other @@ -598,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (!noprint && *msg) ipath_dev_err(dd, "%s error\n", msg); - if (dd->ipath_sma_state_wanted & dd->ipath_flags) { - ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, " - "waking\n", dd->ipath_sma_state_wanted, + if (dd->ipath_state_wanted & dd->ipath_flags) { + ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " + "waking\n", dd->ipath_state_wanted, dd->ipath_flags); - wake_up_interruptible(&ipath_sma_state_wait); + wake_up_interruptible(&ipath_state_wait); } return chkerrpkts; @@ -708,11 +706,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) { int ret; - ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - if (ret > 0) - goto set; - - ret = __ipath_verbs_piobufavail(dd); + ret = ipath_ib_piobufavail(dd->verbs_dev); if (ret > 0) goto set; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e9f374fb641..a8a56276ff1 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -132,12 +132,6 @@ struct _ipath_layer { void *l_arg; }; -/* Verbs layer interface */ -struct _verbs_layer { - void *l_arg; - struct timer_list l_timer; -}; - struct ipath_devdata { struct list_head ipath_list; @@ -198,7 +192,8 @@ struct ipath_devdata { void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); /* fill out chip-specific fields */ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); - struct _verbs_layer verbs_layer; + struct ipath_ibdev *verbs_dev; + struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ u64 ipath_sword; /* total dwords rcvd (summed from counter) */ @@ -241,7 +236,7 @@ struct ipath_devdata { u64 ipath_tidtemplate; /* value to write to free TIDs */ u64 ipath_tidinvalid; - /* PE-800 rcv interrupt setup */ + /* IBA6120 rcv interrupt setup */ u64 ipath_rhdrhead_intr_off; /* size of memory at ipath_kregbase */ @@ -250,8 +245,8 @@ struct ipath_devdata { u32 ipath_pioavregs; /* IPATH_POLL, etc. */ u32 ipath_flags; - /* ipath_flags sma is waiting for */ - u32 ipath_sma_state_wanted; + /* ipath_flags driver is waiting for */ + u32 ipath_state_wanted; /* last buffer for user use, first buf for kernel use is this * index. */ u32 ipath_lastport_piobuf; @@ -311,10 +306,6 @@ struct ipath_devdata { u32 ipath_pcibar0; /* so we can rewrite it after a chip reset */ u32 ipath_pcibar1; - /* sequential tries for SMA send and no bufs */ - u32 ipath_nosma_bufs; - /* duration (seconds) ipath_nosma_bufs set */ - u32 ipath_nosma_secs; /* HT/PCI Vendor ID (here for NodeInfo) */ u16 ipath_vendorid; @@ -512,6 +503,8 @@ struct ipath_devdata { u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + /* Rx Polarity inversion (compensate for ~tx on partner) */ + u8 ipath_rx_pol_inv; /* local link integrity counter */ u32 ipath_lli_counter; @@ -523,18 +516,6 @@ extern struct list_head ipath_dev_list; extern spinlock_t ipath_devs_lock; extern struct ipath_devdata *ipath_lookup(int unit); -extern u16 ipath_layer_rcv_opcode; -extern int __ipath_layer_intr(struct ipath_devdata *, u32); -extern int ipath_layer_intr(struct ipath_devdata *, u32); -extern int __ipath_layer_rcv(struct ipath_devdata *, void *, - struct sk_buff *); -extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *); -extern int __ipath_verbs_piobufavail(struct ipath_devdata *); -extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32); - -void ipath_layer_add(struct ipath_devdata *); -void ipath_layer_remove(struct ipath_devdata *); - int ipath_init_chip(struct ipath_devdata *, int); int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); @@ -549,9 +530,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp, int ipath_diag_add(struct ipath_devdata *); void ipath_diag_remove(struct ipath_devdata *); -void ipath_diag_bringup_link(struct ipath_devdata *); -extern wait_queue_head_t ipath_sma_state_wait; +extern wait_queue_head_t ipath_state_wait; int ipath_user_add(struct ipath_devdata *dd); void ipath_user_remove(struct ipath_devdata *dd); @@ -582,12 +562,14 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); int ipath_parse_ushort(const char *str, unsigned short *valp); -int ipath_wait_linkstate(struct ipath_devdata *, u32, int); -void ipath_set_ib_lstate(struct ipath_devdata *, int); void ipath_kreceive(struct ipath_devdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); +int ipath_set_linkstate(struct ipath_devdata *, u8); +int ipath_set_mtu(struct ipath_devdata *, u16); +int ipath_set_lid(struct ipath_devdata *, u32, u8); +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* for use in system calls, where we want to know device type, etc. */ #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) @@ -642,10 +624,8 @@ void ipath_free_data(struct ipath_portdata *dd); int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); -/* init PE-800-specific func */ -void ipath_init_pe800_funcs(struct ipath_devdata *); -/* init HT-400-specific func */ -void ipath_init_ht400_funcs(struct ipath_devdata *); +void ipath_init_iba6120_funcs(struct ipath_devdata *); +void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); @@ -801,7 +781,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, struct device_driver; -extern const char ipath_core_version[]; +extern const char ib_ipath_version[]; int ipath_driver_create_group(struct device_driver *); void ipath_driver_remove_group(struct device_driver *); @@ -810,6 +790,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); int ipath_expose_reset(struct device *); +int ipath_diagpkt_add(void); +void ipath_diagpkt_remove(void); + int ipath_init_ipathfs(void); void ipath_exit_ipathfs(void); int ipathfs_add_device(struct ipath_devdata *); @@ -831,10 +814,10 @@ const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; -#define IPATH_DRV_NAME "ipath_core" +#define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 -#define IPATH_SMA_MINOR 128 +#define IPATH_DIAGPKT_MINOR 127 #define IPATH_DIAG_MINOR_BASE 129 #define IPATH_NMINORS 255 diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index a5ca279370a..ba1b93226ca 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -34,6 +34,7 @@ #include <asm/io.h> #include "ipath_verbs.h" +#include "ipath_kernel.h" /** * ipath_alloc_lkey - allocate an lkey @@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - _VERBS_INFO("LKEY table full\n"); + ipath_dbg(KERN_INFO "LKEY table full\n"); ret = 0; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index b28c6f81c73..e46aa4ed2a7 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -42,26 +42,20 @@ #include "ipath_kernel.h" #include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" /* Acquire before ipath_devs_lock. */ static DEFINE_MUTEX(ipath_layer_mutex); -static int ipath_verbs_registered; - u16 ipath_layer_rcv_opcode; static int (*layer_intr)(void *, u32); static int (*layer_rcv)(void *, void *, struct sk_buff *); static int (*layer_rcv_lid)(void *, void *); -static int (*verbs_piobufavail)(void *); -static void (*verbs_rcv)(void *, void *, void *, u32); static void *(*layer_add_one)(int, struct ipath_devdata *); static void (*layer_remove_one)(void *); -static void *(*verbs_add_one)(int, struct ipath_devdata *); -static void (*verbs_remove_one)(void *); -static void (*verbs_timer_cb)(void *); int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) { @@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) return ret; } -int __ipath_verbs_piobufavail(struct ipath_devdata *dd) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_piobufavail) - ret = verbs_piobufavail(dd->verbs_layer.l_arg); - - return ret; -} - -int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf, - u32 tlen) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_rcv) { - verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen); - ret = 0; - } - - return ret; -} - -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate) +void ipath_layer_lid_changed(struct ipath_devdata *dd) { - u32 lstate; - int ret; - - switch (newstate) { - case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, - INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKINIT: - if (dd->ipath_flags & IPATH_LINKINIT) { - ret = 0; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKINIT; - break; - - case IPATH_IB_LINKARM: - if (dd->ipath_flags & IPATH_LINKARMED) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & - (IPATH_LINKINIT | IPATH_LINKACTIVE))) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << - INFINIPATH_IBCC_LINKCMD_SHIFT); - /* - * Since the port can transition to ACTIVE by receiving - * a non VL 15 packet, wait for either state. - */ - lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINKACTIVE: - if (dd->ipath_flags & IPATH_LINKACTIVE) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & IPATH_LINKARMED)) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKACTIVE; - break; - - default: - ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); - ret = -EINVAL; - goto bail; - } - ret = ipath_wait_linkstate(dd, lstate, 2000); - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate); - -/** - * ipath_layer_set_mtu - set the MTU - * @dd: the infinipath device - * @arg: the new MTU - * - * we can handle "any" incoming size, the issue here is whether we - * need to restrict our outgoing size. For now, we don't do any - * sanity checking on this, and we don't deal with what happens to - * programs that are already running when the size changes. - * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... - */ -int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg) -{ - u32 piosize; - int changed = 0; - int ret; - - /* - * mtu is IB data payload max. It's the largest power of 2 less - * than piosize (or even larger, since it only really controls the - * largest we can receive; we can send the max of the mtu and - * piosize). We check that it's one of the valid IB sizes. - */ - if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - arg != 4096) { - ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); - ret = -EINVAL; - goto bail; - } - if (dd->ipath_ibmtu == arg) { - ret = 0; /* same as current */ - goto bail; - } - - piosize = dd->ipath_ibmaxlen; - dd->ipath_ibmtu = arg; - - if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { - /* Only if it's not the initial value (or reset to it) */ - if (piosize != dd->ipath_init_ibmaxlen) { - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { - piosize = arg + IPATH_PIO_MAXIBHDR; - ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " - "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, - arg); - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - - if (changed) { - /* - * set the IBC maxpktlength to the size of our pio - * buffers in words - */ - u64 ibc = dd->ipath_ibcctrl; - ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - INFINIPATH_IBCC_MAXPKTLEN_SHIFT); - - piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ - dd->ipath_ibmaxlen = piosize; - piosize /= sizeof(u32); /* in words */ - /* - * for ICRC, which we only send in diag test pkt mode, and - * we don't need to worry about that for mtu - */ - piosize += 1; - - ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; - dd->ipath_ibcctrl = ibc; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - dd->ipath_f_tidtemplate(dd); - } - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_mtu); - -int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) -{ - dd->ipath_lid = arg; - dd->ipath_lmc = lmc; - mutex_lock(&ipath_layer_mutex); if (dd->ipath_layer.l_arg && layer_intr) layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); mutex_unlock(&ipath_layer_mutex); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_set_lid); - -int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) -{ - /* XXX - need to inform anyone who cares this just happened. */ - dd->ipath_guid = guid; - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_guid); - -__be64 ipath_layer_get_guid(struct ipath_devdata *dd) -{ - return dd->ipath_guid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_guid); - -u32 ipath_layer_get_nguid(struct ipath_devdata *dd) -{ - return dd->ipath_nguid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_nguid); - -u32 ipath_layer_get_majrev(struct ipath_devdata *dd) -{ - return dd->ipath_majrev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_majrev); - -u32 ipath_layer_get_minrev(struct ipath_devdata *dd) -{ - return dd->ipath_minrev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_minrev); - -u32 ipath_layer_get_pcirev(struct ipath_devdata *dd) -{ - return dd->ipath_pcirev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev); - -u32 ipath_layer_get_flags(struct ipath_devdata *dd) -{ - return dd->ipath_flags; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_flags); - -struct device *ipath_layer_get_device(struct ipath_devdata *dd) -{ - return &dd->pcidev->dev; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_device); - -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd) -{ - return dd->ipath_deviceid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid); - -u32 ipath_layer_get_vendorid(struct ipath_devdata *dd) -{ - return dd->ipath_vendorid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid); - -u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd) -{ - return dd->ipath_lastibcstat; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat); - -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd) -{ - return dd->ipath_ibmtu; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu); - void ipath_layer_add(struct ipath_devdata *dd) { mutex_lock(&ipath_layer_mutex); @@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd) dd->ipath_layer.l_arg = layer_add_one(dd->ipath_unit, dd); - if (verbs_add_one) - dd->verbs_layer.l_arg = - verbs_add_one(dd->ipath_unit, dd); - mutex_unlock(&ipath_layer_mutex); } @@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd) dd->ipath_layer.l_arg = NULL; } - if (dd->verbs_layer.l_arg && verbs_remove_one) { - verbs_remove_one(dd->verbs_layer.l_arg); - dd->verbs_layer.l_arg = NULL; - } - mutex_unlock(&ipath_layer_mutex); } @@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), if (dd->ipath_layer.l_arg) continue; - if (!(*dd->ipath_statusp & IPATH_STATUS_SMA)) - *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA; - spin_unlock_irqrestore(&ipath_devs_lock, flags); dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); spin_lock_irqsave(&ipath_devs_lock, flags); @@ -509,107 +205,6 @@ void ipath_layer_unregister(void) EXPORT_SYMBOL_GPL(ipath_layer_unregister); -static void __ipath_verbs_timer(unsigned long arg) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) arg; - - /* - * If port 0 receive packet interrupts are not available, or - * can be missed, poll the receive queue - */ - if (dd->ipath_flags & IPATH_POLL_RX_INTR) - ipath_kreceive(dd); - - /* Handle verbs layer timeouts. */ - if (dd->verbs_layer.l_arg && verbs_timer_cb) - verbs_timer_cb(dd->verbs_layer.l_arg); - - mod_timer(&dd->verbs_layer.l_timer, jiffies + 1); -} - -/** - * ipath_verbs_register - verbs layer registration - * @l_piobufavail: callback for when PIO buffers become available - * @l_rcv: callback for receiving a packet - * @l_timer_cb: timer callback - * @ipath_devdata: device data structure is put here - */ -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail) (void *arg), - void (*l_rcv) (void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb) (void *arg)) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - - verbs_add_one = l_add; - verbs_remove_one = l_remove; - verbs_piobufavail = l_piobufavail; - verbs_rcv = l_rcv; - verbs_timer_cb = l_timer_cb; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - if (!(dd->ipath_flags & IPATH_INITTED)) - continue; - - if (dd->verbs_layer.l_arg) - continue; - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd); - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - mutex_unlock(&ipath_layer_mutex); - - ipath_verbs_registered = 1; - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_verbs_register); - -void ipath_verbs_unregister(void) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - - if (dd->verbs_layer.l_arg && verbs_remove_one) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - verbs_remove_one(dd->verbs_layer.l_arg); - spin_lock_irqsave(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = NULL; - } - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - verbs_add_one = NULL; - verbs_remove_one = NULL; - verbs_piobufavail = NULL; - verbs_rcv = NULL; - verbs_timer_cb = NULL; - - ipath_verbs_registered = 0; - - mutex_unlock(&ipath_layer_mutex); -} - -EXPORT_SYMBOL_GPL(ipath_verbs_unregister); - int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) { int ret; @@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd) EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd) -{ - return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey); - -static void update_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - sge->vaddr += length; - sge->length -= length; - sge->sge_length -= length; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - return; - sge->n = 0; - } - sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = sge->mr->map[sge->m]->segs[sge->n].length; - } -} - -#ifdef __LITTLE_ENDIAN -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); - data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#else -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); - data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#endif - -static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, - u32 length) -{ - u32 extra = 0; - u32 data = 0; - u32 last; - - while (1) { - u32 len = ss->sge.length; - u32 off; - - BUG_ON(len == 0); - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; - /* If the source address is not aligned, try to align it. */ - off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); - if (off) { - u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & - ~(sizeof(u32) - 1)); - u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); - u32 y; - - y = sizeof(u32) - off; - if (len > y) - len = y; - if (len + extra >= sizeof(u32)) { - data |= set_upper_bits(v, extra * - BITS_PER_BYTE); - len = sizeof(u32) - extra; - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, len, extra); - if (len == length) { - last = data; - break; - } - extra += len; - } - } else if (extra) { - /* Source address is aligned. */ - u32 *addr = (u32 *) ss->sge.vaddr; - int shift = extra * BITS_PER_BYTE; - int ushift = 32 - shift; - u32 l = len; - - while (l >= sizeof(u32)) { - u32 v = *addr; - - data |= set_upper_bits(v, shift); - __raw_writel(data, piobuf); - data = get_upper_bits(v, ushift); - piobuf++; - addr++; - l -= sizeof(u32); - } - /* - * We still have 'extra' number of bytes leftover. - */ - if (l) { - u32 v = *addr; - - if (l + extra >= sizeof(u32)) { - data |= set_upper_bits(v, shift); - len -= l + extra - sizeof(u32); - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, l, - extra); - if (len == length) { - last = data; - break; - } - extra += l; - } - } else if (len == length) { - last = data; - break; - } - } else if (len == length) { - u32 w; - - /* - * Need to round up for the last dword in the - * packet. - */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); - piobuf += w - 1; - last = ((u32 *) ss->sge.vaddr)[w - 1]; - break; - } else { - u32 w = len >> 2; - - __iowrite32_copy(piobuf, ss->sge.vaddr, w); - piobuf += w; - - extra = len & (sizeof(u32) - 1); - if (extra) { - u32 v = ((u32 *) ss->sge.vaddr)[w]; - - /* Clear unused upper bytes */ - data = clear_upper_bytes(v, extra, 0); - } - } - update_sge(ss, len); - length -= len; - } - /* Update address before sending packet. */ - update_sge(ss, length); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(last, piobuf); - /* be sure trigger word is written */ - ipath_flush_wc(); -} - -/** - * ipath_verbs_send - send a packet from the verbs layer - * @dd: the infinipath device - * @hdrwords: the number of words in the header - * @hdr: the packet header - * @len: the length of the packet in bytes - * @ss: the SGE to send - * - * This is like ipath_sma_send_pkt() in that we need to be able to send - * packets after the chip is initialized (MADs) but also like - * ipath_layer_send_hdr() since its used by the verbs layer. - */ -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 *hdr, u32 len, struct ipath_sge_state *ss) -{ - u32 __iomem *piobuf; - u32 plen; - int ret; - - /* +1 is for the qword padding of pbc */ - plen = hdrwords + ((len + 3) >> 2) + 1; - if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); - ret = -EINVAL; - goto bail; - } - - /* Get a PIO buffer to use. */ - piobuf = ipath_getpiobuf(dd, NULL); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto bail; - } - - /* - * Write len to control qword, no flags. - * We have to flush after the PBC for correctness on some cpus - * or WC buffer can be written out of order. - */ - writeq(plen, piobuf); - ipath_flush_wc(); - piobuf += 2; - if (len == 0) { - /* - * If there is just the header portion, must flush before - * writing last word of header for correctness, and after - * the last header word (trigger word). - */ - __iowrite32_copy(piobuf, hdr, hdrwords - 1); - ipath_flush_wc(); - __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); - ipath_flush_wc(); - ret = 0; - goto bail; - } - - __iowrite32_copy(piobuf, hdr, hdrwords); - piobuf += hdrwords; - - /* The common case is aligned and contained in one segment. */ - if (likely(ss->num_sge == 1 && len <= ss->sge.length && - !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { - u32 w; - u32 *addr = (u32 *) ss->sge.vaddr; - - /* Update address before sending packet. */ - update_sge(ss, len); - /* Need to round up for the last dword in the packet. */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, addr, w - 1); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(addr[w - 1], piobuf + w - 1); - /* be sure trigger word is written */ - ipath_flush_wc(); - ret = 0; - goto bail; - } - copy_io(piobuf, ss, len); - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_verbs_send); - -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters); - -/** - * ipath_layer_get_counters - get various chip counters - * @dd: the infinipath device - * @cntrs: counters are placed here - * - * Return the counters needed by recv_pma_get_portcounters(). - */ -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - cntrs->symbol_error_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); - cntrs->link_error_recovery_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); - /* - * The link downed counter counts when the other side downs the - * connection. We add in the number of times we downed the link - * due to local link integrity errors to compensate. - */ - cntrs->link_downed_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); - cntrs->port_rcv_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); - cntrs->port_rcv_remphys_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); - cntrs->port_xmit_discards = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); - cntrs->port_xmit_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - cntrs->port_rcv_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - cntrs->port_xmit_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - cntrs->port_rcv_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ - - ret = 0; - -bail: - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_counters); - -int ipath_layer_want_buffer(struct ipath_devdata *dd) -{ - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_want_buffer); - int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) { int ret = 0; @@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd) } EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); - -int ipath_layer_enable_timer(struct ipath_devdata *dd) -{ - /* - * HT-400 has a design flaw where the chip and kernel idea - * of the tail register don't always agree, and therefore we won't - * get an interrupt on the next packet received. - * If the board supports per packet receive interrupts, use it. - * Otherwise, the timer function periodically checks for packets - * to cover this case. - * Either way, the timer is needed for verbs layer related - * processing. - */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, - 0x2074076542310ULL); - /* Enable GPIO bit 2 interrupt */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - (u64) (1 << 2)); - } - - init_timer(&dd->verbs_layer.l_timer); - dd->verbs_layer.l_timer.function = __ipath_verbs_timer; - dd->verbs_layer.l_timer.data = (unsigned long)dd; - dd->verbs_layer.l_timer.expires = jiffies + 1; - add_timer(&dd->verbs_layer.l_timer); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_enable_timer); - -int ipath_layer_disable_timer(struct ipath_devdata *dd) -{ - /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); - - del_timer_sync(&dd->verbs_layer.l_timer); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_disable_timer); - -/** - * ipath_layer_set_verbs_flags - set the verbs layer flags - * @dd: the infinipath device - * @flags: the flags to set - */ -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags) -{ - struct ipath_devdata *ss; - unsigned long lflags; - - spin_lock_irqsave(&ipath_devs_lock, lflags); - - list_for_each_entry(ss, &ipath_dev_list, ipath_list) { - if (!(ss->ipath_flags & IPATH_INITTED)) - continue; - if ((flags & IPATH_VERBS_KERNEL_SMA) && - !(*ss->ipath_statusp & IPATH_STATUS_SMA)) - *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; - else - *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - } - - spin_unlock_irqrestore(&ipath_devs_lock, lflags); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags); - -/** - * ipath_layer_get_npkeys - return the size of the PKEY table for port 0 - * @dd: the infinipath device - */ -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd) -{ - return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys); - -/** - * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table - * @dd: the infinipath device - * @index: the PKEY index - */ -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index) -{ - unsigned ret; - - if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) - ret = 0; - else - ret = dd->ipath_pd[0]->port_pkeys[index]; - - return ret; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pkey); - -/** - * ipath_layer_get_pkeys - return the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the pkey table is placed here - */ -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd = dd->ipath_pd[0]; - - memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys); - -/** - * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the infinipath device - * @key: the PKEY index - * - * Return true if this was the last reference and the hardware table entry - * needs to be changed. - */ -static int rm_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (dd->ipath_pkeys[i] != key) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { - dd->ipath_pkeys[i] = 0; - ret = 1; - goto bail; - } - break; - } - - ret = 0; - -bail: - return ret; -} - -/** - * add_pkey - add the given PKEY to the hardware table - * @dd: the infinipath device - * @key: the PKEY - * - * Return an error code if unable to add the entry, zero if no change, - * or 1 if the hardware PKEY register needs to be updated. - */ -static int add_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - u16 lkey = key & 0x7FFF; - int any = 0; - int ret; - - if (lkey == 0x7FFF) { - ret = 0; - goto bail; - } - - /* Look for an empty slot or a matching PKEY. */ - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - /* If it matches exactly, try to increment the ref count */ - if (dd->ipath_pkeys[i] == key) { - if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { - ret = 0; - goto bail; - } - /* Lost the race. Look for an empty slot below. */ - atomic_dec(&dd->ipath_pkeyrefs[i]); - any++; - } - /* - * It makes no sense to have both the limited and unlimited - * PKEY set at the same time since the unlimited one will - * disable the limited one. - */ - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ret = -EBUSY; - goto bail; - } - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - dd->ipath_pkeys[i] = key; - ret = 1; - goto bail; - } - } - ret = -EBUSY; - -bail: - return ret; -} - -/** - * ipath_layer_set_pkeys - set the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the PKEY table - */ -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd; - int i; - int changed = 0; - - pd = dd->ipath_pd[0]; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - u16 key = pkeys[i]; - u16 okey = pd->port_pkeys[i]; - - if (key == okey) - continue; - /* - * The value of this PKEY table entry is changing. - * Remove the old entry in the hardware's array of PKEYs. - */ - if (okey & 0x7FFF) - changed |= rm_pkey(dd, okey); - if (key & 0x7FFF) { - int ret = add_pkey(dd, key); - - if (ret < 0) - key = 0; - else - changed |= ret; - } - pd->port_pkeys[i] = key; - } - if (changed) { - u64 pkey; - - pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys); - -/** - * ipath_layer_get_linkdowndefaultstate - get the default linkdown state - * @dd: the infinipath device - * - * Returns zero if the default is POLL, 1 if the default is SLEEP. - */ -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd) -{ - return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate); - -/** - * ipath_layer_set_linkdowndefaultstate - set the default linkdown state - * @dd: the infinipath device - * @sleep: the new state - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep) -{ - if (sleep) - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - else - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate); - -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold); - -/** - * ipath_layer_set_phyerrthreshold - set the physical error threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold); - -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold); - -/** - * ipath_layer_set_overrunthreshold - set the overrun threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold); - -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - return dd->ipath_f_get_boardname(dd, name, namelen); -} -EXPORT_SYMBOL_GPL(ipath_layer_get_boardname); - -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd) -{ - return dd->ipath_rcvhdrentsize; -} -EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 71485096fca..3854a4eae68 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -40,73 +40,9 @@ */ struct sk_buff; -struct ipath_sge_state; struct ipath_devdata; struct ether_header; -struct ipath_layer_counters { - u64 symbol_error_counter; - u64 link_error_recovery_counter; - u64 link_downed_counter; - u64 port_rcv_errors; - u64 port_rcv_remphys_errors; - u64 port_xmit_discards; - u64 port_xmit_data; - u64 port_rcv_data; - u64 port_xmit_packets; - u64 port_rcv_packets; - u32 local_link_integrity_errors; - u32 excessive_buffer_overrun_errors; -}; - -/* - * A segment is a linear region of low physical memory. - * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct ipath_seg { - void *vaddr; - size_t length; -}; - -/* The number of ipath_segs that fit in a page. */ -#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) - -struct ipath_segarray { - struct ipath_seg segs[IPATH_SEGSZ]; -}; - -struct ipath_mregion { - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of ipath_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - struct ipath_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct ipath_sge { - struct ipath_mregion *mr; - void *vaddr; /* current pointer into the segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -struct ipath_sge_state { - struct ipath_sge *sg_list; /* next SGE to be used if any */ - struct ipath_sge sge; /* progress state for the current SGE */ - u8 num_sge; -}; - int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), void (*l_remove)(void *), int (*l_intr)(void *, u32), @@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), struct sk_buff *), u16 rcv_opcode, int (*l_rcv_lid)(void *, void *)); -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail)(void *arg), - void (*l_rcv)(void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb)(void *arg)); void ipath_layer_unregister(void); -void ipath_verbs_unregister(void); int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); u16 ipath_layer_get_lid(struct ipath_devdata *dd); int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); u16 ipath_layer_get_bcast(struct ipath_devdata *dd); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd); -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state); -int ipath_layer_set_mtu(struct ipath_devdata *, u16); -int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr); -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 * hdr, u32 len, struct ipath_sge_state *ss); int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen); -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait); -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs); -int ipath_layer_want_buffer(struct ipath_devdata *dd); -int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid); -__be64 ipath_layer_get_guid(struct ipath_devdata *); -u32 ipath_layer_get_nguid(struct ipath_devdata *); -u32 ipath_layer_get_majrev(struct ipath_devdata *); -u32 ipath_layer_get_minrev(struct ipath_devdata *); -u32 ipath_layer_get_pcirev(struct ipath_devdata *); -u32 ipath_layer_get_flags(struct ipath_devdata *dd); -struct device *ipath_layer_get_device(struct ipath_devdata *dd); -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd); -u32 ipath_layer_get_vendorid(struct ipath_devdata *); -u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd); -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd); -int ipath_layer_enable_timer(struct ipath_devdata *dd); -int ipath_layer_disable_timer(struct ipath_devdata *dd); -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags); -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd); -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index); -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd); -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep); -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd); -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n); -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd); -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n); -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); /* ipath_ether interrupt values */ #define IPATH_LAYER_INT_IF_UP 0x2 @@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); #define IPATH_LAYER_INT_SEND_CONTINUE 0x10 #define IPATH_LAYER_INT_BCAST 0x40 -/* _verbs_layer.l_flags */ -#define IPATH_VERBS_KERNEL_SMA 0x1 - extern unsigned ipath_debug; /* debugging bit mask */ #endif /* _IPATH_LAYER_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index d3402341b7d..72d1db89db8 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = to_idev(ibdev)->sys_image_guid; - nip->node_guid = ipath_layer_get_guid(dd); + nip->node_guid = dd->ipath_guid; nip->port_guid = nip->sys_guid; - nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd)); - nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd)); - majrev = ipath_layer_get_majrev(dd); - minrev = ipath_layer_get_minrev(dd); + nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->ipath_deviceid); + majrev = dd->ipath_majrev; + minrev = dd->ipath_minrev; nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; - vendor = ipath_layer_get_vendorid(dd); + vendor = dd->ipath_vendorid; nip->vendor_id[0] = 0; nip->vendor_id[1] = vendor >> 8; nip->vendor_id[2] = vendor; @@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, */ if (startgx == 0) /* The first is a copy of the read-only HW GUID. */ - *p = ipath_layer_get_guid(to_idev(ibdev)->dd); + *p = to_idev(ibdev)->dd->ipath_guid; else smp->status |= IB_SMP_INVALID_FIELD; return reply(smp); } + +static int get_overrunthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; +} + +/** + * set_overrunthreshold - set the overrun threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +static int get_phyerrthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @dd: the infinipath device + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct ipath_devdata *dd) +{ + return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); +} + static int recv_subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, u8 port) { @@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, (dev->mkeyprot_resv_lmc >> 6) == 0) pip->mkey = dev->mkey; pip->gid_prefix = dev->gid_prefix; - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; pip->sm_lid = cpu_to_be16(dev->sm_lid); pip->cap_mask = cpu_to_be32(dev->port_cap_flags); @@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->link_width_supported = 3; /* 1x or 4x */ pip->link_width_active = 2; /* 4x */ pip->linkspeed_portstate = 0x10; /* 2.5Gbps */ - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1; pip->portphysstate_linkdown = (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | - (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2); + (get_linkdowndefaultstate(dev->dd) ? 1 : 2); pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->mkey_violations = cpu_to_be16(dev->mkey_violations); /* P_KeyViolations are counted by hardware. */ pip->pkey_violations = - cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) - + cpu_to_be16((ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations) & 0xFFFF); pip->qkey_violations = cpu_to_be16(dev->qkey_violations); /* Only the hardware GUID is supported for now */ @@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, /* 32.768 usec. response time (guessing) */ pip->resv_resptimevalue = 3; pip->localphyerrors_overrunerrors = - (ipath_layer_get_phyerrthreshold(dev->dd) << 4) | - ipath_layer_get_overrunthreshold(dev->dd); + (get_phyerrthreshold(dev->dd) << 4) | + get_overrunthreshold(dev->dd); /* pip->max_credit_hint; */ /* pip->link_roundtrip_latency[3]; */ @@ -237,6 +313,20 @@ bail: return ret; } +/** + * get_pkeys - return the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) +{ + struct ipath_portdata *pd = dd->ipath_pd[0]; + + memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); + + return 0; +} + static int recv_subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp, memset(smp->data, 0, sizeof(smp->data)); if (startpx == 0) { struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); - ipath_layer_get_pkeys(dev->dd, p); + get_pkeys(dev->dd, p); for (i = 0; i < n; i++) q[i] = cpu_to_be16(p[i]); @@ -269,6 +359,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp, } /** + * set_linkdowndefaultstate - set the default linkdown state + * @dd: the infinipath device + * @sleep: the new state + * + * Note that this will only take effect when the link state changes. + */ +static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) +{ + if (sleep) + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + else + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + return 0; +} + +/** * recv_subn_set_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, u8 state; u16 lstate; u32 mtu; - int ret; + int ret, ore; if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) goto err; @@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); lid = be16_to_cpu(pip->lid); - if (lid != ipath_layer_get_lid(dev->dd)) { + if (lid != dev->dd->ipath_lid) { /* Must be a valid unicast LID address. */ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) goto err; @@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case 0: /* NOP */ break; case 1: /* SLEEP */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1)) + if (set_linkdowndefaultstate(dev->dd, 1)) goto err; break; case 2: /* POLL */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0)) + if (set_linkdowndefaultstate(dev->dd, 0)) goto err; break; default: @@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* XXX We have already partially updated our state! */ goto err; } - ipath_layer_set_mtu(dev->dd, mtu); + ipath_set_mtu(dev->dd, mtu); dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; @@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * later. */ if (pip->pkey_violations == 0) - dev->z_pkey_violations = - ipath_layer_get_cr_errpkey(dev->dd); + dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd); if (pip->qkey_violations == 0) dev->qkey_violations = 0; - if (ipath_layer_set_phyerrthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors >> 4) & 0xF)) + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF)) goto err; - if (ipath_layer_set_overrunthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors & 0xF))) + if (set_overrunthreshold(dev->dd, (ore & 0xF))) goto err; dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; @@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * is down or is being set to down. */ state = pip->linkspeed_portstate & 0xF; - flags = ipath_layer_get_flags(dev->dd); + flags = dev->dd->ipath_flags; lstate = (pip->portphysstate_linkdown >> 4) & 0xF; if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) goto err; @@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* FALLTHROUGH */ case IB_PORT_DOWN: if (lstate == 0) - if (ipath_layer_get_linkdowndefaultstate(dev->dd)) + if (get_linkdowndefaultstate(dev->dd)) lstate = IPATH_IB_LINKDOWN_SLEEP; else lstate = IPATH_IB_LINKDOWN; @@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, lstate = IPATH_IB_LINKDOWN_DISABLE; else goto err; - ipath_layer_set_linkstate(dev->dd, lstate); + ipath_set_linkstate(dev->dd, lstate); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ARMED: if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ACTIVE: if (!(flags & IPATH_LINKARMED)) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); event.event = IB_EVENT_PORT_ACTIVE; ib_dispatch_event(&event); break; @@ -493,6 +597,152 @@ done: return ret; } +/** + * rm_pkey - decrecment the reference count for the given PKEY + * @dd: the infinipath device + * @key: the PKEY index + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (dd->ipath_pkeys[i] != key) + continue; + if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { + dd->ipath_pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @dd: the infinipath device + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. */ + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (dd->ipath_pkeys[i] == key) { + if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&dd->ipath_pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i] && + atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { + /* for ipathstats, etc. */ + ipath_stats.sps_pkeys[i] = lkey; + dd->ipath_pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the PKEY table + */ +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +{ + struct ipath_portdata *pd; + int i; + int changed = 0; + + pd = dd->ipath_pd[0]; + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = pd->port_pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(dd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(dd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + pd->port_pkeys[i] = key; + } + if (changed) { + u64 pkey; + + pkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", + (unsigned long long) pkey); + ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, + pkey); + } + return 0; +} + static int recv_subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, __be16 *p = (__be16 *) smp->data; u16 *q = (u16 *) smp->data; struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); for (i = 0; i < n; i++) q[i] = be16_to_cpu(p[i]); - if (startpx != 0 || - ipath_layer_set_pkeys(dev->dd, q) != 0) + if (startpx != 0 || set_pkeys(dev->dd, q) != 0) smp->status |= IB_SMP_INVALID_FIELD; return recv_subn_get_pkeytable(smp, ibdev); @@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; u8 port_select = p->port_select; - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); /* Adjust counters for any resets done. */ cntrs.symbol_error_counter -= dev->z_symbol_error_counter; @@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, u64 swords, rwords, spkts, rpkts, xwait; u8 port_select = p->port_select; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); /* Adjust counters for any resets done. */ swords -= dev->z_port_xmit_data; @@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; /* * Since the HW doesn't support clearing counters, we save the * current count and subtract it from future responses. */ - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) dev->z_symbol_error_counter = cntrs.symbol_error_counter; @@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp, struct ipath_ibdev *dev = to_idev(ibdev); u64 swords, rwords, spkts, rpkts, xwait; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) dev->z_port_xmit_data = swords; diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c new file mode 100644 index 00000000000..11b7378ff21 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "ipath_verbs.h" + +/** + * ipath_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct ipath_mmap_info + */ +void ipath_release_mmap_info(struct kref *ref) +{ + struct ipath_mmap_info *ip = + container_of(ref, struct ipath_mmap_info, ref); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void ipath_vma_open(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); + ip->mmap_cnt++; +} + +static void ipath_vma_close(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + ip->mmap_cnt--; + kref_put(&ip->ref, ipath_release_mmap_info); +} + +static struct vm_operations_struct ipath_vm_ops = { + .open = ipath_vma_open, + .close = ipath_vma_close, +}; + +/** + * ipath_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct ipath_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct ipath_mmap_info *ip, **pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (void *) offset != ip->obj) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + *pp = ip->next; + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &ipath_vm_ops; + vma->vm_private_data = ip; + ipath_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 4ac31a5da33..b36f6fb3e37 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -36,6 +36,18 @@ #include "ipath_verbs.h" +/* Fast memory region */ +struct ipath_fmr { + struct ib_fmr ibfmr; + u8 page_shift; + struct ipath_mregion mr; /* must be last */ +}; + +static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct ipath_fmr, ibfmr); +} + /** * ipath_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 83e557be591..224b0f40767 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -35,7 +35,7 @@ #include <linux/vmalloc.h> #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) @@ -44,19 +44,6 @@ #define find_next_offset(map, off) find_next_zero_bit((map)->page, \ BITS_PER_PAGE, off) -#define TRANS_INVALID 0 -#define TRANS_ANY2RST 1 -#define TRANS_RST2INIT 2 -#define TRANS_INIT2INIT 3 -#define TRANS_INIT2RTR 4 -#define TRANS_RTR2RTS 5 -#define TRANS_RTS2RTS 6 -#define TRANS_SQERR2RTS 7 -#define TRANS_ANY2ERR 8 -#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */ -#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */ -#define TRANS_SQD2RTS 11 /* error if not drained */ - /* * Convert the AETH credit code into the number of credits. */ @@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - _VERBS_INFO("QP memory leak!\n"); + ipath_dbg(KERN_INFO "QP memory leak!\n"); qp = nqp; } } @@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->r_rq.head = 0; - qp->r_rq.tail = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } qp->r_reuse_sge = 0; } @@ -373,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - _VERBS_INFO("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ @@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp) qp->s_hdrwords = 0; qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - wc.opcode = IB_WC_RECV; - spin_lock(&qp->r_rq.lock); - while (qp->r_rq.tail != qp->r_rq.head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; - if (++qp->r_rq.tail >= qp->r_rq.size) - qp->r_rq.tail = 0; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + if (qp->r_rq.wq) { + struct ipath_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + wc.opcode = IB_WC_RECV; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); } - spin_unlock(&qp->r_rq.lock); } /** @@ -426,11 +432,12 @@ void ipath_error_qp(struct ipath_qp *qp) * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify + * @udata: user data for ipathverbs.so * * Returns 0 on success, otherwise returns an errno. */ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask) + int attr_mask, struct ib_udata *udata) { struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); @@ -448,19 +455,46 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask)) goto inval; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid == 0 || attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) goto inval; + if ((attr->ah_attr.ah_flags & IB_AH_GRH) && + (attr->ah_attr.grh.sgid_index > 1)) + goto inval; + } + if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) + if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; if (attr_mask & IB_QP_MIN_RNR_TIMER) if (attr->min_rnr_timer > 31) goto inval; + if (attr_mask & IB_QP_PORT) + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_PATH_MTU) + if (attr->path_mtu > IB_MTU_4096) + goto inval; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + if (attr->max_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_PATH_MIG_STATE) + if (attr->path_mig_state != IB_MIG_MIGRATED && + attr->path_mig_state != IB_MIG_REARM) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); @@ -511,6 +545,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; + if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -543,7 +580,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->r_rq.size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; attr->cap.max_inline_data = 0; @@ -557,7 +594,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->max_dest_rd_atomic = 1; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; - attr->timeout = 0; + attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; attr->rnr_retry = qp->s_rnr_retry; attr->alt_port_num = 0; @@ -569,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - init_attr->sq_sig_type = - (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) - ? IB_SIGNAL_REQ_WR : 0; + if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = qp->ibqp.qp_type; init_attr->port_num = 1; return 0; @@ -596,13 +634,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) } else { u32 min, max, x; u32 credits; - + struct ipath_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; /* * Compute the number of credits available (RWQEs). * XXX Not holding the r_rq.lock here so there is a small * chance that the pair of reads are not atomic. */ - credits = qp->r_rq.head - qp->r_rq.tail; + credits = head - tail; if ((int)credits < 0) credits += qp->r_rq.size; /* @@ -679,27 +727,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - qp = kmalloc(sizeof(*qp), GFP_KERNEL); + sz = sizeof(*qp); + if (init_attr->srq) { + struct ipath_srq *srq = to_isrq(init_attr->srq); + + sz += sizeof(*qp->r_sg_list) * + srq->rq.max_sge; + } else + sz += sizeof(*qp->r_sg_list) * + init_attr->cap.max_recv_sge; + qp = kmalloc(sz, GFP_KERNEL); if (!qp) { - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_swq; } if (init_attr->srq) { + sz = 0; qp->r_rq.size = 0; qp->r_rq.max_sge = 0; qp->r_rq.wq = NULL; + init_attr->cap.max_recv_wr = 0; + init_attr->cap.max_recv_sge = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) + + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); + qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) { - kfree(qp); - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_qp; } } @@ -719,24 +777,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; - qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? - 1 << IPATH_S_SIGNAL_REQ_WR : 0; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + else + qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { - vfree(swq); - vfree(qp->r_rq.wq); - kfree(qp); ret = ERR_PTR(err); - goto bail; + goto bail_rwq; } + qp->ip = NULL; ipath_reset_qp(qp); - - /* Tell the core driver that the kernel SMA is present. */ - if (init_attr->qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, - IPATH_VERBS_KERNEL_SMA); break; default: @@ -747,8 +800,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->cap.max_inline_data = 0; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) qp->r_rq.wq; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_rwq; + } + + if (qp->r_rq.wq) { + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_rwq; + } + qp->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = qp->r_rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_ipath_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + ret = &qp->ibqp; + goto bail; +bail_ip: + kfree(qp->ip); +bail_rwq: + vfree(qp->r_rq.wq); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); bail: return ret; } @@ -768,15 +876,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp) struct ipath_ibdev *dev = to_idev(ibqp->device); unsigned long flags; - /* Tell the core driver that the kernel SMA is gone. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, 0); - - spin_lock_irqsave(&qp->r_rq.lock, flags); - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->s_lock, flags); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); @@ -797,8 +902,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp) if (atomic_read(&qp->refcount) != 0) ipath_free_qp(&dev->qp_table, qp); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); vfree(qp->s_wq); - vfree(qp->r_rq.wq); kfree(qp); return 0; } @@ -850,8 +958,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); + ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 774d1615ce2..a08654042c0 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x @@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << @@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); + hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); @@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 4 * bytes of the BTH header (PSN) is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 89df8f5ea99..6e23b3d632b 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -36,8 +36,7 @@ /* * This file should only be included by kernel source, and by the diags. It - * defines the registers, and their contents, for the InfiniPath HT-400 - * chip. + * defines the registers, and their contents, for InfiniPath chips. */ /* @@ -283,10 +282,12 @@ #define INFINIPATH_XGXS_RESET 0x7ULL #define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 +#define INFINIPATH_XGXS_RX_POL_SHIFT 19 +#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ -/* TID entries (memory), HT400-only */ +/* TID entries (memory), HT-only */ #define INFINIPATH_RT_VALID 0x8000000000000000ULL #define INFINIPATH_RT_ADDR_SHIFT 0 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 772bc59fb85..5c1da2d25e0 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* * Convert the AETH RNR timeout code into the number of milliseconds. @@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) spin_unlock_irqrestore(&dev->pending_lock, flags); } +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(&dev->lk_table, + &qp->r_sg_list[j], &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + qp->r_sge.sge = qp->r_sg_list[0]; + qp->r_sge.sg_list = qp->r_sg_list + 1; + qp->r_sge.num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + /** * ipath_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP @@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) { unsigned long flags; struct ipath_rq *rq; + struct ipath_rwq *wq; struct ipath_srq *srq; struct ipath_rwqe *wqe; - int ret = 1; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; - if (!qp->ibqp.srq) { + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; rq = &qp->r_rq; - spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); - qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - goto done; } - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + do { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + ret = 0; + goto bail; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + } while (!wr_id_only && !init_sge(qp, wqe)); qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq->ibsrq.event_handler) { - struct ib_event ev; + wq->tail = tail; + + ret = 1; + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { + struct ib_event ev; + srq->limit = 0; spin_unlock_irqrestore(&rq->lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); goto bail; } } - -done: spin_unlock_irqrestore(&rq->lock, flags); + bail: return ret; } @@ -422,6 +470,15 @@ done: wake_up(&qp->wait); } +static int want_buffer(struct ipath_devdata *dd) +{ + set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + + return 0; +} + /** * ipath_no_bufs_available - tell the layer driver we need buffers * @qp: the QP that caused the problem @@ -438,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) list_add_tail(&qp->piowait, &dev->piowait); spin_unlock_irqrestore(&dev->pending_lock, flags); /* - * Note that as soon as ipath_layer_want_buffer() is called and + * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() * could be called. If we are still in the tasklet function, * tasklet_hi_schedule() will not call us until the next time @@ -448,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) */ clear_bit(IPATH_S_BUSY, &qp->s_flags); tasklet_unlock(&qp->s_task); - ipath_layer_want_buffer(dev->dd); + want_buffer(dev->dd); dev->n_piowait++; } @@ -563,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = dev->gid_prefix; - hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd); + hdr->sgid.global.interface_id = dev->dd->ipath_guid; hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ @@ -595,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data) if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) goto bail; - if (unlikely(qp->remote_ah_attr.dlid == - ipath_layer_get_lid(dev->dd))) { + if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { ipath_ruc_loopback(qp); goto clear; } @@ -663,8 +719,8 @@ again: qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); - bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index f760434660b..941e866d951 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_ibdev *dev = to_idev(ibsrq->device); + struct ipath_rwq *wq; unsigned long flags; int ret; for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > srq->rq.max_sge) { + if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&srq->rq.lock, flags); - next = srq->rq.head + 1; + wq = srq->rq.wq; + next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == srq->rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); + wqe = get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(srq->ibsrq.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok(&dev->lk_table, - &wqe->sg_list[j], - &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - srq->rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } ret = 0; @@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, if (dev->n_srqs_allocated == ib_ipath_max_srqs) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } srq = kmalloc(sizeof(*srq), GFP_KERNEL); if (!srq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* * Need to use vmalloc() if we want to support large #s of entries. */ srq->rq.size = srq_init_attr->attr.max_wr + 1; - sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge + + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct ipath_rwqe); - srq->rq.wq = vmalloc(srq->rq.size * sz); + srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { - kfree(srq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_srq; } /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) srq->rq.wq; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wq; + } + + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + srq->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = srq->rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + srq->rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + srq->ip = NULL; + + /* * ib_create_srq() will initialize srq->ibsrq. */ spin_lock_init(&srq->rq.lock); - srq->rq.head = 0; - srq->rq.tail = 0; + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; srq->rq.max_sge = srq_init_attr->attr.max_sge; srq->limit = srq_init_attr->attr.srq_limit; + dev->n_srqs_allocated++; + ret = &srq->ibsrq; + goto done; - dev->n_srqs_allocated++; +bail_wq: + vfree(srq->rq.wq); -bail: +bail_srq: + kfree(srq); + +done: return ret; } @@ -188,83 +203,130 @@ bail: * @ibsrq: the SRQ to modify * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify + * @udata: user data for ipathverbs.so */ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); - unsigned long flags; - int ret; + int ret = 0; - if (attr_mask & IB_SRQ_MAX_WR) - if ((attr->max_wr > ib_ipath_max_srq_wrs) || - (attr->max_sge > srq->rq.max_sge)) { - ret = -EINVAL; - goto bail; - } + if (attr_mask & IB_SRQ_MAX_WR) { + struct ipath_rwq *owq; + struct ipath_rwq *wq; + struct ipath_rwqe *p; + u32 sz, size, n, head, tail; - if (attr_mask & IB_SRQ_LIMIT) - if (attr->srq_limit >= srq->rq.size) { + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_ipath_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { ret = -EINVAL; goto bail; } - if (attr_mask & IB_SRQ_MAX_WR) { - struct ipath_rwqe *wq, *p; - u32 sz, size, n; - sz = sizeof(struct ipath_rwqe) + - attr->max_sge * sizeof(struct ipath_sge); + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = vmalloc(size * sz); + wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); if (!wq) { ret = -ENOMEM; goto bail; } - spin_lock_irqsave(&srq->rq.lock, flags); - if (srq->rq.head < srq->rq.tail) - n = srq->rq.size + srq->rq.head - srq->rq.tail; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = (__u64) wq; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) { + vfree(wq); + goto bail; + } + udata->outbuf = (void __user *) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) { + vfree(wq); + goto bail; + } + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + if (head >= srq->rq.size) + head = 0; + tail = owq->tail; + if (tail >= srq->rq.size) + tail = 0; + n = head; + if (n < tail) + n += srq->rq.size - tail; else - n = srq->rq.head - srq->rq.tail; - if (size <= n || size <= srq->limit) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + n -= tail; + if (size <= n) { + spin_unlock_irq(&srq->rq.lock); vfree(wq); ret = -EINVAL; goto bail; } n = 0; - p = wq; - while (srq->rq.tail != srq->rq.head) { + p = wq->wq; + while (tail != head) { struct ipath_rwqe *wqe; int i; - wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail); + wqe = get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; - p->length = wqe->length; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; p = (struct ipath_rwqe *)((char *) p + sz); - if (++srq->rq.tail >= srq->rq.size) - srq->rq.tail = 0; + if (++tail >= srq->rq.size) + tail = 0; } - vfree(srq->rq.wq); srq->rq.wq = wq; srq->rq.size = size; - srq->rq.head = n; - srq->rq.tail = 0; - srq->rq.max_sge = attr->max_sge; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - - if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irqsave(&srq->rq.lock, flags); - srq->limit = attr->srq_limit; - spin_unlock_irqrestore(&srq->rq.lock, flags); + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct ipath_mmap_info *ip = srq->ip; + struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + + ip->obj = wq; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); } - ret = 0; bail: return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 70351b7e35c..30a825928fc 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque) } } - if (dd->ipath_nosma_bufs) { - dd->ipath_nosma_secs += 5; - if (dd->ipath_nosma_secs >= 30) { - ipath_cdbg(SMA, "No SMA bufs avail %u seconds; " - "cancelling pending sends\n", - dd->ipath_nosma_secs); - /* - * issue an abort as well, in case we have a packet - * stuck in launch fifo. This could corrupt an - * outgoing user packet in the worst case, - * but this is a pretty catastrophic, anyway. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k - - dd->ipath_lastport_piobuf); - /* start again, if necessary */ - dd->ipath_nosma_secs = 0; - } else - ipath_cdbg(SMA, "No SMA bufs avail %u tries, " - "after %u seconds\n", - dd->ipath_nosma_bufs, - dd->ipath_nosma_secs); - } - done: mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index b98821d7801..e299148c4b6 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -35,7 +35,6 @@ #include <linux/pci.h> #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" /** @@ -76,7 +75,7 @@ bail: static ssize_t show_version(struct device_driver *dev, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version); + return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); } static ssize_t show_num_units(struct device_driver *dev, char *buf) @@ -108,8 +107,8 @@ static const char *ipath_status_str[] = { "Initted", "Disabled", "Admin_Disabled", - "OIB_SMA", - "SMA", + "", /* This used to be the old "OIB_SMA" status. */ + "", /* This used to be the old "SMA" status. */ "Present", "IB_link_up", "IB_configured", @@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev, unit = dd->ipath_unit; dd->ipath_mlid = mlid; - ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST); goto bail; invalid: @@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_linkstate(dd, state); + r = ipath_set_linkstate(dd, state); if (r < 0) { ret = r; goto bail; @@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_mtu(dd, mtu); + r = ipath_set_mtu(dd, mtu); if (r < 0) ret = r; @@ -563,6 +561,33 @@ bail: return ret; } +static ssize_t store_rx_pol_inv(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0) + goto invalid; + + r = ipath_set_rx_pol_inv(dd, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); +bail: + return ret; +} + + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -589,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); +static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -603,6 +629,7 @@ static struct attribute *dev_attributes[] = { &dev_attr_boardversion.attr, &dev_attr_unit.attr, &dev_attr_enabled.attr, + &dev_attr_rx_pol_inv.attr, NULL }; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index c33abea2d5a..0fd3cded16b 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x @@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * size to 56 bytes so the last 4 bytes of * the BTH header (PSN) is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 3466129af80..6991d1d74e3 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -34,7 +34,54 @@ #include <rdma/ib_smi.h> #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" + +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + *lengthp = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(&dev->lk_table, + j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + *lengthp += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} /** * ipath_ud_loopback - handle send on loopback QPs @@ -46,6 +93,8 @@ * * This is called from ipath_post_ud_send() to forward a WQE addressed * to the same HCA. + * Note that the receive interrupt handler may be calling ipath_ud_rcv() + * while this is being called. */ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss, @@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_srq *srq; struct ipath_sge_state rsge; struct ipath_sge *sge; + struct ipath_rwq *wq; struct ipath_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + u32 rlen; qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); if (!qp) @@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->imm_data = 0; } + if (wr->num_sge > 1) { + rsge.sg_list = kmalloc((wr->num_sge - 1) * + sizeof(struct ipath_sge), + GFP_ATOMIC); + } else + rsge.sg_list = NULL; + /* * Get the next work request entry to find where to put the data. * Note that it is safe to drop the lock after changing rq->tail @@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, */ if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { srq = NULL; + handler = NULL; rq = &qp->r_rq; } + spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto done; + wq = rq->wq; + tail = wq->tail; + while (1) { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto bail_sge; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + if (init_sge(qp, wqe, &rlen, &rsge)) + break; + wq->tail = tail; } /* Silently drop packets which are too big. */ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc->byte_len > wqe->length) { + if (wc->byte_len > rlen) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; - goto done; + goto bail_sge; } + wq->tail = tail; wc->wr_id = wqe->wr_id; - rsge.sge = wqe->sg_list[0]; - rsge.sg_list = wqe->sg_list + 1; - rsge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { struct ib_event ev; @@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); } else spin_unlock_irqrestore(&rq->lock, flags); } else spin_unlock_irqrestore(&rq->lock, flags); + ah_attr = &to_iah(wr->wr.ud.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); @@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ wc->pkey_index = 0; - wc->slid = ipath_layer_get_lid(dev->dd) | + wc->slid = dev->dd->ipath_lid | (ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); wc->sl = ah_attr->sl; @@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, wr->send_flags & IB_SEND_SOLICITED); +bail_sge: + kfree(rsge.sg_list); done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -276,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) ss.num_sge++; } /* Check for invalid packet size. */ - if (len > ipath_layer_get_ibmtu(dev->dd)) { + if (len > dev->dd->ipath_ibmtu) { ret = -EINVAL; goto bail; } @@ -298,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) dev->n_unicast_xmit++; lid = ah_attr->dlid & ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid == ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid == dev->dd->ipath_lid)) { /* * Pass in an uninitialized ib_wc to save stack * space. @@ -327,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix; qp->s_hdr.u.l.grh.sgid.global.interface_id = - ipath_layer_get_guid(dev->dd); + dev->dd->ipath_guid; qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid; /* * Don't worry about sending to locally attached multicast @@ -357,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); @@ -368,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) bth0 |= 1 << 23; bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + ipath_get_pkey(dev->dd, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int opcode; u32 hdrsize; u32 pad; - unsigned long flags; struct ib_wc wc; u32 qkey; u32 src_qp; - struct ipath_rq *rq; - struct ipath_srq *srq; - struct ipath_rwqe *wqe; u16 dlid; int header_in_data; @@ -458,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 12 * bytes of the IB header is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { qkey = be32_to_cpu(((__be32 *) data)[1]); src_qp = be32_to_cpu(((__be32 *) data)[2]); @@ -547,19 +619,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. - * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. */ - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; - } else { - srq = NULL; - rq = &qp->r_rq; - } - spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); + if (qp->r_reuse_sge) + qp->r_reuse_sge = 0; + else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. * Otherwise, count them as buffer overruns since usually, @@ -573,39 +636,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto bail; } /* Silently drop packets which are too big. */ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc.byte_len > wqe->length) { - spin_unlock_irqrestore(&rq->lock, flags); + if (wc.byte_len > qp->r_len) { + qp->r_reuse_sge = 1; dev->n_pkt_drops++; goto bail; } - wc.wr_id = wqe->wr_id; - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { - u32 n; - - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; - else - n = rq->head - rq->tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); - } else - spin_unlock_irqrestore(&rq->lock, flags); - } else - spin_unlock_irqrestore(&rq->lock, flags); if (has_grh) { ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh)); @@ -614,6 +649,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index d70a9b6b523..b8381c5e72b 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -33,15 +33,13 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_verbs.h> +#include <linux/io.h> #include <linux/utsname.h> #include "ipath_kernel.h" #include "ipath_verbs.h" #include "ipath_common.h" -/* Not static, because we don't want the compiler removing it */ -const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR; - static unsigned int ib_ipath_qp_table_size = 251; module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, MODULE_PARM_DESC(lkey_table_size, "LKEY table size in bits (2^n, 1 <= n <= 23)"); -unsigned int ib_ipath_debug; /* debug mask */ -module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "Verbs debug mask"); - static unsigned int ib_ipath_max_pds = 0xFFFF; module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_pds, @@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); +unsigned int ib_ipath_max_qps = 16384; +module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + unsigned int ib_ipath_max_sges = 0x60; module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); @@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic <support@pathscale.com>"); -MODULE_DESCRIPTION("QLogic InfiniPath driver"); +static unsigned int ib_ipath_disable_sma; +module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA"); const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, @@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_ERR] = 0, }; +struct ipath_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct ipath_ucontext, ibucontext); +} + /* * Translate ib_wr_opcode into ib_wc_opcode. */ @@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; /* Check that state is OK to post receive. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { *bad_wr = wr; ret = -EINVAL; goto bail; @@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > qp->r_rq.max_sge) { + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&qp->r_rq.lock, flags); - next = qp->r_rq.head + 1; + next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == qp->r_rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&qp->r_rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head); + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(qp->ibqp.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok( - &to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - qp->r_rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&qp->r_rq.lock, flags); } ret = 0; @@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: + if (ib_ipath_disable_sma) + break; + /* FALLTHROUGH */ case IB_QPT_UD: ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); break; @@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, } /** - * ipath_ib_rcv - process and incoming packet + * ipath_ib_rcv - process an incoming packet * @arg: the device pointer * @rhdr: the header of the packet * @data: the packet data @@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, * This is called from ipath_kreceive() to process an incoming packet at * interrupt level. Tlen is the length of the header + data + CRC in bytes. */ -static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) +void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, + u32 tlen) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_ib_header *hdr = rhdr; struct ipath_other_headers *ohdr; struct ipath_qp *qp; @@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) lid = be16_to_cpu(hdr->lrh[1]); if (lid < IPATH_MULTICAST_LID_BASE) { lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid != ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid != dev->dd->ipath_lid)) { dev->rcv_errors++; goto bail; } @@ -495,9 +479,8 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. */ -static void ipath_ib_timer(void *arg) +void ipath_ib_timer(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *resend = NULL; struct list_head *last; struct ipath_qp *qp; @@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg) if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && --dev->pma_sample_start == 0) { dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; - ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword, - &dev->ipath_rword, - &dev->ipath_spkts, - &dev->ipath_rpkts, - &dev->ipath_xmit_wait); + ipath_snapshot_counters(dev->dd, &dev->ipath_sword, + &dev->ipath_rword, + &dev->ipath_spkts, + &dev->ipath_rpkts, + &dev->ipath_xmit_wait); } if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { if (dev->pma_sample_interval == 0) { u64 ta, tb, tc, td, te; dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; - ipath_layer_snapshot_counters(dev->dd, &ta, &tb, - &tc, &td, &te); + ipath_snapshot_counters(dev->dd, &ta, &tb, + &tc, &td, &te); dev->ipath_sword = ta - dev->ipath_sword; dev->ipath_rword = tb - dev->ipath_rword; @@ -581,6 +564,362 @@ static void ipath_ib_timer(void *arg) } } +static void update_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, + u32 length) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + BUG_ON(len == 0); + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. */ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. + */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, + extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + __iowrite32_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + ipath_flush_wc(); +} + +/** + * ipath_verbs_send - send a packet + * @dd: the infinipath device + * @hdrwords: the number of words in the header + * @hdr: the packet header + * @len: the length of the packet in bytes + * @ss: the SGE to send + */ +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss) +{ + u32 __iomem *piobuf; + u32 plen; + int ret; + + /* +1 is for the qword padding of pbc */ + plen = hdrwords + ((len + 3) >> 2) + 1; + if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { + ipath_dbg("packet len 0x%x too long, failing\n", plen); + ret = -EINVAL; + goto bail; + } + + /* Get a PIO buffer to use. */ + piobuf = ipath_getpiobuf(dd, NULL); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto bail; + } + + /* + * Write len to control qword, no flags. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(plen, piobuf); + ipath_flush_wc(); + piobuf += 2; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + __iowrite32_copy(piobuf, hdr, hdrwords - 1); + ipath_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + ipath_flush_wc(); + ret = 0; + goto bail; + } + + __iowrite32_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 w; + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + /* Need to round up for the last dword in the packet. */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, addr, w - 1); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(addr[w - 1], piobuf + w - 1); + /* be sure trigger word is written */ + ipath_flush_wc(); + ret = 0; + goto bail; + } + copy_io(piobuf, ss, len); + ret = 0; + +bail: + return ret; +} + +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_get_counters - get various chip counters + * @dd: the infinipath device + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + cntrs->link_error_recovery_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ + cntrs->link_downed_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); + cntrs->port_rcv_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); + cntrs->port_rcv_remphys_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); + cntrs->port_xmit_discards = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); + cntrs->port_xmit_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + cntrs->port_rcv_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + cntrs->port_xmit_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + cntrs->port_rcv_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ + + ret = 0; + +bail: + return ret; +} + /** * ipath_ib_piobufavail - callback when a PIO buffer is available * @arg: the device pointer @@ -591,9 +930,8 @@ static void ipath_ib_timer(void *arg) * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and * return zero). */ -static int ipath_ib_piobufavail(void *arg) +int ipath_ib_piobufavail(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *qp; unsigned long flags; @@ -624,14 +962,14 @@ static int ipath_query_device(struct ib_device *ibdev, IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID; props->page_size_cap = PAGE_SIZE; - props->vendor_id = ipath_layer_get_vendorid(dev->dd); - props->vendor_part_id = ipath_layer_get_deviceid(dev->dd); - props->hw_ver = ipath_layer_get_pcirev(dev->dd); + props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_part_id = dev->dd->ipath_deviceid; + props->hw_ver = dev->dd->ipath_pcirev; props->sys_image_guid = dev->sys_image_guid; props->max_mr_size = ~0ull; - props->max_qp = dev->qp_table.max; + props->max_qp = ib_ipath_max_qps; props->max_qp_wr = ib_ipath_max_qp_wrs; props->max_sge = ib_ipath_max_sges; props->max_cq = ib_ipath_max_cqs; @@ -647,7 +985,7 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ props->atomic_cap = IB_ATOMIC_HCA; - props->max_pkeys = ipath_layer_get_npkeys(dev->dd); + props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * @@ -672,12 +1010,17 @@ const u8 ipath_cvt_physportstate[16] = { [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, }; +u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) +{ + return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); +} + static int ipath_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct ipath_ibdev *dev = to_idev(ibdev); enum ib_mtu mtu; - u16 lid = ipath_layer_get_lid(dev->dd); + u16 lid = dev->dd->ipath_lid; u64 ibcstat; memset(props, 0, sizeof(*props)); @@ -685,16 +1028,16 @@ static int ipath_query_port(struct ib_device *ibdev, props->lmc = dev->mkeyprot_resv_lmc & 7; props->sm_lid = dev->sm_lid; props->sm_sl = dev->sm_sl; - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; props->state = ((ibcstat >> 4) & 0x3) + 1; /* See phys_state_show() */ props->phys_state = ipath_cvt_physportstate[ - ipath_layer_get_lastibcstat(dev->dd) & 0xf]; + dev->dd->ipath_lastibcstat & 0xf]; props->port_cap_flags = dev->port_cap_flags; props->gid_tbl_len = 1; props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd); - props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) - + props->pkey_tbl_len = ipath_get_npkeys(dev->dd); + props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations; props->qkey_viol_cntr = dev->qkey_violations; props->active_width = IB_WIDTH_4X; @@ -704,7 +1047,7 @@ static int ipath_query_port(struct ib_device *ibdev, props->init_type_reply = 0; props->max_mtu = IB_MTU_4096; - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -763,7 +1106,7 @@ static int ipath_modify_port(struct ib_device *ibdev, dev->port_cap_flags |= props->set_port_cap_mask; dev->port_cap_flags &= ~props->clr_port_cap_mask; if (port_modify_mask & IB_PORT_SHUTDOWN) - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) dev->qkey_violations = 0; return 0; @@ -780,7 +1123,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port, goto bail; } gid->global.subnet_prefix = dev->gid_prefix; - gid->global.interface_id = ipath_layer_get_guid(dev->dd); + gid->global.interface_id = dev->dd->ipath_guid; ret = 0; @@ -803,18 +1146,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, * we allow allocations of more than we report for this value. */ - if (dev->n_pds_allocated == ib_ipath_max_pds) { + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { ret = ERR_PTR(-ENOMEM); goto bail; } - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) { + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_ipath_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); ret = ERR_PTR(-ENOMEM); goto bail; } dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); /* ib_alloc_pd() will initialize pd->ibpd. */ pd->user = udata != NULL; @@ -830,7 +1177,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd) struct ipath_pd *pd = to_ipd(ibpd); struct ipath_ibdev *dev = to_idev(ibpd->device); + spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); kfree(pd); @@ -851,11 +1200,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); - if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - /* A multicast address requires a GRH (see ch. 8.4.1). */ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID && @@ -881,7 +1225,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, goto bail; } + spin_lock(&dev->n_ahs_lock); + if (dev->n_ahs_allocated == ib_ipath_max_ahs) { + spin_unlock(&dev->n_ahs_lock); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + dev->n_ahs_allocated++; + spin_unlock(&dev->n_ahs_lock); /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; @@ -903,7 +1256,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah) struct ipath_ibdev *dev = to_idev(ibah->device); struct ipath_ah *ah = to_iah(ibah); + spin_lock(&dev->n_ahs_lock); dev->n_ahs_allocated--; + spin_unlock(&dev->n_ahs_lock); kfree(ah); @@ -919,25 +1274,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * ipath_get_npkeys - return the size of the PKEY table for port 0 + * @dd: the infinipath device + */ +unsigned ipath_get_npkeys(struct ipath_devdata *dd) +{ + return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); +} + +/** + * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * @dd: the infinipath device + * @index: the PKEY index + */ +unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) +{ + unsigned ret; + + if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) + ret = 0; + else + ret = dd->ipath_pd[0]->port_pkeys[index]; + + return ret; +} + static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { struct ipath_ibdev *dev = to_idev(ibdev); int ret; - if (index >= ipath_layer_get_npkeys(dev->dd)) { + if (index >= ipath_get_npkeys(dev->dd)) { ret = -EINVAL; goto bail; } - *pkey = ipath_layer_get_pkey(dev->dd, index); + *pkey = ipath_get_pkey(dev->dd, index); ret = 0; bail: return ret; } - /** * ipath_alloc_ucontext - allocate a ucontest * @ibdev: the infiniband device @@ -970,26 +1350,91 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context) static int ipath_verbs_register_sysfs(struct ib_device *dev); +static void __verbs_timer(unsigned long arg) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) arg; + + /* + * If port 0 receive packet interrupts are not available, or + * can be missed, poll the receive queue + */ + if (dd->ipath_flags & IPATH_POLL_RX_INTR) + ipath_kreceive(dd); + + /* Handle verbs layer timeouts. */ + ipath_ib_timer(dd->verbs_dev); + + mod_timer(&dd->verbs_timer, jiffies + 1); +} + +static int enable_timer(struct ipath_devdata *dd) +{ + /* + * Early chips had a design flaw where the chip and kernel idea + * of the tail register don't always agree, and therefore we won't + * get an interrupt on the next packet received. + * If the board supports per packet receive interrupts, use it. + * Otherwise, the timer function periodically checks for packets + * to cover this case. + * Either way, the timer is needed for verbs layer related + * processing. + */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, + 0x2074076542310ULL); + /* Enable GPIO bit 2 interrupt */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + (u64) (1 << 2)); + } + + init_timer(&dd->verbs_timer); + dd->verbs_timer.function = __verbs_timer; + dd->verbs_timer.data = (unsigned long)dd; + dd->verbs_timer.expires = jiffies + 1; + add_timer(&dd->verbs_timer); + + return 0; +} + +static int disable_timer(struct ipath_devdata *dd) +{ + /* Disable GPIO bit 2 interrupt */ + if (dd->ipath_flags & IPATH_GPIO_INTR) + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); + + del_timer_sync(&dd->verbs_timer); + + return 0; +} + /** * ipath_register_ib_device - register our device with the infiniband core - * @unit: the device number to register * @dd: the device data structure * Return the allocated ipath_ibdev pointer or NULL on error. */ -static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) +int ipath_register_ib_device(struct ipath_devdata *dd) { - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; int ret; idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); - if (idev == NULL) + if (idev == NULL) { + ret = -ENOMEM; goto bail; + } dev = &idev->ibdev; /* Only need to initialize non-zero fields. */ + spin_lock_init(&idev->n_pds_lock); + spin_lock_init(&idev->n_ahs_lock); + spin_lock_init(&idev->n_cqs_lock); + spin_lock_init(&idev->n_qps_lock); + spin_lock_init(&idev->n_srqs_lock); + spin_lock_init(&idev->n_mcast_grps_lock); + spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); @@ -1030,7 +1475,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) idev->link_width_enabled = 3; /* 1x or 4x */ /* Snapshot current HW counters to "clear" them. */ - ipath_layer_get_counters(dd, &cntrs); + ipath_get_counters(dd, &cntrs); idev->z_symbol_error_counter = cntrs.symbol_error_counter; idev->z_link_error_recovery_counter = cntrs.link_error_recovery_counter; @@ -1054,14 +1499,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) * device types in the system, we can't be sure this is unique. */ if (!sys_image_guid) - sys_image_guid = ipath_layer_get_guid(dd); + sys_image_guid = dd->ipath_guid; idev->sys_image_guid = sys_image_guid; - idev->ib_unit = unit; + idev->ib_unit = dd->ipath_unit; idev->dd = dd; strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); dev->owner = THIS_MODULE; - dev->node_guid = ipath_layer_get_guid(dd); + dev->node_guid = dd->ipath_guid; dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1093,9 +1538,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->node_type = IB_NODE_CA; + dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; - dev->dma_device = ipath_layer_get_device(dd); + dev->dma_device = &dd->pcidev->dev; dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; @@ -1137,9 +1582,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) dev->attach_mcast = ipath_multicast_attach; dev->detach_mcast = ipath_multicast_detach; dev->process_mad = ipath_process_mad; + dev->mmap = ipath_mmap; snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); + IPATH_IDSTR " %s", system_utsname.nodename); ret = ib_register_device(dev); if (ret) @@ -1148,7 +1594,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) if (ipath_verbs_register_sysfs(dev)) goto err_class; - ipath_layer_enable_timer(dd); + enable_timer(dd); goto bail; @@ -1160,37 +1606,32 @@ err_lk: kfree(idev->qp_table.table); err_qp: ib_dealloc_device(dev); - _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", - unit, -ret); + ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); idev = NULL; bail: - return idev; + dd->verbs_dev = idev; + return ret; } -static void ipath_unregister_ib_device(void *arg) +void ipath_unregister_ib_device(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ib_device *ibdev = &dev->ibdev; - ipath_layer_disable_timer(dev->dd); + disable_timer(dev->dd); ib_unregister_device(ibdev); if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) - _VERBS_ERROR("ipath%d pending list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "pending list not empty!\n"); if (!list_empty(&dev->piowait)) - _VERBS_ERROR("ipath%d piowait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "piowait list not empty!\n"); if (!list_empty(&dev->rnrwait)) - _VERBS_ERROR("ipath%d rnrwait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); if (!ipath_mcast_tree_empty()) - _VERBS_ERROR("ipath%d multicast table memory leak!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "multicast table memory leak!\n"); /* * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! @@ -1201,25 +1642,12 @@ static void ipath_unregister_ib_device(void *arg) ib_dealloc_device(ibdev); } -static int __init ipath_verbs_init(void) -{ - return ipath_verbs_register(ipath_register_ib_device, - ipath_unregister_ib_device, - ipath_ib_piobufavail, ipath_ib_rcv, - ipath_ib_timer); -} - -static void __exit ipath_verbs_cleanup(void) -{ - ipath_verbs_unregister(); -} - static ssize_t show_rev(struct class_device *cdev, char *buf) { struct ipath_ibdev *dev = container_of(cdev, struct ipath_ibdev, ibdev.class_dev); - return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd)); + return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); } static ssize_t show_hca(struct class_device *cdev, char *buf) @@ -1228,7 +1656,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) container_of(cdev, struct ipath_ibdev, ibdev.class_dev); int ret; - ret = ipath_layer_get_boardname(dev->dd, buf, 128); + ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); if (ret < 0) goto bail; strcat(buf, "\n"); @@ -1305,6 +1733,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) bail: return ret; } - -module_init(ipath_verbs_init); -module_exit(ipath_verbs_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 2df684727dc..09bbb3f9a21 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -38,10 +38,10 @@ #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/interrupt.h> +#include <linux/kref.h> #include <rdma/ib_pack.h> #include "ipath_layer.h" -#include "verbs_debug.h" #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) @@ -50,7 +50,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IPATH_UVERBS_ABI_VERSION 1 +#define IPATH_UVERBS_ABI_VERSION 2 /* * Define an ib_cq_notify value that is not valid so we know when CQ @@ -152,19 +152,6 @@ struct ipath_mcast { int n_attached; }; -/* Memory region */ -struct ipath_mr { - struct ib_mr ibmr; - struct ipath_mregion mr; /* must be last */ -}; - -/* Fast memory region */ -struct ipath_fmr { - struct ib_fmr ibfmr; - u8 page_shift; - struct ipath_mregion mr; /* must be last */ -}; - /* Protection domain */ struct ipath_pd { struct ib_pd ibpd; @@ -178,58 +165,89 @@ struct ipath_ah { }; /* - * Quick description of our CQ/QP locking scheme: - * - * We have one global lock that protects dev->cq/qp_table. Each - * struct ipath_cq/qp also has its own lock. An individual qp lock - * may be taken inside of an individual cq lock. Both cqs attached to - * a qp may be locked, with the send cq locked first. No other - * nesting should be done. - * - * Each struct ipath_cq/qp also has an atomic_t ref count. The - * pointer from the cq/qp_table to the struct counts as one reference. - * This reference also is good for access through the consumer API, so - * modifying the CQ/QP etc doesn't need to take another reference. - * Access because of a completion being polled does need a reference. - * - * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the - * destroy function to sleep on. - * - * This means that access from the consumer API requires nothing but - * taking the struct's lock. - * - * Access because of a completion event should go as follows: - * - lock cq/qp_table and look up struct - * - increment ref count in struct - * - drop cq/qp_table lock - * - lock struct, do your thing, and unlock struct - * - decrement ref count; if zero, wake up waiters - * - * To destroy a CQ/QP, we can do the following: - * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock - * - decrement ref count - * - wait_event until ref count is zero - * - * It is the consumer's responsibilty to make sure that no QP - * operations (WQE posting or state modification) are pending when the - * QP is destroyed. Also, the consumer must make sure that calls to - * qp_modify are serialized. - * - * Possible optimizations (wait for profile data to see if/where we - * have locks bouncing between CPUs): - * - split cq/qp table lock into n separate (cache-aligned) locks, - * indexed (say) by the page in the table + * This structure is used by ipath_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct ipath_mmap_info { + struct ipath_mmap_info *next; + struct ib_ucontext *context; + void *obj; + struct kref ref; + unsigned size; + unsigned mmap_cnt; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. */ +struct ipath_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ +}; +/* + * The completion queue structure. + */ struct ipath_cq { struct ib_cq ibcq; struct tasklet_struct comptask; spinlock_t lock; u8 notify; u8 triggered; - u32 head; /* new records added to the head */ - u32 tail; /* poll_cq() reads from here. */ - struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */ + struct ipath_cq_wc *queue; + struct ipath_mmap_info *ip; +}; + +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct ipath_seg { + void *vaddr; + size_t length; +}; + +/* The number of ipath_segs that fit in a page. */ +#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) + +struct ipath_segarray { + struct ipath_seg segs[IPATH_SEGSZ]; +}; + +struct ipath_mregion { + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of ipath_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + struct ipath_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct ipath_sge { + struct ipath_mregion *mr; + void *vaddr; /* current pointer into the segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct ipath_mr { + struct ib_mr ibmr; + struct ipath_mregion mr; /* must be last */ }; /* @@ -248,32 +266,50 @@ struct ipath_swqe { /* * Receive work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->r_max_sge. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). */ struct ipath_rwqe { u64 wr_id; - u32 length; /* total length of data in sg_list */ u8 num_sge; - struct ipath_sge sg_list[0]; + struct ib_sge sg_list[0]; }; -struct ipath_rq { - spinlock_t lock; +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct ipath_rwq { u32 head; /* new work requests posted to the head */ u32 tail; /* receives pull requests from here. */ + struct ipath_rwqe wq[0]; +}; + +struct ipath_rq { + struct ipath_rwq *wq; + spinlock_t lock; u32 size; /* size of RWQE array */ u8 max_sge; - struct ipath_rwqe *wq; /* RWQE array */ }; struct ipath_srq { struct ib_srq ibsrq; struct ipath_rq rq; + struct ipath_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; +struct ipath_sge_state { + struct ipath_sge *sg_list; /* next SGE to be used if any */ + struct ipath_sge sge; /* progress state for the current SGE */ + u8 num_sge; +}; + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). @@ -293,6 +329,7 @@ struct ipath_qp { atomic_t refcount; wait_queue_head_t wait; struct tasklet_struct s_task; + struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ /* current RDMA read send data */ @@ -334,6 +371,7 @@ struct ipath_qp { u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_pkey_index; /* PKEY index to use */ + u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ @@ -345,7 +383,8 @@ struct ipath_qp { u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ - struct ipath_rq r_rq; /* receive work queue */ + struct ipath_rq r_rq; /* receive work queue */ + struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; /* @@ -369,15 +408,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, /* * Since struct ipath_rwqe is not a fixed size, we can't simply index into - * struct ipath_rq.wq. This function does the array index computation. + * struct ipath_rwq.wq. This function does the array index computation. */ static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, unsigned n) { return (struct ipath_rwqe *) - ((char *) rq->wq + + ((char *) rq->wq->wq + (sizeof(struct ipath_rwqe) + - rq->max_sge * sizeof(struct ipath_sge)) * n); + rq->max_sge * sizeof(struct ib_sge)) * n); } /* @@ -417,6 +456,7 @@ struct ipath_ibdev { struct ib_device ibdev; struct list_head dev_list; struct ipath_devdata *dd; + struct ipath_mmap_info *pending_mmaps; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -435,11 +475,20 @@ struct ipath_ibdev { __be64 sys_image_guid; /* in network order */ __be64 gid_prefix; /* in network order */ __be64 mkey; + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + u64 ipath_sword; /* total dwords sent (sample result) */ u64 ipath_rword; /* total dwords received (sample result) */ u64 ipath_spkts; /* total packets sent (sample result) */ @@ -494,8 +543,19 @@ struct ipath_ibdev { struct ipath_opcode_stats opstats[128]; }; -struct ipath_ucontext { - struct ib_ucontext ibucontext; +struct ipath_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; }; static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) @@ -503,11 +563,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct ipath_mr, ibmr); } -static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct ipath_fmr, ibfmr); -} - static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) { return container_of(ibpd, struct ipath_pd, ibpd); @@ -545,12 +600,6 @@ int ipath_process_mad(struct ib_device *ibdev, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); -static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct ipath_ucontext, ibucontext); -} - /* * Compare the lower 24 bits of the two values. * Returns an integer <, ==, or > than zero. @@ -562,6 +611,13 @@ static inline int ipath_cmp24(u32 a, u32 b) struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs); + int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); @@ -579,7 +635,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, int ipath_destroy_qp(struct ib_qp *ibqp); int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask); + int attr_mask, struct ib_udata *udata); int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); @@ -592,6 +648,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); void ipath_get_credit(struct ipath_qp *qp, u32 aeth); +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss); + void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, @@ -638,7 +697,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, struct ib_udata *udata); int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); @@ -680,6 +740,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list); int ipath_dealloc_fmr(struct ib_fmr *ibfmr); +void ipath_release_mmap_info(struct kref *ref); + +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); void ipath_insert_rnr_queue(struct ipath_qp *qp); @@ -700,6 +764,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); +int ipath_register_ib_device(struct ipath_devdata *); + +void ipath_unregister_ib_device(struct ipath_ibdev *); + +void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); + +int ipath_ib_piobufavail(struct ipath_ibdev *); + +void ipath_ib_timer(struct ipath_ibdev *); + +unsigned ipath_get_npkeys(struct ipath_devdata *); + +u32 ipath_get_cr_errpkey(struct ipath_devdata *); + +unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); + extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; extern const u8 ipath_cvt_physportstate[]; @@ -714,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs; extern unsigned int ib_ipath_max_qp_wrs; +extern unsigned int ib_ipath_max_qps; + extern unsigned int ib_ipath_max_sges; extern unsigned int ib_ipath_max_mcast_grps; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index ee0e1d96d72..085e28b939e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, goto bail; } + spin_lock(&dev->n_mcast_grps_lock); if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); ret = ENOMEM; goto bail; } dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); + + mcast->n_attached++; list_add_tail_rcu(&mqp->list, &mcast->qp_list); @@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); ipath_mcast_free(mcast); + spin_lock(&dev->n_mcast_grps_lock); dev->n_mcast_grps_allocated--; + spin_unlock(&dev->n_mcast_grps_lock); } ret = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c new file mode 100644 index 00000000000..036fde662aa --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include "ipath_kernel.h" + +/** + * ipath_unordered_wc - indicate whether write combining is ordered + * + * PowerPC systems (at least those in the 970 processor family) + * write partially filled store buffers in address order, but will write + * completely filled store buffers in "random" order, and therefore must + * have serialization for correctness with current InfiniPath chips. + * + */ +int ipath_unordered_wc(void) +{ + return 1; +} diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h deleted file mode 100644 index 6186676f2a1..00000000000 --- a/drivers/infiniband/hw/ipath/verbs_debug.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _VERBS_DEBUG_H -#define _VERBS_DEBUG_H - -/* - * This file contains tracing code for the ib_ipath kernel module. - */ -#ifndef _VERBS_DEBUGGING /* tracing enabled or not */ -#define _VERBS_DEBUGGING 1 -#endif - -extern unsigned ib_ipath_debug; - -#define _VERBS_ERROR(fmt,...) \ - do { \ - printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_UNIT_ERROR(unit,fmt,...) \ - do { \ - printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ - } while(0) - -#if _VERBS_DEBUGGING - -/* - * Mask values for debugging. The scheme allows us to compile out any - * of the debug tracing stuff, and if compiled in, to enable or - * disable dynamically. - * This can be set at modprobe time also: - * modprobe ib_path ib_ipath_debug=3 - */ - -#define __VERBS_INFO 0x1 /* generic low verbosity stuff */ -#define __VERBS_DBG 0x2 /* generic debug */ -#define __VERBS_VDBG 0x4 /* verbose debug */ -#define __VERBS_SMADBG 0x8000 /* sma packet debug */ - -#define _VERBS_INFO(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_INFO)) \ - printk(KERN_INFO "%s: " fmt,"ib_ipath", \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_DBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_DBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_VDBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_SMADBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#else /* ! _VERBS_DEBUGGING */ - -#define _VERBS_INFO(fmt,...) -#define _VERBS_DBG(fmt,...) -#define _VERBS_VDBG(fmt,...) -#define _VERBS_SMADBG(fmt,...) - -#endif /* _VERBS_DEBUGGING */ - -#endif /* _VERBS_DEBUG_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e215041b2db..69599455aca 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate) case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS; case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS; case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS; - default: return port_rate; + default: return mult_to_ib_rate(port_rate); } } diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index c3bec7490f5..cd044ea2dfa 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -34,6 +34,7 @@ #include <linux/jiffies.h> #include <linux/timer.h> +#include <linux/workqueue.h> #include "mthca_dev.h" @@ -48,9 +49,41 @@ enum { static DEFINE_SPINLOCK(catas_lock); +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int catas_reset_disable; +module_param_named(catas_reset_disable, catas_reset_disable, int, 0644); +MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); + +static void catas_reset(void *work_ptr) +{ + struct mthca_dev *dev, *tmpdev; + LIST_HEAD(tlist); + int ret; + + mutex_lock(&mthca_device_mutex); + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + ret = __mthca_restart_one(dev->pdev); + if (ret) + mthca_err(dev, "Reset failed (%d)\n", ret); + else + mthca_dbg(dev, "Reset succeeded\n"); + } + + mutex_unlock(&mthca_device_mutex); +} + static void handle_catas(struct mthca_dev *dev) { struct ib_event event; + unsigned long flags; const char *type; int i; @@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev) for (i = 0; i < dev->catas_err.size; ++i) mthca_err(dev, " buf[%02x]: %08x\n", i, swab32(readl(dev->catas_err.map + i))); + + if (catas_reset_disable) + return; + + spin_lock_irqsave(&catas_lock, flags); + list_add(&dev->catas_err.list, &catas_list); + queue_work(catas_wq, &catas_work); + spin_unlock_irqrestore(&catas_lock, flags); } static void poll_catas(unsigned long dev_ptr) @@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev) dev->catas_err.timer.data = (unsigned long) dev; dev->catas_err.timer.function = poll_catas; dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + INIT_LIST_HEAD(&dev->catas_err.list); add_timer(&dev->catas_err.timer); } @@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev) dev->catas_err.addr), dev->catas_err.size * 4); } + + spin_lock_irq(&catas_lock); + list_del(&dev->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mthca_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset, NULL); + + catas_wq = create_singlethread_workqueue("mthca_catas"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mthca_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index deabc14b4ea..99a94d71093 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -34,7 +34,7 @@ * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/sched.h> +#include <linux/completion.h> #include <linux/pci.h> #include <linux/errno.h> #include <asm/io.h> diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 3e27a084257..e393681ba7d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev, wq = &(*cur_qp)->rq; wqe = be32_to_cpu(cqe->wqe); wqe_index = wqe >> wq->wqe_shift; - /* - * WQE addr == base - 1 might be reported in receive completion - * with error instead of (rq size - 1) by Sinai FW 1.0.800 and - * Arbel FW 5.1.400. This bug should be fixed in later FW revs. - */ + /* + * WQE addr == base - 1 might be reported in receive completion + * with error instead of (rq size - 1) by Sinai FW 1.0.800 and + * Arbel FW 5.1.400. This bug should be fixed in later FW revs. + */ if (unlikely(wqe_index < 0)) wqe_index = wq->max - 1; entry->wr_id = (*cur_qp)->wrid[wqe_index]; diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index f8160b8de09..fe5cecf70fe 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -45,6 +45,7 @@ #include <linux/dma-mapping.h> #include <linux/timer.h> #include <linux/mutex.h> +#include <linux/list.h> #include <asm/semaphore.h> @@ -283,8 +284,11 @@ struct mthca_catas_err { unsigned long stop; u32 size; struct timer_list timer; + struct list_head list; }; +extern struct mutex mthca_device_mutex; + struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev); void mthca_start_catas_poll(struct mthca_dev *dev); void mthca_stop_catas_poll(struct mthca_dev *dev); +int __mthca_restart_one(struct pci_dev *pdev); +int mthca_catas_init(void); +void mthca_catas_cleanup(void); int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); @@ -506,7 +513,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mthca_max_srq_sge(struct mthca_dev *dev); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, @@ -521,7 +528,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index d9bc030bccc..45e106f1480 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev, mthca_update_rate(to_mdev(ibdev), port_num); update_sm_ah(to_mdev(ibdev), port_num, - be16_to_cpu(pinfo->lid), + be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); event.device = ibdev; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 7b82c1907f0..47ea0214836 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -80,6 +80,8 @@ static int tune_pci = 0; module_param(tune_pci, int, 0444); MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); +struct mutex mthca_device_mutex; + static const char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -978,28 +980,15 @@ static struct { MTHCA_FLAG_SINAI_OPT } }; -static int __devinit mthca_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int __mthca_init_one(struct pci_dev *pdev, int hca_type) { - static int mthca_version_printed = 0; int ddr_hidden = 0; int err; struct mthca_dev *mdev; - if (!mthca_version_printed) { - printk(KERN_INFO "%s", mthca_version); - ++mthca_version_printed; - } - printk(KERN_INFO PFX "Initializing %s\n", pci_name(pdev)); - if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { - printk(KERN_ERR PFX "%s has invalid driver data %lx\n", - pci_name(pdev), id->driver_data); - return -ENODEV; - } - err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " @@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, mdev->pdev = pdev; - mdev->mthca_flags = mthca_hca_table[id->driver_data].flags; + mdev->mthca_flags = mthca_hca_table[hca_type].flags; if (ddr_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; @@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, if (err) goto err_cmd; - if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { + if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), - (int) (mthca_hca_table[id->driver_data].latest_fw >> 32), - (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff, - (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff)); + (int) (mthca_hca_table[hca_type].latest_fw >> 32), + (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff, + (int) (mthca_hca_table[hca_type].latest_fw & 0xffff)); mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); } @@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, goto err_unregister; pci_set_drvdata(pdev, mdev); + mdev->hca_type = hca_type; return 0; @@ -1166,7 +1156,7 @@ err_disable_pdev: return err; } -static void __devexit mthca_remove_one(struct pci_dev *pdev) +static void __mthca_remove_one(struct pci_dev *pdev) { struct mthca_dev *mdev = pci_get_drvdata(pdev); u8 status; @@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) } } +int __mthca_restart_one(struct pci_dev *pdev) +{ + struct mthca_dev *mdev; + + mdev = pci_get_drvdata(pdev); + if (!mdev) + return -ENODEV; + __mthca_remove_one(pdev); + return __mthca_init_one(pdev, mdev->hca_type); +} + +static int __devinit mthca_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + static int mthca_version_printed = 0; + int ret; + + mutex_lock(&mthca_device_mutex); + + if (!mthca_version_printed) { + printk(KERN_INFO "%s", mthca_version); + ++mthca_version_printed; + } + + if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { + printk(KERN_ERR PFX "%s has invalid driver data %lx\n", + pci_name(pdev), id->driver_data); + mutex_unlock(&mthca_device_mutex); + return -ENODEV; + } + + ret = __mthca_init_one(pdev, id->driver_data); + + mutex_unlock(&mthca_device_mutex); + + return ret; +} + +static void __devexit mthca_remove_one(struct pci_dev *pdev) +{ + mutex_lock(&mthca_device_mutex); + __mthca_remove_one(pdev); + mutex_unlock(&mthca_device_mutex); +} + static struct pci_device_id mthca_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, @@ -1248,13 +1283,24 @@ static int __init mthca_init(void) { int ret; + mutex_init(&mthca_device_mutex); + ret = mthca_catas_init(); + if (ret) + return ret; + ret = pci_register_driver(&mthca_driver); - return ret < 0 ? ret : 0; + if (ret < 0) { + mthca_catas_cleanup(); + return ret; + } + + return 0; } static void __exit mthca_cleanup(void) { pci_unregister_driver(&mthca_driver); + mthca_catas_cleanup(); } module_init(mthca_init); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 265b1d1c4a6..981fe2eebdf 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - dev->ib_dev.node_type = IB_NODE_CA; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; dev->ib_dev.class_dev.dev = &dev->pdev->dev; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 2e8f6f36e0a..5e5c58b9920 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; ib_ah_attr->static_rate = mthca_rate_to_ib(dev, - path->static_rate & 0x7, + path->static_rate & 0xf, ib_ah_attr->port_num); ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; if (ib_ah_attr->ah_flags) { @@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m if (qp->transport == RC || qp->transport == UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = + be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; - qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; + qp_attr->port_num = + (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING; @@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; - qp_attr->port_num = qp_attr->ah_attr.port_num; qp_attr->timeout = context->pri_path.ackto >> 3; qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; - qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; qp_attr->alt_timeout = context->alt_path.ackto >> 3; qp_init_attr->cap = qp_attr->cap; @@ -527,7 +529,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, return 0; } -int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -842,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, - qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL); mthca_wq_reset(&qp->sq); qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index b60a9d79ae5..0f316c87bf6 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) } int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 8e9219842be..8b728486410 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev) ret = mthca_alloc_init(&dev->uar_table.alloc, dev->limits.num_uars, dev->limits.num_uars - 1, - dev->limits.reserved_uars); + dev->limits.reserved_uars + 1); if (ret) return ret; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 474aa214ab5..0b8a79d53a0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { } extern int ipoib_sendq_size; extern int ipoib_recvq_size; +extern struct ib_sa_client ipoib_sa_client; + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG extern int ipoib_debug_level; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 5dde380e8db..f1cb83688b3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -141,7 +141,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } @@ -247,7 +247,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file) return ret; seq = file->private_data; - seq->private = inode->u.generic_ip; + seq->private = inode->i_private; return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5033666b148..f426a69d9a4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev) return 0; } -static void ipoib_ib_handle_wc(struct net_device *dev, - struct ib_wc *wc) +static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned int wr_id = wc->wr_id; + unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; + struct sk_buff *skb; + dma_addr_t addr; - ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n", + ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n", wr_id, wc->opcode, wc->status); - if (wr_id & IPOIB_OP_RECV) { - wr_id &= ~IPOIB_OP_RECV; - - if (wr_id < ipoib_recvq_size) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - dma_addr_t addr = priv->rx_ring[wr_id].mapping; - - if (unlikely(wc->status != IB_WC_SUCCESS)) { - if (wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed recv event " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - priv->rx_ring[wr_id].skb = NULL; - return; - } + if (unlikely(wr_id >= ipoib_recvq_size)) { + ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n", + wr_id, ipoib_recvq_size); + return; + } - /* - * If we can't allocate a new RX buffer, dump - * this packet and reuse the old buffer. - */ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { - ++priv->stats.rx_dropped; - goto repost; - } + skb = priv->rx_ring[wr_id].skb; + addr = priv->rx_ring[wr_id].mapping; - ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", - wc->byte_len, wc->slid); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed recv event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; + return; + } - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } - skb_put(skb, wc->byte_len); - skb_pull(skb, IB_GRH_BYTES); + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); - if (wc->slid != priv->local_lid || - wc->src_qp != priv->qp->qp_num) { - skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb->mac.raw = skb->data; - skb_pull(skb, IPOIB_ENCAP_LEN); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - dev->last_rx = jiffies; - ++priv->stats.rx_packets; - priv->stats.rx_bytes += skb->len; + skb_put(skb, wc->byte_len); + skb_pull(skb, IB_GRH_BYTES); - skb->dev = dev; - /* XXX get correct PACKET_ type here */ - skb->pkt_type = PACKET_HOST; - netif_rx_ni(skb); - } else { - ipoib_dbg_data(priv, "dropping loopback packet\n"); - dev_kfree_skb_any(skb); - } + if (wc->slid != priv->local_lid || + wc->src_qp != priv->qp->qp_num) { + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb->mac.raw = skb->data; + skb_pull(skb, IPOIB_ENCAP_LEN); - repost: - if (unlikely(ipoib_ib_post_receive(dev, wr_id))) - ipoib_warn(priv, "ipoib_ib_post_receive failed " - "for buf %d\n", wr_id); - } else - ipoib_warn(priv, "completion event with wrid %d\n", - wr_id); + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); } else { - struct ipoib_tx_buf *tx_req; - unsigned long flags; + ipoib_dbg_data(priv, "dropping loopback packet\n"); + dev_kfree_skb_any(skb); + } - if (wr_id >= ipoib_sendq_size) { - ipoib_warn(priv, "completion event with wrid %d (> %d)\n", - wr_id, ipoib_sendq_size); - return; - } +repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) + ipoib_warn(priv, "ipoib_ib_post_receive failed " + "for buf %d\n", wr_id); +} - ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id); +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + struct ipoib_tx_buf *tx_req; + unsigned long flags; - tx_req = &priv->tx_ring[wr_id]; + ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(tx_req, mapping), - tx_req->skb->len, - DMA_TO_DEVICE); + if (unlikely(wr_id >= ipoib_sendq_size)) { + ipoib_warn(priv, "send completion event with wrid %d (> %d)\n", + wr_id, ipoib_sendq_size); + return; + } - ++priv->stats.tx_packets; - priv->stats.tx_bytes += tx_req->skb->len; + tx_req = &priv->tx_ring[wr_id]; - dev_kfree_skb_any(tx_req->skb); + dma_unmap_single(priv->ca->dma_device, + pci_unmap_addr(tx_req, mapping), + tx_req->skb->len, + DMA_TO_DEVICE); - spin_lock_irqsave(&priv->tx_lock, flags); - ++priv->tx_tail; - if (netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) - netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed send event " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); - } + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++priv->tx_tail; + if (netif_queue_stopped(dev) && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && + priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) + netif_wake_queue(dev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); +} + +static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) +{ + if (wc->wr_id & IPOIB_OP_RECV) + ipoib_ib_handle_rx_wc(dev, wc); + else + ipoib_ib_handle_tx_wc(dev, wc); } void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) @@ -320,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; dma_addr_t addr; - if (skb->len > dev->mtu + INFINIBAND_ALEN) { + if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", skb->len, dev->mtu + INFINIBAND_ALEN); ++priv->stats.tx_dropped; @@ -619,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev) * The device could have been brought down between the start and when * we get here, don't bring it back up if it's not configured up */ - if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { ipoib_ib_dev_up(dev); + ipoib_mcast_restart_task(dev); + } mutex_lock(&priv->vlan_mutex); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index cf71d2a5515..1eaf00e9862 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -40,7 +40,6 @@ #include <linux/init.h> #include <linux/slab.h> -#include <linux/vmalloc.h> #include <linux/kernel.h> #include <linux/if_arp.h> /* For ARPHRD_xxx */ @@ -82,6 +81,8 @@ static const u8 ipv4_bcast_addr[] = { struct workqueue_struct *ipoib_workqueue; +struct ib_sa_client ipoib_sa_client; + static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); @@ -336,7 +337,8 @@ void ipoib_flush_paths(struct net_device *dev) struct ipoib_path *path, *tp; LIST_HEAD(remove_list); - spin_lock_irq(&priv->lock); + spin_lock_irq(&priv->tx_lock); + spin_lock(&priv->lock); list_splice(&priv->path_list, &remove_list); INIT_LIST_HEAD(&priv->path_list); @@ -347,12 +349,15 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); - spin_unlock_irq(&priv->lock); + spin_unlock(&priv->lock); + spin_unlock_irq(&priv->tx_lock); wait_for_completion(&path->done); path_free(dev, path); - spin_lock_irq(&priv->lock); + spin_lock_irq(&priv->tx_lock); + spin_lock(&priv->lock); } - spin_unlock_irq(&priv->lock); + spin_unlock(&priv->lock); + spin_unlock_irq(&priv->tx_lock); } static void path_rec_completion(int status, @@ -459,7 +464,7 @@ static int path_rec_start(struct net_device *dev, init_completion(&path->done); path->query_id = - ib_sa_path_rec_get(priv->ca, priv->port, + ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &path->pathrec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | @@ -615,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (!spin_trylock_irqsave(&priv->tx_lock, flags)) + if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) return NETDEV_TX_LOCKED; /* @@ -628,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (skb->dst && skb->dst->neighbour) { + if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); goto out; @@ -1107,13 +1112,16 @@ static void ipoib_add_one(struct ib_device *device) struct ipoib_dev_priv *priv; int s, e, p; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { @@ -1137,6 +1145,9 @@ static void ipoib_remove_one(struct ib_device *device) struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { @@ -1181,13 +1192,16 @@ static int __init ipoib_init_module(void) goto err_fs; } + ib_sa_register_client(&ipoib_sa_client); + ret = ib_register_client(&ipoib_client); if (ret) - goto err_wq; + goto err_sa; return 0; -err_wq: +err_sa: + ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: @@ -1199,6 +1213,7 @@ err_fs: static void __exit ipoib_cleanup_module(void) { ib_unregister_client(&ipoib_client); + ib_sa_unregister_client(&ipoib_sa_client); ipoib_unregister_debugfs(); destroy_workqueue(ipoib_workqueue); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b5e6a7be603..3faa1820f0e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -326,6 +326,7 @@ ipoib_mcast_sendonly_join_complete(int status, /* Clear the busy flag so we try again */ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + mcast->query = NULL; } complete(&mcast->done); @@ -360,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) init_completion(&mcast->done); - ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, + ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | @@ -471,22 +472,32 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, if (create) { comp_mask |= - IB_SA_MCMEMBER_REC_QKEY | - IB_SA_MCMEMBER_REC_SL | - IB_SA_MCMEMBER_REC_FLOW_LABEL | - IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + IB_SA_MCMEMBER_REC_QKEY | + IB_SA_MCMEMBER_REC_MTU_SELECTOR | + IB_SA_MCMEMBER_REC_MTU | + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | + IB_SA_MCMEMBER_REC_RATE_SELECTOR | + IB_SA_MCMEMBER_REC_RATE | + IB_SA_MCMEMBER_REC_SL | + IB_SA_MCMEMBER_REC_FLOW_LABEL | + IB_SA_MCMEMBER_REC_HOP_LIMIT; rec.qkey = priv->broadcast->mcmember.qkey; + rec.mtu_selector = IB_SA_EQ; + rec.mtu = priv->broadcast->mcmember.mtu; + rec.traffic_class = priv->broadcast->mcmember.traffic_class; + rec.rate_selector = IB_SA_EQ; + rec.rate = priv->broadcast->mcmember.rate; rec.sl = priv->broadcast->mcmember.sl; rec.flow_label = priv->broadcast->mcmember.flow_label; - rec.traffic_class = priv->broadcast->mcmember.traffic_class; + rec.hop_limit = priv->broadcast->mcmember.hop_limit; } init_completion(&mcast->done); - ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, - mcast->backoff * 1000, GFP_ATOMIC, - ipoib_mcast_join_complete, + ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, + &rec, comp_mask, mcast->backoff * 1000, + GFP_ATOMIC, ipoib_mcast_join_complete, mcast, &mcast->query); if (ret < 0) { @@ -527,7 +538,7 @@ void ipoib_mcast_join_task(void *dev_ptr) priv->local_rate = attr.active_speed * ib_width_enum_to_int(attr.active_width); } else - ipoib_warn(priv, "ib_query_port failed\n"); + ipoib_warn(priv, "ib_query_port failed\n"); } if (!priv->broadcast) { @@ -680,7 +691,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) * Just make one shot at leaving and don't wait for a reply; * if we fail, too bad. */ - ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, + ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | @@ -794,7 +805,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) } if (priv->broadcast) { - rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); + rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); list_add_tail(&priv->broadcast->list, &remove_list); priv->broadcast = NULL; } diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index fead87d1eff..365a1b5f19e 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_ISER tristate "ISCSI RDMA Protocol" - depends on INFINIBAND && SCSI + depends on INFINIBAND && SCSI && INET select SCSI_ISCSI_ATTRS ---help--- Support for the ISCSI RDMA Protocol over InfiniBand. This diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1437d7ee3b1..2a14fe2e322 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -141,18 +141,11 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) if (sc->sc_data_direction == DMA_TO_DEVICE) { BUG_ON(ctask->total_length == 0); - /* bytes to be sent via RDMA operations */ - iser_ctask->rdma_data_count = ctask->total_length - - ctask->imm_count - - ctask->unsol_count; - debug_scsi("cmd [itt %x total %d imm %d unsol_data %d " - "rdma_data %d]\n", + debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", ctask->itt, ctask->total_length, ctask->imm_count, - ctask->unsol_count, iser_ctask->rdma_data_count); - } else - /* bytes to be sent via RDMA operations */ - iser_ctask->rdma_data_count = ctask->total_length; + ctask->unsol_count); + } iser_ctask_rdma_init(iser_ctask); } @@ -196,13 +189,10 @@ iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, { struct iscsi_data hdr; int error = 0; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; /* Send data-out PDUs while there's still unsolicited data to send */ while (ctask->unsol_count > 0) { - iscsi_prep_unsolicit_data_pdu(ctask, &hdr, - iser_ctask->rdma_data_count); - + iscsi_prep_unsolicit_data_pdu(ctask, &hdr); debug_scsi("Sending data-out: itt 0x%x, data count %d\n", hdr.itt, ctask->data_count); @@ -555,6 +545,7 @@ static struct scsi_host_template iscsi_iser_sht = { .queuecommand = iscsi_queuecommand, .can_queue = ISCSI_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, + .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_host_reset_handler = iscsi_eh_host_reset, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 3350ba690cf..2cf9ae0def1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -82,8 +82,12 @@ __func__ , ## arg); \ } while (0) +#define SHIFT_4K 12 +#define SIZE_4K (1UL << SHIFT_4K) +#define MASK_4K (~(SIZE_4K-1)) + /* support upto 512KB in one RDMA */ -#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> PAGE_SHIFT) +#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISCSI_ISER_MAX_LUN 256 #define ISCSI_ISER_MAX_CMD_LEN 16 @@ -171,6 +175,7 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; + int is_fmr; }; struct iser_regd_buf { @@ -257,7 +262,6 @@ struct iscsi_iser_conn { struct iscsi_iser_cmd_task { struct iser_desc desc; struct iscsi_iser_conn *iser_conn; - int rdma_data_count;/* RDMA bytes */ enum iser_task_status status; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 31950a522a1..d0b03f42658 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -42,6 +42,7 @@ #include "iscsi_iser.h" #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ + /** * Decrements the reference count for the * registered buffer & releases it @@ -55,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf) if ((atomic_read(®d_buf->ref_count) == 0) || atomic_dec_and_test(®d_buf->ref_count)) { /* if we used the dma mr, unreg is just NOP */ - if (regd_buf->reg.rkey != 0) + if (regd_buf->reg.is_fmr) iser_unreg_mem(®d_buf->reg); if (regd_buf->dma_addr) { @@ -90,9 +91,9 @@ void iser_reg_single(struct iser_device *device, BUG_ON(dma_mapping_error(dma_addr)); regd_buf->reg.lkey = device->mr->lkey; - regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */ regd_buf->reg.len = regd_buf->data_size; regd_buf->reg.va = dma_addr; + regd_buf->reg.is_fmr = 0; regd_buf->dma_addr = dma_addr; regd_buf->direction = direction; @@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, int i; /* compute the offset of first element */ - page_vec->offset = (u64) sg[0].offset; + page_vec->offset = (u64) sg[0].offset & ~MASK_4K; for (i = 0; i < data->dma_nents; i++) { total_sz += sg_dma_len(&sg[i]); @@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, first_addr = sg_dma_address(&sg[i]); last_addr = first_addr + sg_dma_len(&sg[i]); - start_aligned = !(first_addr & ~PAGE_MASK); - end_aligned = !(last_addr & ~PAGE_MASK); + start_aligned = !(first_addr & ~MASK_4K); + end_aligned = !(last_addr & ~MASK_4K); /* continue to collect page fragments till aligned or SG ends */ while (!end_aligned && (i + 1 < data->dma_nents)) { i++; total_sz += sg_dma_len(&sg[i]); last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); - end_aligned = !(last_addr & ~PAGE_MASK); + end_aligned = !(last_addr & ~MASK_4K); } - first_addr = first_addr & PAGE_MASK; - - for (page = first_addr; page < last_addr; page += PAGE_SIZE) - page_vec->pages[cur_page++] = page; + /* handle the 1st page in the 1st DMA element */ + if (cur_page == 0) { + page = first_addr & MASK_4K; + page_vec->pages[cur_page] = page; + cur_page++; + page += SIZE_4K; + } else + page = first_addr; + + for (; page < last_addr; page += SIZE_4K) { + page_vec->pages[cur_page] = page; + cur_page++; + } } page_vec->data_size = total_sz; @@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, return cur_page; } -#define MASK_4K ((1UL << 12) - 1) /* 0xFFF */ -#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0) +#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) /** * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned @@ -320,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data) struct scatterlist *sg = (struct scatterlist *)data->buf; int i; - for (i = 0; i < data->size; i++) + for (i = 0; i < data->dma_nents; i++) iser_err("sg[%d] dma_addr:0x%lX page:0x%p " - "off:%d sz:%d dma_len:%d\n", + "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)sg_dma_address(&sg[i]), sg[i].page, sg[i].offset, sg[i].length,sg_dma_len(&sg[i])); @@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data, page_vec->length = page_vec_len; - if (page_vec_len * PAGE_SIZE < page_vec->data_size) { + if (page_vec_len * SIZE_4K < page_vec->data_size) { iser_err("page_vec too short to hold this SG\n"); iser_data_buf_dump(data); iser_dump_page_vec(page_vec); @@ -370,15 +379,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; int err; + int i; + struct scatterlist *sg; regd_buf = &iser_ctask->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem); - if (aligned_len != mem->size) { + if (aligned_len != mem->dma_nents) { iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem); @@ -389,10 +401,38 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, mem = &iser_ctask->data_copy[cmd_dir]; } - iser_page_vec_build(mem, ib_conn->page_vec); - err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); - if (err) - return err; + /* if there a single dma entry, FMR is not needed */ + if (mem->dma_nents == 1) { + sg = (struct scatterlist *)mem->buf; + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.len = sg_dma_len(&sg[0]); + regd_buf->reg.va = sg_dma_address(&sg[0]); + regd_buf->reg.is_fmr = 0; + + iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " + "va: 0x%08lX sz: %ld]\n", + (unsigned int)regd_buf->reg.lkey, + (unsigned int)regd_buf->reg.rkey, + (unsigned long)regd_buf->reg.va, + (unsigned long)regd_buf->reg.len); + } else { /* use FMR for multiple dma entries */ + iser_page_vec_build(mem, ib_conn->page_vec); + err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); + if (err) { + iser_data_buf_dump(mem); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, + ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", + ib_conn->page_vec->data_size, ib_conn->page_vec->length, + ib_conn->page_vec->offset); + for (i=0 ; i<ib_conn->page_vec->length ; i++) + iser_err("page_vec[%d] = 0x%llx\n", i, + (unsigned long long) ib_conn->page_vec->pages[i]); + return err; + } + } /* take a reference on this regd buf such that it will not be released * * (eg in send dto completion) before we get the scsi response */ diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 72febf1f8ff..ecdca7fc1e4 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device) iser_cq_tasklet_fn, (unsigned long)device); - device->mr = ib_get_dma_mr(device->pd, - IB_ACCESS_LOCAL_WRITE); + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); if (IS_ERR(device->mr)) goto dma_mr_err; @@ -150,7 +151,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) } ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); - params.page_shift = PAGE_SHIFT; + params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * * page aligned, the map whould be of N+1 pages */ params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; @@ -604,8 +605,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem_reg->lkey = mem->fmr->lkey; mem_reg->rkey = mem->fmr->rkey; - mem_reg->len = page_vec->length * PAGE_SIZE; + mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; + mem_reg->is_fmr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8257d5a2c8f..44b9e5be668 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -96,6 +96,8 @@ static struct ib_client srp_client = { .remove = srp_remove_one }; +static struct ib_sa_client srp_sa_client; + static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) { return (struct srp_target_port *) host->hostdata; @@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target) init_completion(&target->done); - target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev, + target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, + target->srp_host->dev->dev, target->srp_host->port, &target->path, IB_SA_PATH_REC_DGID | @@ -330,7 +333,7 @@ static int srp_send_req(struct srp_target_port *target) req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); /* - * In the published SRP specification (draft rev. 16a), the + * In the published SRP specification (draft rev. 16a), the * port identifier format is 8 bytes of ID extension followed * by 8 bytes of GUID. Older drafts put the two halves in the * opposite order, so that the GUID comes first. @@ -799,13 +802,6 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) spin_unlock_irqrestore(target->scsi_host->host_lock, flags); } -static void srp_reconnect_work(void *target_ptr) -{ - struct srp_target_port *target = target_ptr; - - srp_reconnect_target(target); -} - static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { struct srp_iu *iu; @@ -858,7 +854,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) { struct srp_target_port *target = target_ptr; struct ib_wc wc; - unsigned long flags; ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { @@ -866,10 +861,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) printk(KERN_ERR PFX "failed %s status %d\n", wc.wr_id & SRP_OP_RECV ? "receive" : "send", wc.status); - spin_lock_irqsave(target->scsi_host->host_lock, flags); - if (target->state == SRP_TARGET_LIVE) - schedule_work(&target->work); - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); break; } @@ -1461,12 +1452,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf) return sprintf(buf, "%d\n", target->zero_req_lim); } -static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); -static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); -static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); -static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); -static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); -static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); +static ssize_t show_local_ib_port(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + return sprintf(buf, "%d\n", target->srp_host->port); +} + +static ssize_t show_local_ib_device(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + return sprintf(buf, "%s\n", target->srp_host->dev->dev->name); +} + +static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); +static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); +static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); +static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); +static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); +static CLASS_DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); +static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); static struct class_device_attribute *srp_host_attrs[] = { &class_device_attr_id_ext, @@ -1475,6 +1482,8 @@ static struct class_device_attribute *srp_host_attrs[] = { &class_device_attr_pkey, &class_device_attr_dgid, &class_device_attr_zero_req_lim, + &class_device_attr_local_ib_port, + &class_device_attr_local_ib_device, NULL }; @@ -1705,8 +1714,6 @@ static ssize_t srp_create_target(struct class_device *class_dev, target->scsi_host = target_host; target->srp_host = host; - INIT_WORK(&target->work, srp_reconnect_work, target); - INIT_LIST_HEAD(&target->free_reqs); INIT_LIST_HEAD(&target->req_queue); for (i = 0; i < SRP_SQ_SIZE; ++i) { @@ -1895,7 +1902,7 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->fmr_pool)) srp_dev->fmr_pool = NULL; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { @@ -1994,9 +2001,12 @@ static int __init srp_init_module(void) return ret; } + ib_sa_register_client(&srp_sa_client); + ret = ib_register_client(&srp_client); if (ret) { printk(KERN_ERR PFX "couldn't register IB client\n"); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); return ret; } @@ -2007,6 +2017,7 @@ static int __init srp_init_module(void) static void __exit srp_cleanup_module(void) { ib_unregister_client(&srp_client); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); } |