diff options
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/Makefile | 11 | ||||
-rw-r--r-- | drivers/infiniband/core/addr.c | 367 | ||||
-rw-r--r-- | drivers/infiniband/core/cache.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 119 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 1927 | ||||
-rw-r--r-- | drivers/infiniband/core/fmr_pool.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 97 | ||||
-rw-r--r-- | drivers/infiniband/core/mad_priv.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 31 | ||||
-rw-r--r-- | drivers/infiniband/core/ucm.c | 183 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 971 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 35 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_marshall.c | 138 | ||||
-rw-r--r-- | drivers/infiniband/core/verbs.c | 44 |
15 files changed, 3328 insertions, 661 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index ec3353f24b2..68e73ec2d1f 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,5 +1,7 @@ +infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o + obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o + ib_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -12,8 +14,13 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +rdma_cm-y := cma.o + +ib_addr-y := addr.o + ib_umad-y := user_mad.o ib_ucm-y := ucm.o -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ + uverbs_marshall.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c new file mode 100644 index 00000000000..d294bbc42f0 --- /dev/null +++ b/drivers/infiniband/core/addr.c @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ + +#include <linux/mutex.h> +#include <linux/inetdevice.h> +#include <linux/workqueue.h> +#include <linux/if_arp.h> +#include <net/arp.h> +#include <net/neighbour.h> +#include <net/route.h> +#include <rdma/ib_addr.h> + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("IB Address Translation"); +MODULE_LICENSE("Dual BSD/GPL"); + +struct addr_req { + struct list_head list; + struct sockaddr src_addr; + struct sockaddr dst_addr; + struct rdma_dev_addr *addr; + void *context; + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context); + unsigned long timeout; + int status; +}; + +static void process_req(void *data); + +static DEFINE_MUTEX(lock); +static LIST_HEAD(req_list); +static DECLARE_WORK(work, process_req, NULL); +static struct workqueue_struct *addr_wq; + +static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + unsigned char *dst_dev_addr) +{ + switch (dev->type) { + case ARPHRD_INFINIBAND: + dev_addr->dev_type = IB_NODE_CA; + break; + default: + return -EADDRNOTAVAIL; + } + + memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); + if (dst_dev_addr) + memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); + return 0; +} + +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ + struct net_device *dev; + u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + int ret; + + dev = ip_dev_find(ip); + if (!dev) + return -EADDRNOTAVAIL; + + ret = copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; +} +EXPORT_SYMBOL(rdma_translate_ip); + +static void set_timeout(unsigned long time) +{ + unsigned long delay; + + cancel_delayed_work(&work); + + delay = time - jiffies; + if ((long)delay <= 0) + delay = 1; + + queue_delayed_work(addr_wq, &work, delay); +} + +static void queue_req(struct addr_req *req) +{ + struct addr_req *temp_req; + + mutex_lock(&lock); + list_for_each_entry_reverse(temp_req, &req_list, list) { + if (time_after(req->timeout, temp_req->timeout)) + break; + } + + list_add(&req->list, &temp_req->list); + + if (req_list.next == &req->list) + set_timeout(req->timeout); + mutex_unlock(&lock); +} + +static void addr_send_arp(struct sockaddr_in *dst_in) +{ + struct rtable *rt; + struct flowi fl; + u32 dst_ip = dst_in->sin_addr.s_addr; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = dst_ip; + if (ip_route_output_key(&rt, &fl)) + return; + + arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, + rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); + ip_rt_put(rt); +} + +static int addr_resolve_remote(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) +{ + u32 src_ip = src_in->sin_addr.s_addr; + u32 dst_ip = dst_in->sin_addr.s_addr; + struct flowi fl; + struct rtable *rt; + struct neighbour *neigh; + int ret; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = dst_ip; + fl.nl_u.ip4_u.saddr = src_ip; + ret = ip_route_output_key(&rt, &fl); + if (ret) + goto out; + + /* If the device does ARP internally, return 'done' */ + if (rt->idev->dev->flags & IFF_NOARP) { + copy_addr(addr, rt->idev->dev, NULL); + goto put; + } + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); + if (!neigh) { + ret = -ENODATA; + goto put; + } + + if (!(neigh->nud_state & NUD_VALID)) { + ret = -ENODATA; + goto release; + } + + if (!src_ip) { + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = rt->rt_src; + } + + ret = copy_addr(addr, neigh->dev, neigh->ha); +release: + neigh_release(neigh); +put: + ip_rt_put(rt); +out: + return ret; +} + +static void process_req(void *data) +{ + struct addr_req *req, *temp_req; + struct sockaddr_in *src_in, *dst_in; + struct list_head done_list; + + INIT_LIST_HEAD(&done_list); + + mutex_lock(&lock); + list_for_each_entry_safe(req, temp_req, &req_list, list) { + if (req->status) { + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + req->status = addr_resolve_remote(src_in, dst_in, + req->addr); + } + if (req->status && time_after(jiffies, req->timeout)) + req->status = -ETIMEDOUT; + else if (req->status == -ENODATA) + continue; + + list_del(&req->list); + list_add_tail(&req->list, &done_list); + } + + if (!list_empty(&req_list)) { + req = list_entry(req_list.next, struct addr_req, list); + set_timeout(req->timeout); + } + mutex_unlock(&lock); + + list_for_each_entry_safe(req, temp_req, &done_list, list) { + list_del(&req->list); + req->callback(req->status, &req->src_addr, req->addr, + req->context); + kfree(req); + } +} + +static int addr_resolve_local(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) +{ + struct net_device *dev; + u32 src_ip = src_in->sin_addr.s_addr; + u32 dst_ip = dst_in->sin_addr.s_addr; + int ret; + + dev = ip_dev_find(dst_ip); + if (!dev) + return -EADDRNOTAVAIL; + + if (ZERONET(src_ip)) { + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = dst_ip; + ret = copy_addr(addr, dev, dev->dev_addr); + } else if (LOOPBACK(src_ip)) { + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip((struct sockaddr *)src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } + + dev_put(dev); + return ret; +} + +int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, + struct rdma_dev_addr *addr, int timeout_ms, + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context), + void *context) +{ + struct sockaddr_in *src_in, *dst_in; + struct addr_req *req; + int ret = 0; + + req = kmalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + memset(req, 0, sizeof *req); + + if (src_addr) + memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); + memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + req->addr = addr; + req->callback = callback; + req->context = context; + + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + + req->status = addr_resolve_local(src_in, dst_in, addr); + if (req->status == -EADDRNOTAVAIL) + req->status = addr_resolve_remote(src_in, dst_in, addr); + + switch (req->status) { + case 0: + req->timeout = jiffies; + queue_req(req); + break; + case -ENODATA: + req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; + queue_req(req); + addr_send_arp(dst_in); + break; + default: + ret = req->status; + kfree(req); + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_resolve_ip); + +void rdma_addr_cancel(struct rdma_dev_addr *addr) +{ + struct addr_req *req, *temp_req; + + mutex_lock(&lock); + list_for_each_entry_safe(req, temp_req, &req_list, list) { + if (req->addr == addr) { + req->status = -ECANCELED; + req->timeout = jiffies; + list_del(&req->list); + list_add(&req->list, &req_list); + set_timeout(req->timeout); + break; + } + } + mutex_unlock(&lock); +} +EXPORT_SYMBOL(rdma_addr_cancel); + +static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkt, struct net_device *orig_dev) +{ + struct arphdr *arp_hdr; + + arp_hdr = (struct arphdr *) skb->nh.raw; + + if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || + arp_hdr->ar_op == htons(ARPOP_REPLY)) + set_timeout(jiffies); + + kfree_skb(skb); + return 0; +} + +static struct packet_type addr_arp = { + .type = __constant_htons(ETH_P_ARP), + .func = addr_arp_recv, + .af_packet_priv = (void*) 1, +}; + +static int addr_init(void) +{ + addr_wq = create_singlethread_workqueue("ib_addr_wq"); + if (!addr_wq) + return -ENOMEM; + + dev_add_pack(&addr_arp); + return 0; +} + +static void addr_cleanup(void) +{ + dev_remove_pack(&addr_arp); + destroy_workqueue(addr_wq); +} + +module_init(addr_init); +module_exit(addr_cleanup); diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 50364c0b090..e05ca2cdc73 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device, } EXPORT_SYMBOL(ib_find_cached_pkey); +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc) +{ + unsigned long flags; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_cached_lmc); + static void ib_cache_update(struct ib_device *device, u8 port) { @@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device, device->cache.pkey_cache[port - start_port(device)] = pkey_cache; device->cache.gid_cache [port - start_port(device)] = gid_cache; + device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + write_unlock_irq(&device->cache.lock); kfree(old_pkey_cache); @@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device) kmalloc(sizeof *device->cache.gid_cache * (end_port(device) - start_port(device) + 1), GFP_KERNEL); - if (!device->cache.pkey_cache || !device->cache.gid_cache) { + device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * + (end_port(device) - + start_port(device) + 1), + GFP_KERNEL); + + if (!device->cache.pkey_cache || !device->cache.gid_cache || + !device->cache.lmc_cache) { printk(KERN_WARNING "Couldn't allocate cache " "for %s\n", device->name); goto err; @@ -333,6 +359,7 @@ err_cache: err: kfree(device->cache.pkey_cache); kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); } static void ib_cache_cleanup_one(struct ib_device *device) @@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device) kfree(device->cache.pkey_cache); kfree(device->cache.gid_cache); + kfree(device->cache.lmc_cache); } static struct ib_client cache_client = { diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 86fee43502c..450adfe0a4f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ + * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include <linux/completion.h> @@ -132,6 +132,7 @@ struct cm_id_private { /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; + struct ib_cm_compare_data *compare_data; void *private_data; __be64 tid; @@ -253,23 +254,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } -static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num, - u16 dlid, u8 sl, u16 src_path_bits) -{ - memset(ah_attr, 0, sizeof ah_attr); - ah_attr->dlid = dlid; - ah_attr->sl = sl; - ah_attr->src_path_bits = src_path_bits; - ah_attr->port_num = port_num; -} - -static void cm_init_av_for_response(struct cm_port *port, - struct ib_wc *wc, struct cm_av *av) +static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, + struct ib_grh *grh, struct cm_av *av) { av->port = port; av->pkey_index = wc->pkey_index; - cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid, - wc->sl, wc->dlid_path_bits); + ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc, + grh, &av->ah_attr); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) @@ -299,9 +290,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) return ret; av->port = port; - cm_set_ah_attr(&av->ah_attr, av->port->port_num, - be16_to_cpu(path->dlid), path->sl, - be16_to_cpu(path->slid) & 0x7F); + ib_init_ah_from_path(cm_dev->device, port->port_num, path, + &av->ah_attr); av->packet_life_time = path->packet_life_time; return 0; } @@ -357,6 +347,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } +static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +{ + int i; + + for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) + ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & + ((unsigned long *) mask)[i]; +} + +static int cm_compare_data(struct ib_cm_compare_data *src_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + u8 dst[IB_CM_COMPARE_SIZE]; + + if (!src_data || !dst_data) + return 0; + + cm_mask_copy(src, src_data->data, dst_data->mask); + cm_mask_copy(dst, dst_data->data, src_data->mask); + return memcmp(src, dst, IB_CM_COMPARE_SIZE); +} + +static int cm_compare_private_data(u8 *private_data, + struct ib_cm_compare_data *dst_data) +{ + u8 src[IB_CM_COMPARE_SIZE]; + + if (!dst_data) + return 0; + + cm_mask_copy(src, private_data, dst_data->mask); + return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); +} + static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; @@ -364,14 +389,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; + int data_cmp; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); + data_cmp = cm_compare_data(cm_id_priv->compare_data, + cur_cm_id_priv->compare_data); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + (cm_id_priv->id.device == cur_cm_id_priv->id.device) && + !data_cmp) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) @@ -380,6 +409,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) link = &(*link)->rb_right; else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; + else if (service_id > cur_cm_id_priv->id.service_id) + link = &(*link)->rb_right; + else if (data_cmp < 0) + link = &(*link)->rb_left; else link = &(*link)->rb_right; } @@ -389,16 +422,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) } static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id) + __be64 service_id, + u8 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; + int data_cmp; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + data_cmp = cm_compare_private_data(private_data, + cm_id_priv->compare_data); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) + (cm_id_priv->id.device == device) && !data_cmp) return cm_id_priv; if (device < cm_id_priv->id.device) @@ -407,6 +444,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, node = node->rb_right; else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; + else if (service_id > cm_id_priv->id.service_id) + node = node->rb_right; + else if (data_cmp < 0) + node = node->rb_left; else node = node->rb_right; } @@ -730,15 +771,14 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - if (cm_id_priv->private_data && cm_id_priv->private_data_len) - kfree(cm_id_priv->private_data); + kfree(cm_id_priv->compare_data); + kfree(cm_id_priv->private_data); kfree(cm_id_priv); } EXPORT_SYMBOL(ib_destroy_cm_id); -int ib_cm_listen(struct ib_cm_id *cm_id, - __be64 service_id, - __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, + struct ib_cm_compare_data *compare_data) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; unsigned long flags; @@ -752,7 +792,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - BUG_ON(cm_id->state != IB_CM_IDLE); + if (cm_id->state != IB_CM_IDLE) + return -EINVAL; + + if (compare_data) { + cm_id_priv->compare_data = kzalloc(sizeof *compare_data, + GFP_KERNEL); + if (!cm_id_priv->compare_data) + return -ENOMEM; + cm_mask_copy(cm_id_priv->compare_data->data, + compare_data->data, compare_data->mask); + memcpy(cm_id_priv->compare_data->mask, compare_data->mask, + IB_CM_COMPARE_SIZE); + } cm_id->state = IB_CM_LISTEN; @@ -769,6 +821,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; + kfree(cm_id_priv->compare_data); + cm_id_priv->compare_data = NULL; ret = -EBUSY; } return ret; @@ -1241,7 +1295,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); + req_msg->service_id, + req_msg->private_data); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1276,6 +1331,7 @@ static int cm_req_handler(struct cm_work *work) cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = req_msg->local_comm_id; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); @@ -2549,7 +2605,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = cpu_to_be16(param->pkey); + sidr_req_msg->pkey = cpu_to_be16(param->path->pkey); sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) @@ -2641,6 +2697,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -2654,7 +2711,8 @@ static int cm_sidr_req_handler(struct cm_work *work) goto out; /* Duplicate message. */ } cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); + sidr_req_msg->service_id, + sidr_req_msg->private_data); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); @@ -3291,7 +3349,6 @@ error: static void __exit ib_cm_cleanup(void) { - flush_workqueue(cm.wq); destroy_workqueue(cm.wq); ib_unregister_client(&cm_client); idr_destroy(&cm.local_id_table); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c new file mode 100644 index 00000000000..a76834edf60 --- /dev/null +++ b/drivers/infiniband/core/cma.c @@ -0,0 +1,1927 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + */ + +#include <linux/completion.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/mutex.h> +#include <linux/random.h> +#include <linux/idr.h> + +#include <net/tcp.h> + +#include <rdma/rdma_cm.h> +#include <rdma/rdma_cm_ib.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_cm.h> +#include <rdma/ib_sa.h> + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("Generic RDMA CM Agent"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_MAX_CM_RETRIES 3 + +static void cma_add_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device); + +static struct ib_client cma_client = { + .name = "cma", + .add = cma_add_one, + .remove = cma_remove_one +}; + +static LIST_HEAD(dev_list); +static LIST_HEAD(listen_any_list); +static DEFINE_MUTEX(lock); +static struct workqueue_struct *cma_wq; +static DEFINE_IDR(sdp_ps); +static DEFINE_IDR(tcp_ps); + +struct cma_device { + struct list_head list; + struct ib_device *device; + __be64 node_guid; + struct completion comp; + atomic_t refcount; + struct list_head id_list; +}; + +enum cma_state { + CMA_IDLE, + CMA_ADDR_QUERY, + CMA_ADDR_RESOLVED, + CMA_ROUTE_QUERY, + CMA_ROUTE_RESOLVED, + CMA_CONNECT, + CMA_DISCONNECT, + CMA_ADDR_BOUND, + CMA_LISTEN, + CMA_DEVICE_REMOVAL, + CMA_DESTROYING +}; + +struct rdma_bind_list { + struct idr *ps; + struct hlist_head owners; + unsigned short port; +}; + +/* + * Device removal can occur at anytime, so we need extra handling to + * serialize notifying the user of device removal with other callbacks. + * We do this by disabling removal notification while a callback is in process, + * and reporting it after the callback completes. + */ +struct rdma_id_private { + struct rdma_cm_id id; + + struct rdma_bind_list *bind_list; + struct hlist_node node; + struct list_head list; + struct list_head listen_list; + struct cma_device *cma_dev; + + enum cma_state state; + spinlock_t lock; + struct completion comp; + atomic_t refcount; + wait_queue_head_t wait_remove; + atomic_t dev_remove; + + int backlog; + int timeout_ms; + struct ib_sa_query *query; + int query_id; + union { + struct ib_cm_id *ib; + } cm_id; + + u32 seq_num; + u32 qp_num; + enum ib_qp_type qp_type; + u8 srq; +}; + +struct cma_work { + struct work_struct work; + struct rdma_id_private *id; + enum cma_state old_state; + enum cma_state new_state; + struct rdma_cm_event event; +}; + +union cma_ip_addr { + struct in6_addr ip6; + struct { + __u32 pad[3]; + __u32 addr; + } ip4; +}; + +struct cma_hdr { + u8 cma_version; + u8 ip_version; /* IP version: 7:4 */ + __u16 port; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hh { + u8 bsdh[16]; + u8 sdp_version; /* Major version: 7:4 */ + u8 ip_version; /* IP version: 7:4 */ + u8 sdp_specific1[10]; + __u16 port; + __u16 sdp_specific2; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hah { + u8 bsdh[16]; + u8 sdp_version; +}; + +#define CMA_VERSION 0x00 +#define SDP_MAJ_VERSION 0x2 + +static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + ret = (id_priv->state == comp); + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static int cma_comp_exch(struct rdma_id_private *id_priv, + enum cma_state comp, enum cma_state exch) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + if ((ret = (id_priv->state == comp))) + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static enum cma_state cma_exch(struct rdma_id_private *id_priv, + enum cma_state exch) +{ + unsigned long flags; + enum cma_state old; + + spin_lock_irqsave(&id_priv->lock, flags); + old = id_priv->state; + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return old; +} + +static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) +{ + return hdr->ip_version >> 4; +} + +static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +{ + hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); +} + +static inline u8 sdp_get_majv(u8 sdp_version) +{ + return sdp_version >> 4; +} + +static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) +{ + return hh->ip_version >> 4; +} + +static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) +{ + hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); +} + +static void cma_attach_to_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + atomic_inc(&cma_dev->refcount); + id_priv->cma_dev = cma_dev; + id_priv->id.device = cma_dev->device; + list_add_tail(&id_priv->list, &cma_dev->id_list); +} + +static inline void cma_deref_dev(struct cma_device *cma_dev) +{ + if (atomic_dec_and_test(&cma_dev->refcount)) + complete(&cma_dev->comp); +} + +static void cma_detach_from_dev(struct rdma_id_private *id_priv) +{ + list_del(&id_priv->list); + cma_deref_dev(id_priv->cma_dev); + id_priv->cma_dev = NULL; +} + +static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + union ib_gid *gid; + int ret = -ENODEV; + + gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = ib_find_cached_gid(cma_dev->device, gid, + &id_priv->id.port_num, NULL); + if (!ret) { + cma_attach_to_dev(id_priv, cma_dev); + break; + } + } + mutex_unlock(&lock); + return ret; +} + +static int cma_acquire_dev(struct rdma_id_private *id_priv) +{ + switch (id_priv->id.route.addr.dev_addr.dev_type) { + case IB_NODE_CA: + return cma_acquire_ib_dev(id_priv); + default: + return -ENODEV; + } +} + +static void cma_deref_id(struct rdma_id_private *id_priv) +{ + if (atomic_dec_and_test(&id_priv->refcount)) + complete(&id_priv->comp); +} + +static void cma_release_remove(struct rdma_id_private *id_priv) +{ + if (atomic_dec_and_test(&id_priv->dev_remove)) + wake_up(&id_priv->wait_remove); +} + +struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps) +{ + struct rdma_id_private *id_priv; + + id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); + if (!id_priv) + return ERR_PTR(-ENOMEM); + + id_priv->state = CMA_IDLE; + id_priv->id.context = context; + id_priv->id.event_handler = event_handler; + id_priv->id.ps = ps; + spin_lock_init(&id_priv->lock); + init_completion(&id_priv->comp); + atomic_set(&id_priv->refcount, 1); + init_waitqueue_head(&id_priv->wait_remove); + atomic_set(&id_priv->dev_remove, 0); + INIT_LIST_HEAD(&id_priv->listen_list); + get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + + return &id_priv->id; +} +EXPORT_SYMBOL(rdma_create_id); + +static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + struct rdma_dev_addr *dev_addr; + int ret; + + dev_addr = &id_priv->id.route.addr.dev_addr; + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, + ib_addr_get_pkey(dev_addr), + &qp_attr.pkey_index); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr.port_num = id_priv->id.port_num; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PORT); +} + +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct rdma_id_private *id_priv; + struct ib_qp *qp; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id->device != pd->device) + return -EINVAL; + + qp = ib_create_qp(pd, qp_init_attr); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_init_ib_qp(id_priv, qp); + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) + goto err; + + id->qp = qp; + id_priv->qp_num = qp->qp_num; + id_priv->qp_type = qp->qp_type; + id_priv->srq = (qp->srq != NULL); + return 0; +err: + ib_destroy_qp(qp); + return ret; +} +EXPORT_SYMBOL(rdma_create_qp); + +void rdma_destroy_qp(struct rdma_cm_id *id) +{ + ib_destroy_qp(id->qp); +} +EXPORT_SYMBOL(rdma_destroy_qp); + +static int cma_modify_qp_rtr(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + /* Need to update QP attributes from default values. */ + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} + +static int cma_modify_qp_rts(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_RTS; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} + +static int cma_modify_qp_err(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); +} + +int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, + qp_attr_mask); + if (qp_attr->qp_state == IB_QPS_RTR) + qp_attr->rq_psn = id_priv->seq_num; + break; + default: + ret = -ENOSYS; + break; + } + + return ret; +} +EXPORT_SYMBOL(rdma_init_qp_attr); + +static inline int cma_zero_addr(struct sockaddr *addr) +{ + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[3] | ip6->s6_addr32[4]) == 0; + } +} + +static inline int cma_loopback_addr(struct sockaddr *addr) +{ + return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr); +} + +static inline int cma_any_addr(struct sockaddr *addr) +{ + return cma_zero_addr(addr) || cma_loopback_addr(addr); +} + +static inline int cma_any_port(struct sockaddr *addr) +{ + return !((struct sockaddr_in *) addr)->sin_port; +} + +static int cma_get_net_info(void *hdr, enum rdma_port_space ps, + u8 *ip_ver, __u16 *port, + union cma_ip_addr **src, union cma_ip_addr **dst) +{ + switch (ps) { + case RDMA_PS_SDP: + if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != + SDP_MAJ_VERSION) + return -EINVAL; + + *ip_ver = sdp_get_ip_ver(hdr); + *port = ((struct sdp_hh *) hdr)->port; + *src = &((struct sdp_hh *) hdr)->src_addr; + *dst = &((struct sdp_hh *) hdr)->dst_addr; + break; + default: + if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) + return -EINVAL; + + *ip_ver = cma_get_ip_ver(hdr); + *port = ((struct cma_hdr *) hdr)->port; + *src = &((struct cma_hdr *) hdr)->src_addr; + *dst = &((struct cma_hdr *) hdr)->dst_addr; + break; + } + + if (*ip_ver != 4 && *ip_ver != 6) + return -EINVAL; + return 0; +} + +static void cma_save_net_info(struct rdma_addr *addr, + struct rdma_addr *listen_addr, + u8 ip_ver, __u16 port, + union cma_ip_addr *src, union cma_ip_addr *dst) +{ + struct sockaddr_in *listen4, *ip4; + struct sockaddr_in6 *listen6, *ip6; + + switch (ip_ver) { + case 4: + listen4 = (struct sockaddr_in *) &listen_addr->src_addr; + ip4 = (struct sockaddr_in *) &addr->src_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = dst->ip4.addr; + ip4->sin_port = listen4->sin_port; + + ip4 = (struct sockaddr_in *) &addr->dst_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = src->ip4.addr; + ip4->sin_port = port; + break; + case 6: + listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; + ip6 = (struct sockaddr_in6 *) &addr->src_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = dst->ip6; + ip6->sin6_port = listen6->sin6_port; + + ip6 = (struct sockaddr_in6 *) &addr->dst_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = src->ip6; + ip6->sin6_port = port; + break; + default: + break; + } +} + +static inline int cma_user_data_offset(enum rdma_port_space ps) +{ + switch (ps) { + case RDMA_PS_SDP: + return 0; + default: + return sizeof(struct cma_hdr); + } +} + +static int cma_notify_user(struct rdma_id_private *id_priv, + enum rdma_cm_event_type type, int status, + void *data, u8 data_len) +{ + struct rdma_cm_event event; + + event.event = type; + event.status = status; + event.private_data = data; + event.private_data_len = data_len; + + return id_priv->id.event_handler(&id_priv->id, &event); +} + +static void cma_cancel_route(struct rdma_id_private *id_priv) +{ + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + if (id_priv->query) + ib_sa_cancel_query(id_priv->query_id, id_priv->query); + break; + default: + break; + } +} + +static inline int cma_internal_listen(struct rdma_id_private *id_priv) +{ + return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev && + cma_any_addr(&id_priv->id.route.addr.src_addr); +} + +static void cma_destroy_listen(struct rdma_id_private *id_priv) +{ + cma_exch(id_priv, CMA_DESTROYING); + + if (id_priv->cma_dev) { + switch (id_priv->id.device->node_type) { + case IB_NODE_CA: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + default: + break; + } + cma_detach_from_dev(id_priv); + } + list_del(&id_priv->listen_list); + + cma_deref_id(id_priv); + wait_for_completion(&id_priv->comp); + + kfree(id_priv); +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + struct rdma_id_private *dev_id_priv; + + mutex_lock(&lock); + list_del(&id_priv->list); + + while (!list_empty(&id_priv->listen_list)) { + dev_id_priv = list_entry(id_priv->listen_list.next, + struct rdma_id_private, listen_list); + cma_destroy_listen(dev_id_priv); + } + mutex_unlock(&lock); +} + +static void cma_cancel_operation(struct rdma_id_private *id_priv, + enum cma_state state) +{ + switch (state) { + case CMA_ADDR_QUERY: + rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); + break; + case CMA_ROUTE_QUERY: + cma_cancel_route(id_priv); + break; + case CMA_LISTEN: + if (cma_any_addr(&id_priv->id.route.addr.src_addr) && + !id_priv->cma_dev) + cma_cancel_listens(id_priv); + break; + default: + break; + } +} + +static void cma_release_port(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + + if (!bind_list) + return; + + mutex_lock(&lock); + hlist_del(&id_priv->node); + if (hlist_empty(&bind_list->owners)) { + idr_remove(bind_list->ps, bind_list->port); + kfree(bind_list); + } + mutex_unlock(&lock); +} + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + enum cma_state state; + + id_priv = container_of(id, struct rdma_id_private, id); + state = cma_exch(id_priv, CMA_DESTROYING); + cma_cancel_operation(id_priv, state); + + if (id_priv->cma_dev) { + switch (id->device->node_type) { + case IB_NODE_CA: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + default: + break; + } + mutex_lock(&lock); + cma_detach_from_dev(id_priv); + mutex_unlock(&lock); + } + + cma_release_port(id_priv); + cma_deref_id(id_priv); + wait_for_completion(&id_priv->comp); + + kfree(id_priv->id.route.path_rec); + kfree(id_priv); +} +EXPORT_SYMBOL(rdma_destroy_id); + +static int cma_rep_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto reject; + + ret = cma_modify_qp_rts(&id_priv->id); + if (ret) + goto reject; + + ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(&id_priv->id); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) +{ + if (id_priv->id.ps == RDMA_PS_SDP && + sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != + SDP_MAJ_VERSION) + return -EINVAL; + + return 0; +} + +static int cma_rtu_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rts(&id_priv->id); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(&id_priv->id); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + enum rdma_cm_event_type event; + u8 private_data_len = 0; + int ret = 0, status = 0; + + atomic_inc(&id_priv->dev_remove); + if (!cma_comp(id_priv, CMA_CONNECT)) + goto out; + + switch (ib_event->event) { + case IB_CM_REQ_ERROR: + case IB_CM_REP_ERROR: + event = RDMA_CM_EVENT_UNREACHABLE; + status = -ETIMEDOUT; + break; + case IB_CM_REP_RECEIVED: + status = cma_verify_rep(id_priv, ib_event->private_data); + if (status) + event = RDMA_CM_EVENT_CONNECT_ERROR; + else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { + status = cma_rep_recv(id_priv); + event = status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + } else + event = RDMA_CM_EVENT_CONNECT_RESPONSE; + private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + break; + case IB_CM_RTU_RECEIVED: + status = cma_rtu_recv(id_priv); + event = status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + break; + case IB_CM_DREQ_ERROR: + status = -ETIMEDOUT; /* fall through */ + case IB_CM_DREQ_RECEIVED: + case IB_CM_DREP_RECEIVED: + if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) + goto out; + event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IB_CM_TIMEWAIT_EXIT: + case IB_CM_MRA_RECEIVED: + /* ignore event */ + goto out; + case IB_CM_REJ_RECEIVED: + cma_modify_qp_err(&id_priv->id); + status = ib_event->param.rej_rcvd.reason; + event = RDMA_CM_EVENT_REJECTED; + break; + default: + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + ib_event->event); + goto out; + } + + ret = cma_notify_user(id_priv, event, status, ib_event->private_data, + private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + cma_release_remove(id_priv); + return ret; +} + +static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + struct rdma_route *rt; + union cma_ip_addr *src, *dst; + __u16 port; + u8 ip_ver; + + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + return NULL; + + rt = &id->route; + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); + if (!rt->path_rec) + goto err; + + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; + if (rt->num_paths == 2) + rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; + + ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); + rt->addr.dev_addr.dev_type = IB_NODE_CA; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->state = CMA_CONNECT; + return id_priv; +err: + rdma_destroy_id(id); + return NULL; +} + +static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *listen_id, *conn_id; + int offset, ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + conn_id = cma_new_id(&listen_id->id, ib_event); + if (!conn_id) { + ret = -ENOMEM; + goto out; + } + + atomic_inc(&conn_id->dev_remove); + ret = cma_acquire_ib_dev(conn_id); + if (ret) { + ret = -ENODEV; + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + goto out; + } + + conn_id->cm_id.ib = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_ib_handler; + + offset = cma_user_data_offset(listen_id->id.ps); + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + ib_event->private_data + offset, + IB_CM_REQ_PRIVATE_DATA_SIZE - offset); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } +out: + cma_release_remove(listen_id); + return ret; +} + +static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +{ + return cpu_to_be64(((u64)ps << 16) + + be16_to_cpu(((struct sockaddr_in *) addr)->sin_port)); +} + +static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, + struct ib_cm_compare_data *compare) +{ + struct cma_hdr *cma_data, *cma_mask; + struct sdp_hh *sdp_data, *sdp_mask; + __u32 ip4_addr; + struct in6_addr ip6_addr; + + memset(compare, 0, sizeof *compare); + cma_data = (void *) compare->data; + cma_mask = (void *) compare->mask; + sdp_data = (void *) compare->data; + sdp_mask = (void *) compare->mask; + + switch (addr->sa_family) { + case AF_INET: + ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 4); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip4.addr = ip4_addr; + sdp_mask->dst_addr.ip4.addr = ~0; + } else { + cma_set_ip_ver(cma_data, 4); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = ~0; + } + break; + case AF_INET6: + ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 6); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip6 = ip6_addr; + memset(&sdp_mask->dst_addr.ip6, 0xFF, + sizeof sdp_mask->dst_addr.ip6); + } else { + cma_set_ip_ver(cma_data, 6); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); + } + break; + default: + break; + } +} + +static int cma_ib_listen(struct rdma_id_private *id_priv) +{ + struct ib_cm_compare_data compare_data; + struct sockaddr *addr; + __be64 svc_id; + int ret; + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) + return PTR_ERR(id_priv->cm_id.ib); + + addr = &id_priv->id.route.addr.src_addr; + svc_id = cma_get_service_id(id_priv->id.ps, addr); + if (cma_any_addr(addr)) + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); + else { + cma_set_compare_data(id_priv->id.ps, addr, &compare_data); + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); + } + + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + + return ret; +} + +static int cma_listen_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct rdma_id_private *id_priv = id->context; + + id->context = id_priv->id.context; + id->event_handler = id_priv->id.event_handler; + return id_priv->id.event_handler(id, event); +} + +static void cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + struct rdma_id_private *dev_id_priv; + struct rdma_cm_id *id; + int ret; + + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); + if (IS_ERR(id)) + return; + + dev_id_priv = container_of(id, struct rdma_id_private, id); + + dev_id_priv->state = CMA_ADDR_BOUND; + memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, + ip_addr_size(&id_priv->id.route.addr.src_addr)); + + cma_attach_to_dev(dev_id_priv, cma_dev); + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + + ret = rdma_listen(id, id_priv->backlog); + if (ret) + goto err; + + return; +err: + cma_destroy_listen(dev_id_priv); +} + +static void cma_listen_on_all(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + + mutex_lock(&lock); + list_add_tail(&id_priv->list, &listen_any_list); + list_for_each_entry(cma_dev, &dev_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); +} + +static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) +{ + struct sockaddr_in addr_in; + + memset(&addr_in, 0, sizeof addr_in); + addr_in.sin_family = af; + return rdma_bind_addr(id, (struct sockaddr *) &addr_in); +} + +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_any(id, AF_INET); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) + return -EINVAL; + + id_priv->backlog = backlog; + if (id->device) { + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_ib_listen(id_priv); + if (ret) + goto err; + break; + default: + ret = -ENOSYS; + goto err; + } + } else + cma_listen_on_all(id_priv); + + return 0; +err: + id_priv->backlog = 0; + cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_listen); + +static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, + void *context) +{ + struct cma_work *work = context; + struct rdma_route *route; + + route = &work->id->id.route; + + if (!status) { + route->num_paths = 1; + *route->path_rec = *path_rec; + } else { + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + } + + queue_work(cma_wq, &work->work); +} + +static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, + struct cma_work *work) +{ + struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr; + struct ib_sa_path_rec path_rec; + + memset(&path_rec, 0, sizeof path_rec); + path_rec.sgid = *ib_addr_get_sgid(addr); + path_rec.dgid = *ib_addr_get_dgid(addr); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); + path_rec.numb_path = 1; + + id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, + id_priv->id.port_num, &path_rec, + IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, + timeout_ms, GFP_KERNEL, + cma_query_handler, work, &id_priv->query); + + return (id_priv->query_id < 0) ? id_priv->query_id : 0; +} + +static void cma_work_handler(void *data) +{ + struct cma_work *work = data; + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + atomic_inc(&id_priv->dev_remove); + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, CMA_DESTROYING); + destroy = 1; + } +out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + kfree(work); +} + +static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct rdma_route *route = &id_priv->id.route; + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + + route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + ret = cma_query_ib_route(id_priv, timeout_ms, work); + if (ret) + goto err2; + + return 0; +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + +int rdma_set_ib_paths(struct rdma_cm_id *id, + struct ib_sa_path_rec *path_rec, int num_paths) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) + return -EINVAL; + + id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL); + if (!id->route.path_rec) { + ret = -ENOMEM; + goto err; + } + + memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths); + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_set_ib_paths); + +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + default: + ret = -ENOSYS; + break; + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_route); + +static int cma_bind_loopback(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + struct ib_port_attr port_attr; + union ib_gid *gid; + u16 pkey; + int ret; + u8 p; + + mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) + for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) + if (!ib_query_port (cma_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) + goto port_found; + + if (!list_empty(&dev_list)) { + p = 1; + cma_dev = list_entry(dev_list.next, struct cma_device, list); + } else { + ret = -ENODEV; + goto out; + } + +port_found: + gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr); + ret = ib_get_cached_gid(cma_dev->device, p, 0, gid); + if (ret) + goto out; + + ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); + if (ret) + goto out; + + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); + id_priv->id.port_num = p; + cma_attach_to_dev(id_priv, cma_dev); +out: + mutex_unlock(&lock); + return ret; +} + +static void addr_handler(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *dev_addr, void *context) +{ + struct rdma_id_private *id_priv = context; + enum rdma_cm_event_type event; + + atomic_inc(&id_priv->dev_remove); + if (!id_priv->cma_dev && !status) + status = cma_acquire_dev(id_priv); + + if (status) { + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) + goto out; + event = RDMA_CM_EVENT_ADDR_ERROR; + } else { + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) + goto out; + memcpy(&id_priv->id.route.addr.src_addr, src_addr, + ip_addr_size(src_addr)); + event = RDMA_CM_EVENT_ADDR_RESOLVED; + } + + if (cma_notify_user(id_priv, event, status, NULL, 0)) { + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } +out: + cma_release_remove(id_priv); + cma_deref_id(id_priv); +} + +static int cma_resolve_loopback(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + struct sockaddr_in *src_in, *dst_in; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_bind_loopback(id_priv); + if (ret) + goto err; + } + + ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr)); + + if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) { + src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; + dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + } + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ADDR_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr) +{ + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + else + return cma_bind_any(id, dst_addr->sa_family); +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_addr(id, src_addr, dst_addr); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) + ret = cma_resolve_loopback(id_priv); + else + ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +static void cma_bind_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + sin->sin_port = htons(bind_list->port); + id_priv->bind_list = bind_list; + hlist_add_head(&id_priv->node, &bind_list->owners); +} + +static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, + unsigned short snum) +{ + struct rdma_bind_list *bind_list; + int port, start, ret; + + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); + if (!bind_list) + return -ENOMEM; + + start = snum ? snum : sysctl_local_port_range[0]; + + do { + ret = idr_get_new_above(ps, bind_list, start, &port); + } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); + + if (ret) + goto err; + + if ((snum && port != snum) || + (!snum && port > sysctl_local_port_range[1])) { + idr_remove(ps, port); + ret = -EADDRNOTAVAIL; + goto err; + } + + bind_list->ps = ps; + bind_list->port = (unsigned short) port; + cma_bind_port(bind_list, id_priv); + return 0; +err: + kfree(bind_list); + return ret; +} + +static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) +{ + struct rdma_id_private *cur_id; + struct sockaddr_in *sin, *cur_sin; + struct rdma_bind_list *bind_list; + struct hlist_node *node; + unsigned short snum; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + snum = ntohs(sin->sin_port); + if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + bind_list = idr_find(ps, snum); + if (!bind_list) + return cma_alloc_port(ps, id_priv, snum); + + /* + * We don't support binding to any address if anyone is bound to + * a specific address on the same port. + */ + if (cma_any_addr(&id_priv->id.route.addr.src_addr)) + return -EADDRNOTAVAIL; + + hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + if (cma_any_addr(&cur_id->id.route.addr.src_addr)) + return -EADDRNOTAVAIL; + + cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; + if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) + return -EADDRINUSE; + } + + cma_bind_port(bind_list, id_priv); + return 0; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + struct idr *ps; + int ret; + + switch (id_priv->id.ps) { + case RDMA_PS_SDP: + ps = &sdp_ps; + break; + case RDMA_PS_TCP: + ps = &tcp_ps; + break; + default: + return -EPROTONOSUPPORT; + } + + mutex_lock(&lock); + if (cma_any_port(&id_priv->id.route.addr.src_addr)) + ret = cma_alloc_port(ps, id_priv, 0); + else + ret = cma_use_port(ps, id_priv); + mutex_unlock(&lock); + + return ret; +} + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + int ret; + + if (addr->sa_family != AF_INET) + return -EAFNOSUPPORT; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) + return -EINVAL; + + if (!cma_any_addr(addr)) { + ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + if (!ret) + ret = cma_acquire_dev(id_priv); + if (ret) + goto err; + } + + memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + ret = cma_get_port(id_priv); + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); + return ret; +} +EXPORT_SYMBOL(rdma_bind_addr); + +static int cma_format_hdr(void *hdr, enum rdma_port_space ps, + struct rdma_route *route) +{ + struct sockaddr_in *src4, *dst4; + struct cma_hdr *cma_hdr; + struct sdp_hh *sdp_hdr; + + src4 = (struct sockaddr_in *) &route->addr.src_addr; + dst4 = (struct sockaddr_in *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 4); + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + sdp_hdr->port = src4->sin_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + break; + } + return 0; +} + +static int cma_connect_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_req_param req; + struct rdma_route *route; + void *private_data; + int offset, ret; + + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv->id.ps); + req.private_data_len = offset + conn_param->private_data_len; + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + + if (conn_param->private_data && conn_param->private_data_len) + memcpy(private_data + offset, conn_param->private_data, + conn_param->private_data_len); + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); + goto out; + } + + route = &id_priv->id.route; + ret = cma_format_hdr(private_data, id_priv->id.ps, route); + if (ret) + goto out; + req.private_data = private_data; + + req.primary_path = &route->path_rec[0]; + if (route->num_paths == 2) + req.alternate_path = &route->path_rec[1]; + + req.service_id = cma_get_service_id(id_priv->id.ps, + &route->addr.dst_addr); + req.qp_num = id_priv->qp_num; + req.qp_type = id_priv->qp_type; + req.starting_psn = id_priv->seq_num; + req.responder_resources = conn_param->responder_resources; + req.initiator_depth = conn_param->initiator_depth; + req.flow_control = conn_param->flow_control; + req.retry_count = conn_param->retry_count; + req.rnr_retry_count = conn_param->rnr_retry_count; + req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.max_cm_retries = CMA_MAX_CM_RETRIES; + req.srq = id_priv->srq ? 1 : 0; + + ret = ib_send_cm_req(id_priv->cm_id.ib, &req); +out: + kfree(private_data); + return ret; +} + +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) + return -EINVAL; + + if (!id->qp) { + id_priv->qp_num = conn_param->qp_num; + id_priv->qp_type = conn_param->qp_type; + id_priv->srq = conn_param->srq; + } + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = cma_connect_ib(id_priv, conn_param); + break; + default: + ret = -ENOSYS; + break; + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_connect); + +static int cma_accept_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_rep_param rep; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + memset(&rep, 0, sizeof rep); + rep.qp_num = id_priv->qp_num; + rep.starting_psn = id_priv->seq_num; + rep.private_data = conn_param->private_data; + rep.private_data_len = conn_param->private_data_len; + rep.responder_resources = conn_param->responder_resources; + rep.initiator_depth = conn_param->initiator_depth; + rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT; + rep.failover_accepted = 0; + rep.flow_control = conn_param->flow_control; + rep.rnr_retry_count = conn_param->rnr_retry_count; + rep.srq = id_priv->srq ? 1 : 0; + + return ib_send_cm_rep(id_priv->cm_id.ib, &rep); +} + +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return -EINVAL; + + if (!id->qp && conn_param) { + id_priv->qp_num = conn_param->qp_num; + id_priv->qp_type = conn_param->qp_type; + id_priv->srq = conn_param->srq; + } + + switch (id->device->node_type) { + case IB_NODE_CA: + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(id); + rdma_reject(id, NULL, 0); + return ret; +} +EXPORT_SYMBOL(rdma_accept); + +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return -EINVAL; + + switch (id->device->node_type) { + case IB_NODE_CA: + ret = ib_send_cm_rej(id_priv->cm_id.ib, + IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + private_data, private_data_len); + break; + default: + ret = -ENOSYS; + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_reject); + +int rdma_disconnect(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT) && + !cma_comp(id_priv, CMA_DISCONNECT)) + return -EINVAL; + + ret = cma_modify_qp_err(id); + if (ret) + goto out; + + switch (id->device->node_type) { + case IB_NODE_CA: + /* Initiate or respond to a disconnect. */ + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + break; + default: + break; + } +out: + return ret; +} +EXPORT_SYMBOL(rdma_disconnect); + +static void cma_add_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + + cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); + if (!cma_dev) + return; + + cma_dev->device = device; + cma_dev->node_guid = device->node_guid; + if (!cma_dev->node_guid) + goto err; + + init_completion(&cma_dev->comp); + atomic_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); + return; +err: + kfree(cma_dev); +} + +static int cma_remove_id_dev(struct rdma_id_private *id_priv) +{ + enum cma_state state; + + /* Record that we want to remove the device */ + state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); + if (state == CMA_DESTROYING) + return 0; + + cma_cancel_operation(id_priv, state); + wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove)); + + /* Check for destruction from another callback. */ + if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) + return 0; + + return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL, + 0, NULL, 0); +} + +static void cma_process_remove(struct cma_device *cma_dev) +{ + struct list_head remove_list; + struct rdma_id_private *id_priv; + int ret; + + INIT_LIST_HEAD(&remove_list); + + mutex_lock(&lock); + while (!list_empty(&cma_dev->id_list)) { + id_priv = list_entry(cma_dev->id_list.next, + struct rdma_id_private, list); + + if (cma_internal_listen(id_priv)) { + cma_destroy_listen(id_priv); + continue; + } + + list_del(&id_priv->list); + list_add_tail(&id_priv->list, &remove_list); + atomic_inc(&id_priv->refcount); + mutex_unlock(&lock); + + ret = cma_remove_id_dev(id_priv); + cma_deref_id(id_priv); + if (ret) + rdma_destroy_id(&id_priv->id); + + mutex_lock(&lock); + } + mutex_unlock(&lock); + + cma_deref_dev(cma_dev); + wait_for_completion(&cma_dev->comp); +} + +static void cma_remove_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + + cma_dev = ib_get_client_data(device, &cma_client); + if (!cma_dev) + return; + + mutex_lock(&lock); + list_del(&cma_dev->list); + mutex_unlock(&lock); + + cma_process_remove(cma_dev); + kfree(cma_dev); +} + +static int cma_init(void) +{ + int ret; + + cma_wq = create_singlethread_workqueue("rdma_cm_wq"); + if (!cma_wq) + return -ENOMEM; + + ret = ib_register_client(&cma_client); + if (ret) + goto err; + return 0; + +err: + destroy_workqueue(cma_wq); + return ret; +} + +static void cma_cleanup(void) +{ + ib_unregister_client(&cma_client); + destroy_workqueue(cma_wq); + idr_destroy(&sdp_ps); + idr_destroy(&tcp_ps); +} + +module_init(cma_init); +module_exit(cma_cleanup); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 838bf54458d..615fe9cc6c5 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -54,7 +54,7 @@ enum { /* * If an FMR is not in use, then the list member will point to either * its pool's free_list (if the FMR can be mapped again; that is, - * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the + * remap_count < pool->max_remaps) or its pool's dirty_list (if the * FMR needs to be unmapped before being remapped). In either of * these cases it is a bug if the ref_count is not 0. In other words, * if ref_count is > 0, then the list member must not be linked into @@ -84,6 +84,7 @@ struct ib_fmr_pool { int pool_size; int max_pages; + int max_remaps; int dirty_watermark; int dirty_len; struct list_head free_list; @@ -214,8 +215,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, { struct ib_device *device; struct ib_fmr_pool *pool; + struct ib_device_attr *attr; int i; int ret; + int max_remaps; if (!params) return ERR_PTR(-EINVAL); @@ -228,6 +231,26 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, return ERR_PTR(-ENOSYS); } + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) { + printk(KERN_WARNING "couldn't allocate device attr struct"); + return ERR_PTR(-ENOMEM); + } + + ret = ib_query_device(device, attr); + if (ret) { + printk(KERN_WARNING "couldn't query device"); + kfree(attr); + return ERR_PTR(ret); + } + + if (!attr->max_map_per_fmr) + max_remaps = IB_FMR_MAX_REMAPS; + else + max_remaps = attr->max_map_per_fmr; + + kfree(attr); + pool = kmalloc(sizeof *pool, GFP_KERNEL); if (!pool) { printk(KERN_WARNING "couldn't allocate pool struct"); @@ -258,6 +281,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, pool->pool_size = 0; pool->max_pages = params->max_pages_per_fmr; + pool->max_remaps = max_remaps; pool->dirty_watermark = params->dirty_watermark; pool->dirty_len = 0; spin_lock_init(&pool->pool_lock); @@ -279,7 +303,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, struct ib_pool_fmr *fmr; struct ib_fmr_attr attr = { .max_pages = params->max_pages_per_fmr, - .max_maps = IB_FMR_MAX_REMAPS, + .max_maps = pool->max_remaps, .page_shift = params->page_shift }; @@ -489,7 +513,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) --fmr->ref_count; if (!fmr->ref_count) { - if (fmr->remap_count < IB_FMR_MAX_REMAPS) { + if (fmr->remap_count < pool->max_remaps) { list_add_tail(&fmr->list, &pool->free_list); } else { list_add_tail(&fmr->list, &pool->dirty_list); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 5ad41a64314..b38e02a5db3 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -34,6 +34,7 @@ * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include <linux/dma-mapping.h> +#include <rdma/ib_cache.h> #include "mad_priv.h" #include "mad_rmpp.h" @@ -45,8 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API"); MODULE_AUTHOR("Hal Rosenstock"); MODULE_AUTHOR("Sean Hefty"); - -kmem_cache_t *ib_mad_cache; +static kmem_cache_t *ib_mad_cache; static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -1673,20 +1673,21 @@ static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr, +static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *wr, struct ib_mad_recv_wc *rwc ) { struct ib_ah_attr attr; u8 send_resp, rcv_resp; + union ib_gid sgid; + struct ib_device *device = mad_agent_priv->agent.device; + u8 port_num = mad_agent_priv->agent.port_num; + u8 lmc; send_resp = ((struct ib_mad *)(wr->send_buf.mad))-> mad_hdr.method & IB_MGMT_METHOD_RESP; rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP; - if (!send_resp && rcv_resp) - /* is request/response. GID/LIDs are both local (same). */ - return 1; - if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ return 0; @@ -1695,48 +1696,78 @@ static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr, /* Assume not equal, to avoid false positives. */ return 0; - if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH)) - return attr.dlid == rwc->wc->slid; - else if ((attr.ah_flags & IB_AH_GRH) && - (rwc->wc->wc_flags & IB_WC_GRH)) - return memcmp(attr.grh.dgid.raw, - rwc->recv_buf.grh->sgid.raw, 16) == 0; - else + if (!!(attr.ah_flags & IB_AH_GRH) != + !!(rwc->wc->wc_flags & IB_WC_GRH)) /* one has GID, other does not. Assume different */ return 0; + + if (!send_resp && rcv_resp) { + /* is request/response. */ + if (!(attr.ah_flags & IB_AH_GRH)) { + if (ib_get_cached_lmc(device, port_num, &lmc)) + return 0; + return (!lmc || !((attr.src_path_bits ^ + rwc->wc->dlid_path_bits) & + ((1 << lmc) - 1))); + } else { + if (ib_get_cached_gid(device, port_num, + attr.grh.sgid_index, &sgid)) + return 0; + return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, + 16); + } + } + + if (!(attr.ah_flags & IB_AH_GRH)) + return attr.dlid == rwc->wc->slid; + else + return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, + 16); +} + +static inline int is_direct(u8 class) +{ + return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); } + struct ib_mad_send_wr_private* ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *mad_recv_wc) + struct ib_mad_recv_wc *wc) { - struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wr_private *wr; struct ib_mad *mad; - mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad; + mad = (struct ib_mad *)wc->recv_buf.mad; - list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, - agent_list) { - if ((mad_send_wr->tid == mad->mad_hdr.tid) && - rcv_has_same_class(mad_send_wr, mad_recv_wc) && - rcv_has_same_gid(mad_send_wr, mad_recv_wc)) - return mad_send_wr; + list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { + if ((wr->tid == mad->mad_hdr.tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return wr; } /* * It's possible to receive the response before we've * been notified that the send has completed */ - list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, - agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && - mad_send_wr->tid == mad->mad_hdr.tid && - mad_send_wr->timeout && - rcv_has_same_class(mad_send_wr, mad_recv_wc) && - rcv_has_same_gid(mad_send_wr, mad_recv_wc)) { + list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { + if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad->mad_hdr.tid && + wr->timeout && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ - return (mad_send_wr->status == IB_WC_SUCCESS) ? - mad_send_wr : NULL; - } + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } return NULL; } diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index b4fa28d3160..d147f3bad2c 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -212,8 +212,6 @@ struct ib_mad_port_private { struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; -extern kmem_cache_t *ib_mad_cache; - int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 501cc054cb3..e911c99ff84 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -47,6 +47,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_sa.h> +#include <rdma/ib_cache.h> MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); @@ -441,6 +442,36 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) } EXPORT_SYMBOL(ib_sa_cancel_query); +int ib_init_ah_from_path(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) +{ + int ret; + u16 gid_index; + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = be16_to_cpu(rec->dlid); + ah_attr->sl = rec->sl; + ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f; + ah_attr->port_num = port_num; + + if (rec->hop_limit > 1) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = rec->dgid; + + ret = ib_find_cached_gid(device, &rec->sgid, &port_num, + &gid_index); + if (ret) + return ret; + + ah_attr->grh.sgid_index = gid_index; + ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); + ah_attr->grh.hop_limit = rec->hop_limit; + ah_attr->grh.traffic_class = rec->traffic_class; + } + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_path); + static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 9164a09b6cc..c1c6fda9452 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -30,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $ + * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include <linux/completion.h> @@ -50,6 +50,7 @@ #include <rdma/ib_cm.h> #include <rdma/ib_user_cm.h> +#include <rdma/ib_marshall.h> MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); @@ -63,7 +64,7 @@ struct ib_ucm_device { }; struct ib_ucm_file { - struct semaphore mutex; + struct mutex file_mutex; struct file *filp; struct ib_ucm_device *device; @@ -152,7 +153,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) { struct ib_ucm_event *uevent; - down(&ctx->file->mutex); + mutex_lock(&ctx->file->file_mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { @@ -167,7 +168,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) kfree(uevent); } - up(&ctx->file->mutex); + mutex_unlock(&ctx->file->file_mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) @@ -205,36 +206,6 @@ error: return NULL; } -static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, - struct ib_sa_path_rec *kpath) -{ - if (!kpath || !upath) - return; - - memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid); - memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid); - - upath->dlid = kpath->dlid; - upath->slid = kpath->slid; - upath->raw_traffic = kpath->raw_traffic; - upath->flow_label = kpath->flow_label; - upath->hop_limit = kpath->hop_limit; - upath->traffic_class = kpath->traffic_class; - upath->reversible = kpath->reversible; - upath->numb_path = kpath->numb_path; - upath->pkey = kpath->pkey; - upath->sl = kpath->sl; - upath->mtu_selector = kpath->mtu_selector; - upath->mtu = kpath->mtu; - upath->rate_selector = kpath->rate_selector; - upath->rate = kpath->rate; - upath->packet_life_time = kpath->packet_life_time; - upath->preference = kpath->preference; - - upath->packet_life_time_selector = - kpath->packet_life_time_selector; -} - static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { @@ -253,8 +224,10 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->srq = kreq->srq; ureq->port = kreq->port; - ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); - ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); + ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); + if (kreq->alternate_path) + ib_copy_path_rec_to_user(&ureq->alternate_path, + kreq->alternate_path); } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, @@ -324,8 +297,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, info = evt->param.rej_rcvd.ari; break; case IB_CM_LAP_RECEIVED: - ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path, - evt->param.lap_rcvd.alternate_path); + ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, + evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; @@ -402,11 +375,11 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id, if (result) goto err2; - down(&ctx->file->mutex); + mutex_lock(&ctx->file->file_mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); wake_up_interruptible(&ctx->file->poll_wait); - up(&ctx->file->mutex); + mutex_unlock(&ctx->file->file_mutex); return 0; err2: @@ -432,7 +405,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - down(&file->mutex); + mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { if (file->filp->f_flags & O_NONBLOCK) { @@ -447,9 +420,9 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); - up(&file->mutex); + mutex_unlock(&file->file_mutex); schedule(); - down(&file->mutex); + mutex_lock(&file->file_mutex); finish_wait(&file->poll_wait, &wait); } @@ -509,7 +482,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, kfree(uevent->info); kfree(uevent); done: - up(&file->mutex); + mutex_unlock(&file->file_mutex); return result; } @@ -528,9 +501,9 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - down(&file->mutex); + mutex_lock(&file->file_mutex); ctx = ib_ucm_ctx_alloc(file); - up(&file->mutex); + mutex_unlock(&file->file_mutex); if (!ctx) return -ENOMEM; @@ -637,65 +610,11 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, return result; } -static void ib_ucm_copy_ah_attr(struct ib_ucm_ah_attr *dest_attr, - struct ib_ah_attr *src_attr) -{ - memcpy(dest_attr->grh_dgid, src_attr->grh.dgid.raw, - sizeof src_attr->grh.dgid); - dest_attr->grh_flow_label = src_attr->grh.flow_label; - dest_attr->grh_sgid_index = src_attr->grh.sgid_index; - dest_attr->grh_hop_limit = src_attr->grh.hop_limit; - dest_attr->grh_traffic_class = src_attr->grh.traffic_class; - - dest_attr->dlid = src_attr->dlid; - dest_attr->sl = src_attr->sl; - dest_attr->src_path_bits = src_attr->src_path_bits; - dest_attr->static_rate = src_attr->static_rate; - dest_attr->is_global = (src_attr->ah_flags & IB_AH_GRH); - dest_attr->port_num = src_attr->port_num; -} - -static void ib_ucm_copy_qp_attr(struct ib_ucm_init_qp_attr_resp *dest_attr, - struct ib_qp_attr *src_attr) -{ - dest_attr->cur_qp_state = src_attr->cur_qp_state; - dest_attr->path_mtu = src_attr->path_mtu; - dest_attr->path_mig_state = src_attr->path_mig_state; - dest_attr->qkey = src_attr->qkey; - dest_attr->rq_psn = src_attr->rq_psn; - dest_attr->sq_psn = src_attr->sq_psn; - dest_attr->dest_qp_num = src_attr->dest_qp_num; - dest_attr->qp_access_flags = src_attr->qp_access_flags; - - dest_attr->max_send_wr = src_attr->cap.max_send_wr; - dest_attr->max_recv_wr = src_attr->cap.max_recv_wr; - dest_attr->max_send_sge = src_attr->cap.max_send_sge; - dest_attr->max_recv_sge = src_attr->cap.max_recv_sge; - dest_attr->max_inline_data = src_attr->cap.max_inline_data; - - ib_ucm_copy_ah_attr(&dest_attr->ah_attr, &src_attr->ah_attr); - ib_ucm_copy_ah_attr(&dest_attr->alt_ah_attr, &src_attr->alt_ah_attr); - - dest_attr->pkey_index = src_attr->pkey_index; - dest_attr->alt_pkey_index = src_attr->alt_pkey_index; - dest_attr->en_sqd_async_notify = src_attr->en_sqd_async_notify; - dest_attr->sq_draining = src_attr->sq_draining; - dest_attr->max_rd_atomic = src_attr->max_rd_atomic; - dest_attr->max_dest_rd_atomic = src_attr->max_dest_rd_atomic; - dest_attr->min_rnr_timer = src_attr->min_rnr_timer; - dest_attr->port_num = src_attr->port_num; - dest_attr->timeout = src_attr->timeout; - dest_attr->retry_cnt = src_attr->retry_cnt; - dest_attr->rnr_retry = src_attr->rnr_retry; - dest_attr->alt_port_num = src_attr->alt_port_num; - dest_attr->alt_timeout = src_attr->alt_timeout; -} - static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { - struct ib_ucm_init_qp_attr_resp resp; + struct ib_uverbs_qp_attr resp; struct ib_ucm_init_qp_attr cmd; struct ib_ucm_context *ctx; struct ib_qp_attr qp_attr; @@ -718,7 +637,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, if (result) goto out; - ib_ucm_copy_qp_attr(&resp, &qp_attr); + ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) @@ -729,6 +648,17 @@ out: return result; } +static int ucm_validate_listen(__be64 service_id, __be64 service_mask) +{ + service_id &= service_mask; + + if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || + ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) + return -EINVAL; + + return 0; +} + static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -744,7 +674,13 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); + result = ucm_validate_listen(cmd.service_id, cmd.service_mask); + if (result) + goto out; + + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask, + NULL); +out: ib_ucm_ctx_put(ctx); return result; } @@ -793,7 +729,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) { - struct ib_ucm_path_rec ucm_path; + struct ib_user_path_rec upath; struct ib_sa_path_rec *sa_path; *path = NULL; @@ -805,36 +741,14 @@ static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) if (!sa_path) return -ENOMEM; - if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src, - sizeof(ucm_path))) { + if (copy_from_user(&upath, (void __user *)(unsigned long)src, + sizeof(upath))) { kfree(sa_path); return -EFAULT; } - memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid); - memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid); - - sa_path->dlid = ucm_path.dlid; - sa_path->slid = ucm_path.slid; - sa_path->raw_traffic = ucm_path.raw_traffic; - sa_path->flow_label = ucm_path.flow_label; - sa_path->hop_limit = ucm_path.hop_limit; - sa_path->traffic_class = ucm_path.traffic_class; - sa_path->reversible = ucm_path.reversible; - sa_path->numb_path = ucm_path.numb_path; - sa_path->pkey = ucm_path.pkey; - sa_path->sl = ucm_path.sl; - sa_path->mtu_selector = ucm_path.mtu_selector; - sa_path->mtu = ucm_path.mtu; - sa_path->rate_selector = ucm_path.rate_selector; - sa_path->rate = ucm_path.rate; - sa_path->packet_life_time = ucm_path.packet_life_time; - sa_path->preference = ucm_path.preference; - - sa_path->packet_life_time_selector = - ucm_path.packet_life_time_selector; - + ib_copy_path_rec_from_user(sa_path, &upath); *path = sa_path; return 0; } @@ -1130,7 +1044,6 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; - param.pkey = cmd.pkey; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { @@ -1263,7 +1176,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) INIT_LIST_HEAD(&file->ctxs); init_waitqueue_head(&file->poll_wait); - init_MUTEX(&file->mutex); + mutex_init(&file->file_mutex); filp->private_data = file; file->filp = filp; @@ -1277,11 +1190,11 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) struct ib_ucm_file *file = filp->private_data; struct ib_ucm_context *ctx; - down(&file->mutex); + mutex_lock(&file->file_mutex); while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); - up(&file->mutex); + mutex_unlock(&file->file_mutex); mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); @@ -1291,9 +1204,9 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) ib_ucm_cleanup_events(ctx); kfree(ctx); - down(&file->mutex); + mutex_lock(&file->file_mutex); } - up(&file->mutex); + mutex_unlock(&file->file_mutex); kfree(file); return 0; } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3372d67ff13..bb9bee56a82 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -132,7 +132,7 @@ struct ib_ucq_object { u32 async_events_reported; }; -extern struct mutex ib_uverbs_idr_mutex; +extern spinlock_t ib_uverbs_idr_lock; extern struct idr ib_uverbs_pd_idr; extern struct idr ib_uverbs_mr_idr; extern struct idr ib_uverbs_mw_idr; @@ -141,6 +141,8 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); + struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd); void ib_uverbs_release_event_file(struct kref *ref); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 9f69bd48eb1..76bf61e9b55 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -50,6 +50,196 @@ (udata)->outlen = (olen); \ } while (0) +/* + * The ib_uobject locking scheme is as follows: + * + * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it + * needs to be held during all idr operations. When an object is + * looked up, a reference must be taken on the object's kref before + * dropping this lock. + * + * - Each object also has an rwsem. This rwsem must be held for + * reading while an operation that uses the object is performed. + * For example, while registering an MR, the associated PD's + * uobject.mutex must be held for reading. The rwsem must be held + * for writing while initializing or destroying an object. + * + * - In addition, each object has a "live" flag. If this flag is not + * set, then lookups of the object will fail even if it is found in + * the idr. This handles a reader that blocks and does not acquire + * the rwsem until after the object is destroyed. The destroy + * operation will set the live flag to 0 and then drop the rwsem; + * this will allow the reader to acquire the rwsem, see that the + * live flag is 0, and then drop the rwsem and its reference to + * object. The underlying storage will not be freed until the last + * reference to the object is dropped. + */ + +static void init_uobj(struct ib_uobject *uobj, u64 user_handle, + struct ib_ucontext *context) +{ + uobj->user_handle = user_handle; + uobj->context = context; + kref_init(&uobj->ref); + init_rwsem(&uobj->mutex); + uobj->live = 0; +} + +static void release_uobj(struct kref *kref) +{ + kfree(container_of(kref, struct ib_uobject, ref)); +} + +static void put_uobj(struct ib_uobject *uobj) +{ + kref_put(&uobj->ref, release_uobj); +} + +static void put_uobj_read(struct ib_uobject *uobj) +{ + up_read(&uobj->mutex); + put_uobj(uobj); +} + +static void put_uobj_write(struct ib_uobject *uobj) +{ + up_write(&uobj->mutex); + put_uobj(uobj); +} + +static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) +{ + int ret; + +retry: + if (!idr_pre_get(idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&ib_uverbs_idr_lock); + ret = idr_get_new(idr, uobj, &uobj->id); + spin_unlock(&ib_uverbs_idr_lock); + + if (ret == -EAGAIN) + goto retry; + + return ret; +} + +void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) +{ + spin_lock(&ib_uverbs_idr_lock); + idr_remove(idr, uobj->id); + spin_unlock(&ib_uverbs_idr_lock); +} + +static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + spin_lock(&ib_uverbs_idr_lock); + uobj = idr_find(idr, id); + if (uobj) + kref_get(&uobj->ref); + spin_unlock(&ib_uverbs_idr_lock); + + return uobj; +} + +static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + down_read(&uobj->mutex); + if (!uobj->live) { + put_uobj_read(uobj); + return NULL; + } + + return uobj; +} + +static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + down_write(&uobj->mutex); + if (!uobj->live) { + put_uobj_write(uobj); + return NULL; + } + + return uobj; +} + +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_read_uobj(idr, id, context); + return uobj ? uobj->object : NULL; +} + +static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); +} + +static void put_pd_read(struct ib_pd *pd) +{ + put_uobj_read(pd->uobject); +} + +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); +} + +static void put_cq_read(struct ib_cq *cq) +{ + put_uobj_read(cq->uobject); +} + +static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); +} + +static void put_ah_read(struct ib_ah *ah) +{ + put_uobj_read(ah->uobject); +} + +static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); +} + +static void put_qp_read(struct ib_qp *qp) +{ + put_uobj_read(qp->uobject); +} + +static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); +} + +static void put_srq_read(struct ib_srq *srq) +{ + put_uobj_read(srq->uobject); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -80,8 +270,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, in_len - sizeof cmd, out_len - sizeof resp); ucontext = ibdev->alloc_ucontext(ibdev, &udata); - if (IS_ERR(ucontext)) - return PTR_ERR(file->ucontext); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(file->ucontext); + goto err; + } ucontext->device = ibdev; INIT_LIST_HEAD(&ucontext->pd_list); @@ -278,7 +470,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - uobj->context = file->ucontext; + init_uobj(uobj, 0, file->ucontext); + down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, file->ucontext, &udata); @@ -291,20 +484,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); - mutex_lock(&ib_uverbs_idr_mutex); - -retry: - if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_up; - } - - ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); - - if (ret == -EAGAIN) - goto retry; + uobj->object = pd; + ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); if (ret) - goto err_up; + goto err_idr; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -312,26 +495,27 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + uobj->live = 1; + + up_write(&uobj->mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_pd_idr, uobj->id); +err_copy: + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); +err_idr: ib_dealloc_pd(pd); err: - kfree(uobj); + put_uobj_write(uobj); return ret; } @@ -340,37 +524,34 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; - struct ib_pd *pd; struct ib_uobject *uobj; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); + if (!uobj) + return -EINVAL; - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) - goto out; + ret = ib_dealloc_pd(uobj->object); + if (!ret) + uobj->live = 0; - uobj = pd->uobject; + put_uobj_write(uobj); - ret = ib_dealloc_pd(pd); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); - kfree(uobj); - -out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_uobj(uobj); - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, @@ -410,7 +591,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - obj->uobject.context = file->ucontext; + init_uobj(&obj->uobject, 0, file->ucontext); + down_write(&obj->uobject.mutex); /* * We ask for writable memory if any access flags other than @@ -427,23 +609,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, obj->umem.virt_base = cmd.hca_va; - mutex_lock(&ib_uverbs_idr_mutex); - - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) { - ret = -EINVAL; - goto err_up; - } - - if (!pd->device->reg_user_mr) { - ret = -ENOSYS; - goto err_up; - } + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) + goto err_release; mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); - goto err_up; + goto err_put; } mr->device = pd->device; @@ -452,53 +625,48 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); - memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - -retry: - if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_unreg; - } - - ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); - - if (ret == -EAGAIN) - goto retry; + obj->uobject.object = mr; + ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); if (ret) goto err_unreg; + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; resp.mr_handle = obj->uobject.id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); err_unreg: ib_dereg_mr(mr); - atomic_dec(&pd->usecnt); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); +err_put: + put_pd_read(pd); +err_release: ib_umem_release(file->device->ib_dev, &obj->umem); err_free: - kfree(obj); + put_uobj_write(&obj->uobject); return ret; } @@ -508,37 +676,40 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; + struct ib_uobject *uobj; struct ib_umem_object *memobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); - if (!mr || mr->uobject->context != file->ucontext) - goto out; + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); + if (!uobj) + return -EINVAL; - memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + memobj = container_of(uobj, struct ib_umem_object, uobject); + mr = uobj->object; ret = ib_dereg_mr(mr); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); - list_del(&memobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); - kfree(memobj); -out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_uobj(uobj); - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -577,7 +748,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq cmd; struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; - struct ib_ucq_object *uobj; + struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -595,10 +766,13 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext); + down_write(&obj->uobject.mutex); + if (cmd.comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); if (!ev_file) { @@ -607,72 +781,64 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, } } - uobj->uobject.user_handle = cmd.user_handle; - uobj->uobject.context = file->ucontext; - uobj->uverbs_file = file; - uobj->comp_events_reported = 0; - uobj->async_events_reported = 0; - INIT_LIST_HEAD(&uobj->comp_list); - INIT_LIST_HEAD(&uobj->async_list); + obj->uverbs_file = file; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, file->ucontext, &udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); - goto err; + goto err_file; } cq->device = file->device->ib_dev; - cq->uobject = &uobj->uobject; + cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); - mutex_lock(&ib_uverbs_idr_mutex); - -retry: - if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_up; - } - - ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); - - if (ret == -EAGAIN) - goto retry; + obj->uobject.object = cq; + ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); if (ret) - goto err_up; + goto err_free; memset(&resp, 0, sizeof resp); - resp.cq_handle = uobj->uobject.id; + resp.cq_handle = obj->uobject.id; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } mutex_lock(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); + list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); + -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); +err_free: ib_destroy_cq(cq); -err: +err_file: if (ev_file) - ib_uverbs_release_ucq(file, ev_file, uobj); - kfree(uobj); + ib_uverbs_release_ucq(file, ev_file, obj); + +err: + put_uobj_write(&obj->uobject); return ret; } @@ -693,11 +859,9 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - mutex_lock(&ib_uverbs_idr_mutex); - - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq) - goto out; + cq = idr_read_cq(cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; ret = cq->device->resize_cq(cq, cmd.cqe, &udata); if (ret) @@ -711,7 +875,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_cq_read(cq); return ret ? ret : in_len; } @@ -722,6 +886,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; + struct ib_uobject *uobj; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; @@ -742,15 +907,17 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - mutex_lock(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) { + uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) { ret = -EINVAL; goto out; } + cq = uobj->object; resp->count = ib_poll_cq(cq, cmd.ne, wc); + put_uobj_read(uobj); + for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; resp->wc[i].status = wc[i].status; @@ -772,7 +939,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); kfree(resp); out_wc: @@ -785,22 +951,23 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_req_notify_cq cmd; + struct ib_uobject *uobj; struct ib_cq *cq; - int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (cq && cq->uobject->context == file->ucontext) { - ib_req_notify_cq(cq, cmd.solicited_only ? - IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - ret = in_len; - } - mutex_unlock(&ib_uverbs_idr_mutex); + uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + cq = uobj->object; - return ret; + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + + put_uobj_read(uobj); + + return in_len; } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, @@ -809,52 +976,50 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj; struct ib_cq *cq; - struct ib_ucq_object *uobj; + struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file; - u64 user_handle; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - memset(&resp, 0, sizeof resp); - - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + cq = uobj->object; + ev_file = cq->cq_context; + obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); - if (!cq || cq->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_cq(cq); + if (!ret) + uobj->live = 0; - user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); - ev_file = cq->cq_context; + put_uobj_write(uobj); - ret = ib_destroy_cq(cq); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_ucq(file, ev_file, uobj); + ib_uverbs_release_ucq(file, ev_file, obj); - resp.comp_events_reported = uobj->comp_events_reported; - resp.async_events_reported = uobj->async_events_reported; + memset(&resp, 0, sizeof resp); + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; - -out: - mutex_unlock(&ib_uverbs_idr_mutex); + return -EFAULT; - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, @@ -864,7 +1029,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; @@ -882,23 +1047,21 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext); + down_write(&obj->uevent.uobject.mutex); - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); - rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); - srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); + rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext); + srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - if (!pd || pd->uobject->context != file->ucontext || - !scq || scq->uobject->context != file->ucontext || - !rcq || rcq->uobject->context != file->ucontext || - (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) { + if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; - goto err_up; + goto err_put; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -915,16 +1078,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.cap.max_recv_sge = cmd.max_recv_sge; attr.cap.max_inline_data = cmd.max_inline_data; - uobj->uevent.uobject.user_handle = cmd.user_handle; - uobj->uevent.uobject.context = file->ucontext; - uobj->uevent.events_reported = 0; - INIT_LIST_HEAD(&uobj->uevent.event_list); - INIT_LIST_HEAD(&uobj->mcast_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); qp = pd->device->create_qp(pd, &attr, &udata); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_up; + goto err_put; } qp->device = pd->device; @@ -932,7 +1093,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; - qp->uobject = &uobj->uevent.uobject; + qp->uobject = &obj->uevent.uobject; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; @@ -942,23 +1103,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (attr.srq) atomic_inc(&attr.srq->usecnt); - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - -retry: - if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_destroy; - } - - ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id); - - if (ret == -EAGAIN) - goto retry; + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); if (ret) goto err_destroy; - resp.qp_handle = uobj->uevent.uobject.id; + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; resp.max_recv_sge = attr.cap.max_recv_sge; resp.max_send_sge = attr.cap.max_send_sge; resp.max_recv_wr = attr.cap.max_recv_wr; @@ -968,32 +1120,42 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + put_cq_read(scq); + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + mutex_lock(&file->mutex); - list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); - atomic_dec(&pd->usecnt); - atomic_dec(&attr.send_cq->usecnt); - atomic_dec(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_dec(&attr.srq->usecnt); - -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - kfree(uobj); +err_put: + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + + put_uobj_write(&obj->uevent.uobject); return ret; } @@ -1018,15 +1180,15 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (qp && qp->uobject->context == file->ucontext) - ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - else + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { ret = -EINVAL; + goto out; + } - mutex_unlock(&ib_uverbs_idr_mutex); + ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); + + put_qp_read(qp); if (ret) goto out; @@ -1113,10 +1275,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) { + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { ret = -EINVAL; goto out; } @@ -1168,13 +1328,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; ret = ib_modify_qp(qp, attr, cmd.attr_mask); + + put_qp_read(qp); + if (ret) goto out; ret = in_len; out: - mutex_unlock(&ib_uverbs_idr_mutex); kfree(attr); return ret; @@ -1186,8 +1348,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; + struct ib_uobject *uobj; struct ib_qp *qp; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1195,43 +1358,43 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; - - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); + if (!uobj) + return -EINVAL; + qp = uobj->object; + obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - if (!list_empty(&uobj->mcast_list)) { - ret = -EBUSY; - goto out; + if (!list_empty(&obj->mcast_list)) { + put_uobj_write(uobj); + return -EBUSY; } ret = ib_destroy_qp(qp); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uevent.uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, &uobj->uevent); + ib_uverbs_release_uevent(file, &obj->uevent); - resp.events_reported = uobj->uevent.events_reported; + resp.events_reported = obj->uevent.events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; - -out: - mutex_unlock(&ib_uverbs_idr_mutex); + return -EFAULT; - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, @@ -1244,6 +1407,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; struct ib_qp *qp; int i, sg_ind; + int is_ud; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1260,12 +1424,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) goto out; + is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { @@ -1273,12 +1436,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, buf + sizeof cmd + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; - goto out; + goto out_put; } if (user_wr->num_sge + sg_ind > cmd.sge_count) { ret = -EINVAL; - goto out; + goto out_put; } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + @@ -1286,7 +1449,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, GFP_KERNEL); if (!next) { ret = -ENOMEM; - goto out; + goto out_put; } if (!last) @@ -1302,12 +1465,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; next->imm_data = (__be32 __force) user_wr->imm_data; - if (qp->qp_type == IB_QPT_UD) { - next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, - user_wr->wr.ud.ah); + if (is_ud) { + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, + file->ucontext); if (!next->wr.ud.ah) { ret = -EINVAL; - goto out; + goto out_put; } next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; @@ -1344,7 +1507,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; - goto out; + goto out_put; } sg_ind += next->num_sge; } else @@ -1364,10 +1527,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, &resp, sizeof resp)) ret = -EFAULT; -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); +out: while (wr) { + if (is_ud && wr->wr.ud.ah) + put_ah_read(wr->wr.ud.ah); next = wr->next; kfree(wr); wr = next; @@ -1482,14 +1648,15 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp, wr, &bad_wr); + + put_qp_read(qp); + if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1503,8 +1670,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); - while (wr) { next = wr->next; kfree(wr); @@ -1533,14 +1698,15 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - mutex_lock(&ib_uverbs_idr_mutex); - - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + + put_srq_read(srq); + if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1554,8 +1720,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, ret = -EFAULT; out: - mutex_unlock(&ib_uverbs_idr_mutex); - while (wr) { next = wr->next; kfree(wr); @@ -1587,17 +1751,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); + init_uobj(uobj, cmd.user_handle, file->ucontext); + down_write(&uobj->mutex); - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - if (!pd || pd->uobject->context != file->ucontext) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { ret = -EINVAL; - goto err_up; + goto err; } - uobj->user_handle = cmd.user_handle; - uobj->context = file->ucontext; - attr.dlid = cmd.attr.dlid; attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; @@ -1613,21 +1775,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); - goto err_up; - } - - ah->uobject = uobj; - -retry: - if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_destroy; + goto err; } - ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + ah->uobject = uobj; + uobj->object = ah; - if (ret == -EAGAIN) - goto retry; + ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); if (ret) goto err_destroy; @@ -1636,27 +1790,29 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->ah_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + uobj->live = 1; + + up_write(&uobj->mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_ah_idr, uobj->id); +err_copy: + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); err_destroy: ib_destroy_ah(ah); -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - - kfree(uobj); +err: + put_uobj_write(uobj); return ret; } @@ -1666,35 +1822,34 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, struct ib_uverbs_destroy_ah cmd; struct ib_ah *ah; struct ib_uobject *uobj; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); + if (!uobj) + return -EINVAL; + ah = uobj->object; - ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); - if (!ah || ah->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_ah(ah); + if (!ret) + uobj->live = 0; - uobj = ah->uobject; + put_uobj_write(uobj); - ret = ib_destroy_ah(ah); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); - kfree(uobj); + put_uobj(uobj); -out: - mutex_unlock(&ib_uverbs_idr_mutex); - - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -1703,47 +1858,43 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; + int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - list_for_each_entry(mcast, &uobj->mcast_list, list) + list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { ret = 0; - goto out; + goto out_put; } mcast = kmalloc(sizeof *mcast, GFP_KERNEL); if (!mcast) { ret = -ENOMEM; - goto out; + goto out_put; } mcast->lid = cmd.mlid; memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); - if (!ret) { - uobj = container_of(qp->uobject, struct ib_uqp_object, - uevent.uobject); - list_add_tail(&mcast->list, &uobj->mcast_list); - } else + if (!ret) + list_add_tail(&mcast->list, &obj->mcast_list); + else kfree(mcast); -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); return ret ? ret : in_len; } @@ -1753,7 +1904,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_detach_mcast cmd; - struct ib_uqp_object *uobj; + struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; @@ -1761,19 +1912,17 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); - if (!qp || qp->uobject->context != file->ucontext) - goto out; + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); if (ret) - goto out; + goto out_put; - uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - list_for_each_entry(mcast, &uobj->mcast_list, list) + list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { list_del(&mcast->list); @@ -1781,8 +1930,8 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, break; } -out: - mutex_unlock(&ib_uverbs_idr_mutex); +out_put: + put_qp_read(qp); return ret ? ret : in_len; } @@ -1794,7 +1943,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; - struct ib_uevent_object *uobj; + struct ib_uevent_object *obj; struct ib_pd *pd; struct ib_srq *srq; struct ib_srq_init_attr attr; @@ -1810,17 +1959,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) return -ENOMEM; - mutex_lock(&ib_uverbs_idr_mutex); + init_uobj(&obj->uobject, 0, file->ucontext); + down_write(&obj->uobject.mutex); - pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); - - if (!pd || pd->uobject->context != file->ucontext) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { ret = -EINVAL; - goto err_up; + goto err; } attr.event_handler = ib_uverbs_srq_event_handler; @@ -1829,69 +1978,59 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, attr.attr.max_sge = cmd.max_sge; attr.attr.srq_limit = cmd.srq_limit; - uobj->uobject.user_handle = cmd.user_handle; - uobj->uobject.context = file->ucontext; - uobj->events_reported = 0; - INIT_LIST_HEAD(&uobj->event_list); + obj->events_reported = 0; + INIT_LIST_HEAD(&obj->event_list); srq = pd->device->create_srq(pd, &attr, &udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); - goto err_up; + goto err; } srq->device = pd->device; srq->pd = pd; - srq->uobject = &uobj->uobject; + srq->uobject = &obj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - memset(&resp, 0, sizeof resp); - -retry: - if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto err_destroy; - } - - ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id); - - if (ret == -EAGAIN) - goto retry; + obj->uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); if (ret) goto err_destroy; - resp.srq_handle = uobj->uobject.id; + memset(&resp, 0, sizeof resp); + resp.srq_handle = obj->uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_idr; + goto err_copy; } + put_pd_read(pd); + mutex_lock(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - mutex_unlock(&ib_uverbs_idr_mutex); + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); return in_len; -err_idr: - idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); +err_copy: + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); err_destroy: ib_destroy_srq(srq); - atomic_dec(&pd->usecnt); - -err_up: - mutex_unlock(&ib_uverbs_idr_mutex); - kfree(uobj); +err: + put_uobj_write(&obj->uobject); return ret; } @@ -1907,21 +2046,16 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) { - ret = -EINVAL; - goto out; - } + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; ret = ib_modify_srq(srq, &attr, cmd.attr_mask); -out: - mutex_unlock(&ib_uverbs_idr_mutex); + put_srq_read(srq); return ret ? ret : in_len; } @@ -1942,18 +2076,16 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (srq && srq->uobject->context == file->ucontext) - ret = ib_query_srq(srq, &attr); - else - ret = -EINVAL; + ret = ib_query_srq(srq, &attr); - mutex_unlock(&ib_uverbs_idr_mutex); + put_srq_read(srq); if (ret) - goto out; + return ret; memset(&resp, 0, sizeof resp); @@ -1963,10 +2095,9 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - ret = -EFAULT; + return -EFAULT; -out: - return ret ? ret : in_len; + return in_len; } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, @@ -1975,45 +2106,45 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; + struct ib_uobject *uobj; struct ib_srq *srq; - struct ib_uevent_object *uobj; + struct ib_uevent_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&ib_uverbs_idr_mutex); - - memset(&resp, 0, sizeof resp); + uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + srq = uobj->object; + obj = container_of(uobj, struct ib_uevent_object, uobject); - srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); - if (!srq || srq->uobject->context != file->ucontext) - goto out; + ret = ib_destroy_srq(srq); + if (!ret) + uobj->live = 0; - uobj = container_of(srq->uobject, struct ib_uevent_object, uobject); + put_uobj_write(uobj); - ret = ib_destroy_srq(srq); if (ret) - goto out; + return ret; - idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); - list_del(&uobj->uobject.list); + list_del(&uobj->list); mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, uobj); + ib_uverbs_release_uevent(file, obj); - resp.events_reported = uobj->events_reported; + memset(&resp, 0, sizeof resp); + resp.events_reported = obj->events_reported; - kfree(uobj); + put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; -out: - mutex_unlock(&ib_uverbs_idr_mutex); - return ret ? ret : in_len; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ff092a0a94d..5ec2d49e9bb 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -66,7 +66,7 @@ enum { static struct class *uverbs_class; -DEFINE_MUTEX(ib_uverbs_idr_mutex); +DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); @@ -183,21 +183,21 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, if (!context) return 0; - mutex_lock(&ib_uverbs_idr_mutex); - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); - idr_remove(&ib_uverbs_ah_idr, uobj->id); + struct ib_ah *ah = uobj->object; + + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); list_del(&uobj->list); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); - idr_remove(&ib_uverbs_qp_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); list_del(&uobj->list); @@ -206,11 +206,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); - idr_remove(&ib_uverbs_cq_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); list_del(&uobj->list); ib_uverbs_release_ucq(file, ev_file, ucq); @@ -218,10 +219,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); - idr_remove(&ib_uverbs_srq_idr, uobj->id); + + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); list_del(&uobj->list); ib_uverbs_release_uevent(file, uevent); @@ -231,11 +233,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_mr *mr = uobj->object; struct ib_device *mrdev = mr->device; struct ib_umem_object *memobj; - idr_remove(&ib_uverbs_mr_idr, uobj->id); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); memobj = container_of(uobj, struct ib_umem_object, uobject); @@ -246,15 +248,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); - idr_remove(&ib_uverbs_pd_idr, uobj->id); + struct ib_pd *pd = uobj->object; + + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); list_del(&uobj->list); kfree(uobj); } - mutex_unlock(&ib_uverbs_idr_mutex); - return context->device->dealloc_ucontext(context); } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c new file mode 100644 index 00000000000..ce46b13ae02 --- /dev/null +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_marshall.h> + +static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, + struct ib_ah_attr *src) +{ + memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); + dst->grh.flow_label = src->grh.flow_label; + dst->grh.sgid_index = src->grh.sgid_index; + dst->grh.hop_limit = src->grh.hop_limit; + dst->grh.traffic_class = src->grh.traffic_class; + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; + dst->port_num = src->port_num; +} + +void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, + struct ib_qp_attr *src) +{ + dst->cur_qp_state = src->cur_qp_state; + dst->path_mtu = src->path_mtu; + dst->path_mig_state = src->path_mig_state; + dst->qkey = src->qkey; + dst->rq_psn = src->rq_psn; + dst->sq_psn = src->sq_psn; + dst->dest_qp_num = src->dest_qp_num; + dst->qp_access_flags = src->qp_access_flags; + + dst->max_send_wr = src->cap.max_send_wr; + dst->max_recv_wr = src->cap.max_recv_wr; + dst->max_send_sge = src->cap.max_send_sge; + dst->max_recv_sge = src->cap.max_recv_sge; + dst->max_inline_data = src->cap.max_inline_data; + + ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); + + dst->pkey_index = src->pkey_index; + dst->alt_pkey_index = src->alt_pkey_index; + dst->en_sqd_async_notify = src->en_sqd_async_notify; + dst->sq_draining = src->sq_draining; + dst->max_rd_atomic = src->max_rd_atomic; + dst->max_dest_rd_atomic = src->max_dest_rd_atomic; + dst->min_rnr_timer = src->min_rnr_timer; + dst->port_num = src->port_num; + dst->timeout = src->timeout; + dst->retry_cnt = src->retry_cnt; + dst->rnr_retry = src->rnr_retry; + dst->alt_port_num = src->alt_port_num; + dst->alt_timeout = src->alt_timeout; +} +EXPORT_SYMBOL(ib_copy_qp_attr_to_user); + +void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct ib_sa_path_rec *src) +{ + memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); + memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); + + dst->dlid = src->dlid; + dst->slid = src->slid; + dst->raw_traffic = src->raw_traffic; + dst->flow_label = src->flow_label; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; + dst->reversible = src->reversible; + dst->numb_path = src->numb_path; + dst->pkey = src->pkey; + dst->sl = src->sl; + dst->mtu_selector = src->mtu_selector; + dst->mtu = src->mtu; + dst->rate_selector = src->rate_selector; + dst->rate = src->rate; + dst->packet_life_time = src->packet_life_time; + dst->preference = src->preference; + dst->packet_life_time_selector = src->packet_life_time_selector; +} +EXPORT_SYMBOL(ib_copy_path_rec_to_user); + +void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, + struct ib_user_path_rec *src) +{ + memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); + memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); + + dst->dlid = src->dlid; + dst->slid = src->slid; + dst->raw_traffic = src->raw_traffic; + dst->flow_label = src->flow_label; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; + dst->reversible = src->reversible; + dst->numb_path = src->numb_path; + dst->pkey = src->pkey; + dst->sl = src->sl; + dst->mtu_selector = src->mtu_selector; + dst->mtu = src->mtu; + dst->rate_selector = src->rate_selector; + dst->rate = src->rate; + dst->packet_life_time = src->packet_life_time; + dst->preference = src->preference; + dst->packet_life_time_selector = src->packet_life_time_selector; +} +EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b78e7dc6933..468999c3880 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -125,35 +125,47 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num) +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, + struct ib_grh *grh, struct ib_ah_attr *ah_attr) { - struct ib_ah_attr ah_attr; u32 flow_class; u16 gid_index; int ret; - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = wc->slid; - ah_attr.sl = wc->sl; - ah_attr.src_path_bits = wc->dlid_path_bits; - ah_attr.port_num = port_num; + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = wc->slid; + ah_attr->sl = wc->sl; + ah_attr->src_path_bits = wc->dlid_path_bits; + ah_attr->port_num = port_num; if (wc->wc_flags & IB_WC_GRH) { - ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid = grh->sgid; + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num, + ret = ib_find_cached_gid(device, &grh->dgid, &port_num, &gid_index); if (ret) - return ERR_PTR(ret); + return ret; - ah_attr.grh.sgid_index = (u8) gid_index; + ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); - ah_attr.grh.flow_label = flow_class & 0xFFFFF; - ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; - ah_attr.grh.hop_limit = grh->hop_limit; + ah_attr->grh.flow_label = flow_class & 0xFFFFF; + ah_attr->grh.hop_limit = grh->hop_limit; + ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; } + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_wc); + +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + int ret; + + ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); + if (ret) + return ERR_PTR(ret); return ib_create_ah(pd, &ah_attr); } |