diff options
Diffstat (limited to 'drivers/infiniband')
28 files changed, 1379 insertions, 563 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 151ef83cc14..3fe6f4754fa 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -366,9 +366,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id)) - return cm_id_priv; - if (service_id < cur_cm_id_priv->id.service_id) + (service_mask & cur_cm_id_priv->id.service_id) && + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + return cur_cm_id_priv; + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -378,7 +384,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(__be64 service_id) +static struct cm_id_private * cm_find_listen(struct ib_device *device, + __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -386,9 +393,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id) while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == - (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + cm_id_priv->id.service_id && + (cm_id_priv->id.device == device)) return cm_id_priv; - if (service_id < cm_id_priv->id.service_id) + + if (device < cm_id_priv->id.device) + node = node->rb_left; + else if (device > cm_id_priv->id.device) + node = node->rb_right; + else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; else node = node->rb_right; @@ -523,7 +536,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; @@ -535,6 +549,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, memset(cm_id_priv, 0, sizeof *cm_id_priv); cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; @@ -1047,7 +1062,6 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; - param->device = cm_id_priv->av.port->mad_agent->device; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1226,7 +1240,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(req_msg->service_id); + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + req_msg->service_id); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1254,7 +1269,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -2629,7 +2644,6 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; - param->device = work->port->mad_agent->device; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -2642,7 +2656,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct ib_wc *wc; unsigned long flags; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -2666,7 +2680,8 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irqrestore(&cm.lock, flags); goto out; /* Duplicate message. */ } - cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen(cm_id->device, + sidr_req_msg->service_id); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 813ab70bf6d..4d3aee90c24 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d3cf84e0158..5a6e4497640 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); @@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->modify_port(device, port_num, port_modify_mask, port_modify); } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a14ca87fda1..af302e83056 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2683,40 +2683,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int num_ports, cur_port, i; + int start, end, i; if (device->node_type == IB_NODE_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_mad_port_open(device, cur_port)) { + + for (i = start; i <= end; i++) { + if (ib_mad_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error; } - if (ib_agent_port_open(device, cur_port)) { + if (ib_agent_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error_agent; } } return; -error_device_open: - while (i > 0) { - cur_port--; - if (ib_agent_port_close(device, cur_port)) +error_agent: + if (ib_mad_port_close(device, i)) + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, i); + +error: + i--; + + while (i >= start) { + if (ib_agent_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", - device->name, cur_port); - if (ib_mad_port_close(device, cur_port)) + device->name, i); + if (ib_mad_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + device->name, i); i--; } } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e215cf0478d..0e5ef97f763 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -583,10 +583,16 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; @@ -685,10 +691,16 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) @@ -768,10 +780,16 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 211ba3223f6..7ce7a6c782f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -65,6 +65,11 @@ struct port_table_attribute { int index; }; +static inline int ibdev_is_alive(const struct ib_device *dev) +{ + return dev->reg_state == IB_DEV_REGISTERED; +} + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; + if (!ibdev_is_alive(p->ibdev)) + return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + if (!ibdev_is_alive(dev)) + return -ENODEV; + switch (dev->node_type) { case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); @@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 5a6ba4030d4..28477565ecb 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -41,37 +41,81 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/cdev.h> +#include <linux/idr.h> #include <asm/uaccess.h> -#include "ucm.h" +#include <rdma/ib_cm.h> +#include <rdma/ib_user_cm.h> MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); -static int ucm_debug_level; +struct ib_ucm_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; +}; + +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; + struct ib_ucm_device *device; + + struct list_head ctxs; + struct list_head events; + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ -module_param_named(debug_level, ucm_debug_level, int, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; enum { IB_UCM_MAJOR = 231, - IB_UCM_MINOR = 255 + IB_UCM_BASE_MINOR = 224, + IB_UCM_MAX_DEVICES = 32 }; -#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) +#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -#define PFX "UCM: " +static void ib_ucm_add_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device); -#define ucm_dbg(format, arg...) \ - do { \ - if (ucm_debug_level > 0) \ - printk(KERN_DEBUG PFX format, ## arg); \ - } while (0) +static struct ib_client ucm_client = { + .name = "ucm", + .add = ib_ucm_add_one, + .remove = ib_ucm_remove_one +}; -static struct semaphore ctx_id_mutex; -static struct idr ctx_id_table; +static DECLARE_MUTEX(ctx_id_mutex); +static DEFINE_IDR(ctx_id_table); +static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -152,17 +196,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) goto error; list_add_tail(&ctx->file_list, &file->ctxs); - ucm_dbg("Allocated CM ID <%d>\n", ctx->id); return ctx; error: kfree(ctx); return NULL; } -/* - * Event portion of the API, handle CM events - * and allow event polling. - */ + static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, struct ib_sa_path_rec *kpath) { @@ -209,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; + ureq->port = kreq->port; ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); @@ -295,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; + uvt->resp.u.sidr_req_resp.port = + evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -387,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - /* - * wait - */ + down(&file->mutex); while (list_empty(&file->events)) { @@ -471,7 +512,6 @@ done: return result; } - static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -494,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + ctx->cm_id = ib_create_cm_id(file->device->ib_dev, + ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); - goto err; + goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err; + goto err2; } - return 0; -err: +err2: + ib_destroy_cm_id(ctx->cm_id); +err1: down(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); up(&ctx_id_mutex); - - if (!IS_ERR(ctx->cm_id)) - ib_destroy_cm_id(ctx->cm_id); - kfree(ctx); return result; } @@ -1184,9 +1222,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", - hdr.cmd, hdr.in, hdr.out, len); - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; @@ -1231,8 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - - ucm_dbg("Created struct\n"); + file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev); return 0; } @@ -1263,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static struct file_operations ib_ucm_fops = { +static void ib_ucm_release_class_dev(struct class_device *class_dev) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, @@ -1271,55 +1315,141 @@ static struct file_operations ib_ucm_fops = { .poll = ib_ucm_poll, }; +static struct class ucm_class = { + .name = "infiniband_cm", + .release = ib_ucm_release_class_dev +}; -static struct class *ib_ucm_class; -static struct cdev ib_ucm_cdev; +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return print_dev_t(buf, dev->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); -static int __init ib_ucm_init(void) +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - int result; + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); - if (result) { - ucm_dbg("Error <%d> registering dev\n", result); - goto err_chr; - } +static void ib_ucm_add_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev; + + if (!device->alloc_ucontext) + return; + + ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL); + if (!ucm_dev) + return; - cdev_init(&ib_ucm_cdev, &ib_ucm_fops); + memset(ucm_dev, 0, sizeof *ucm_dev); + ucm_dev->ib_dev = device; + + ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + goto err; + + set_bit(ucm_dev->devnum, dev_map); + + cdev_init(&ucm_dev->dev, &ucm_fops); + ucm_dev->dev.owner = THIS_MODULE; + kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum); + if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + goto err; - result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); - if (result) { - ucm_dbg("Error <%d> adding cdev\n", result); + ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.dev = device->dma_device; + snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", + ucm_dev->devnum); + if (class_device_register(&ucm_dev->class_dev)) goto err_cdev; - } - ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); - if (IS_ERR(ib_ucm_class)) { - result = PTR_ERR(ib_ucm_class); - ucm_dbg("Error <%d> creating class\n", result); + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_ibdev)) goto err_class; + + ib_set_client_data(device, &ucm_client, ucm_dev); + return; + +err_class: + class_device_unregister(&ucm_dev->class_dev); +err_cdev: + cdev_del(&ucm_dev->dev); + clear_bit(ucm_dev->devnum, dev_map); +err: + kfree(ucm_dev); + return; +} + +static void ib_ucm_remove_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + + if (!ucm_dev) + return; + + class_device_unregister(&ucm_dev->class_dev); +} + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ib_ucm_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: couldn't register device number\n"); + goto err; } - class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm"); + ret = class_register(&ucm_class); + if (ret) { + printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); + goto err_chrdev; + } - idr_init(&ctx_id_table); - init_MUTEX(&ctx_id_mutex); + ret = class_create_file(&ucm_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + goto err_class; + } + ret = ib_register_client(&ucm_client); + if (ret) { + printk(KERN_ERR "ucm: couldn't register client\n"); + goto err_class; + } return 0; + err_class: - cdev_del(&ib_ucm_cdev); -err_cdev: - unregister_chrdev_region(IB_UCM_DEV, 1); -err_chr: - return result; + class_unregister(&ucm_class); +err_chrdev: + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); +err: + return ret; } static void __exit ib_ucm_cleanup(void) { - class_device_destroy(ib_ucm_class, IB_UCM_DEV); - class_destroy(ib_ucm_class); - cdev_del(&ib_ucm_cdev); - unregister_chrdev_region(IB_UCM_DEV, 1); + ib_unregister_client(&ucm_client); + class_unregister(&ucm_class); + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h deleted file mode 100644 index f46f37bc120..00000000000 --- a/drivers/infiniband/core/ucm.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ - */ - -#ifndef UCM_H -#define UCM_H - -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/idr.h> - -#include <rdma/ib_cm.h> -#include <rdma/ib_user_cm.h> - -struct ib_ucm_file { - struct semaphore mutex; - struct file *filp; - - struct list_head ctxs; /* list of active connections */ - struct list_head events; /* list of pending events */ - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - wait_queue_head_t wait; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -#endif /* UCM_H */ diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index a64d6b4dcc1..fd200c064a2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -280,14 +280,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, length = count - sizeof (struct ib_user_mad); packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr), GFP_KERNEL); + sizeof (struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, sizeof (struct ib_user_mad) + - sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr))) { + sizeof (struct ib_mad_hdr) + + sizeof (struct ib_rmpp_hdr))) { ret = -EFAULT; goto err; } @@ -349,7 +349,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } rmpp_active = 1; } else { - if (length > sizeof(struct ib_mad)) { + if (length > sizeof (struct ib_mad)) { ret = -EINVAL; goto err_ah; } @@ -376,17 +376,17 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (!rmpp_active) { /* Copy message from user into send buffer */ if (copy_from_user(packet->msg->mad, - buf + sizeof(struct ib_user_mad), length)) { + buf + sizeof (struct ib_user_mad), length)) { ret = -EFAULT; goto err_msg; } } else { - rmpp_hdr_size = sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr); + rmpp_hdr_size = sizeof (struct ib_mad_hdr) + + sizeof (struct ib_rmpp_hdr); /* Only copy MAD headers (RMPP header in place) */ memcpy(packet->msg->mad, packet->mad.data, - sizeof(struct ib_mad_hdr)); + sizeof (struct ib_mad_hdr)); /* Now, copy rest of message from user into send buffer */ if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, @@ -671,17 +671,6 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) - return print_dev_t(buf, port->dev.dev); - else - return print_dev_t(buf, port->sm_dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); @@ -762,6 +751,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->class_dev.class = &umad_class; port->class_dev.dev = device->dma_device; + port->class_dev.devt = port->dev.dev; snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); @@ -771,8 +761,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, class_set_devdata(&port->class_dev, port); kref_get(&port->umad_dev->ref); - if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) - goto err_class; if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) goto err_class; if (class_device_create_file(&port->class_dev, &class_device_attr_port)) @@ -786,6 +774,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->sm_class_dev.class = &umad_class; port->sm_class_dev.dev = device->dma_device; + port->sm_class_dev.devt = port->sm_dev.dev; snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); @@ -795,8 +784,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, class_set_devdata(&port->sm_class_dev, port); kref_get(&port->umad_dev->ref); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) - goto err_sm_class; if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cc124344dd2..63c8085c0c9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -53,14 +54,14 @@ struct ib_uverbs_device { struct cdev dev; struct class_device class_dev; struct ib_device *ib_dev; - int num_comp; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; + struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -73,8 +74,7 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file async_file; - struct ib_uverbs_event_file comp_file[1]; + struct ib_uverbs_event_file *async_file; }; struct ib_uverbs_event { @@ -110,10 +110,17 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -125,21 +132,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) -IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(query_gid); -IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(poll_cq); +IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(post_send); +IB_UVERBS_DECLARE_CMD(post_recv); +IB_UVERBS_DECLARE_CMD(post_srq_recv); +IB_UVERBS_DECLARE_CMD(create_ah); +IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 562445165d2..14583bb6e2c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,6 +34,8 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ +#include <linux/file.h> + #include <asm/uaccess.h> #include "uverbs.h" @@ -45,29 +48,6 @@ (udata)->outlen = (olen); \ } while (0) -ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_params cmd; - struct ib_uverbs_query_params_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - resp.num_cq_events = file->device->num_comp; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -77,7 +57,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; - int i; + struct file *filp; int ret; if (out_len < sizeof resp) @@ -110,26 +90,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); - resp.async_fd = file->async_file.fd; - for (i = 0; i < file->device->num_comp; ++i) - if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + - i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) { - ret = -EFAULT; - goto err_free; - } + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_free; + goto err_file; } - file->ucontext = ucontext; + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if (ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + up(&file->mutex); return in_len; +err_file: + put_unused_fd(resp.async_fd); + fput(filp); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -255,62 +251,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_gid cmd; - struct ib_uverbs_query_gid_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, - (union ib_gid *) resp.gid); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_pkey cmd; - struct ib_uverbs_query_pkey_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, - &resp.pkey); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -349,24 +289,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_pd; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_pd; - - down(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -374,21 +310,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->list); + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_pd_idr, uobj->id); up(&ib_uverbs_idr_mutex); -err_pd: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_pd_idr, uobj->id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_dealloc_pd(pd); err: @@ -459,6 +396,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -524,24 +469,22 @@ retry: resp.mr_handle = obj->uobject.id; - down(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&obj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); err_unreg: ib_dereg_mr(mr); @@ -595,6 +538,35 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -603,6 +575,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -616,9 +589,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.event_handler >= file->device->num_comp) + if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -641,27 +617,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = file; + cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_cq; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_cq; - - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -670,21 +642,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); up(&ib_uverbs_idr_mutex); -err_cq: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_destroy_cq(cq); err: @@ -692,6 +665,93 @@ err: return ret; } +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp *resp; + struct ib_cq *cq; + struct ib_wc *wc; + int ret = 0; + int i; + int rsize; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); + resp = kmalloc(rsize, GFP_KERNEL); + if (!resp) { + ret = -ENOMEM; + goto out_wc; + } + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + resp->count = ib_poll_cq(cq, cmd.ne, wc); + + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; + resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; + resp->wc[i].port_num = wc[i].port_num; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + kfree(resp); + +out_wc: + kfree(wc); + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } + up(&ib_uverbs_idr_mutex); + + return ret; +} + ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -700,6 +760,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file; struct ib_uverbs_event *evt, *tmp; u64 user_handle; int ret = -EINVAL; @@ -716,7 +777,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; ret = ib_destroy_cq(cq); if (ret) @@ -728,19 +790,23 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->comp_file[0].lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); } - spin_unlock_irq(&file->comp_file[0].lock); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -859,24 +925,22 @@ retry: resp.qp_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id); err_destroy: ib_destroy_qp(qp); @@ -1005,12 +1069,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; @@ -1026,6 +1090,468 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = user_wr->imm_data; + + if (qp->qp_type == IB_QPT_UD) { + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + } else { + switch (next->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + break; + default: + break; + } + } + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ib_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + ah->uobject = uobj; + +retry: + if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_ah_idr, uobj->id); + +err_destroy: + ib_destroy_ah(ah); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); + if (!ah || ah->uobject->context != file->ucontext) + goto out; + + uobj = ah->uobject; + + ret = ib_destroy_ah(ah); + if (ret) + goto out; + + idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1148,24 +1674,22 @@ retry: resp.srq_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); err_destroy: ib_destroy_srq(srq); @@ -1243,12 +1767,12 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index add45f7faa5..251c752a7ae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -77,26 +78,31 @@ static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, - [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -113,7 +119,13 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) down(&ib_uverbs_idr_mutex); - /* XXX Free AHs */ + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { + struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); + idr_remove(&ib_uverbs_ah_idr, uobj->id); + ib_destroy_ah(ah); + list_del(&uobj->list); + kfree(uobj); + } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); @@ -188,25 +200,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list) && file->fd >= 0) { + while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list) || - file->fd < 0)) + !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } - if (file->fd < 0) { - spin_unlock_irq(&file->lock); - return -ENODEV; - } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -248,26 +254,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (file->fd < 0) - pollflags = POLLERR; - else if (!list_empty(&file->event_list)) + if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +void ib_uverbs_release_event_file(struct kref *ref) { - struct ib_uverbs_event *entry, *tmp; + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); - spin_lock_irq(&file->lock); - if (file->fd != -1) { - file->fd = -1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) - kfree(entry); - } - spin_unlock_irq(&file->lock); + kfree(file); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -280,21 +279,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + file->file = NULL; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); - ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + if (file->is_async) { + ib_unregister_event_handler(&file->uverbs_file->event_handler); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static struct file_operations uverbs_event_fops = { - /* - * No .owner field since we artificially create event files, - * so there is no increment to the module reference count in - * the open path. All event files come from a uverbs command - * file, which already takes a module reference, so this is OK. - */ + .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -303,27 +311,37 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (!file->file) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); return; + } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - spin_lock_irqsave(&file->comp_file[0].lock, flags); - list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + spin_unlock_irqrestore(&file->lock, flags); - wake_up_interruptible(&file->comp_file[0].poll_wait); - kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -334,32 +352,40 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; + spin_lock_irqsave(&file->async_file->lock, flags); + if (!file->async_file->file) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->async_file->lock, flags); return; + } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; - spin_lock_irqsave(&file->async_file.lock, flags); - list_add_tail(&entry->list, &file->async_file.event_list); + list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file.lock, flags); + spin_unlock_irqrestore(&file->async_file->lock, flags); - wake_up_interruptible(&file->async_file.poll_wait); - kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->poll_wait); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { + struct ib_uverbs_event_file *ev_file = context_ptr; struct ib_ucq_object *uobj; uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -389,8 +415,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -static void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -399,38 +425,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, - struct ib_uverbs_file *uverbs_file) +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd) { + struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - spin_lock_init(&file->lock); - INIT_LIST_HEAD(&file->event_list); - init_waitqueue_head(&file->poll_wait); - file->uverbs_file = uverbs_file; - file->async_queue = NULL; - - file->fd = get_unused_fd(); - if (file->fd < 0) - return file->fd; + ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + ev_file->async_queue = NULL; + ev_file->is_async = is_async; + + *fd = get_unused_fd(); + if (*fd < 0) { + ret = *fd; + goto err; + } filp = get_empty_filp(); if (!filp) { - put_unused_fd(file->fd); - return -ENFILE; + ret = -ENFILE; + goto err_fd; } - filp->f_op = &uverbs_event_fops; + ev_file->file = filp; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = file; + filp->private_data = ev_file; - fd_install(file->fd, filp); + return filp; - return 0; +err_fd: + put_unused_fd(*fd); + +err: + kfree(ev_file); + return ERR_PTR(ret); +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. + */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct file *filp; + + filp = fget(fd); + if (!filp) + return NULL; + + if (filp->f_op != &uverbs_event_fops) + goto out; + + ev_file = filp->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fput(filp); + return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -450,11 +528,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command]) + !uverbs_cmd_table[hdr.command] || + !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -477,82 +555,33 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_device *dev = container_of(inode->i_cdev, struct ib_uverbs_device, dev); struct ib_uverbs_file *file; - int i = 0; - int ret; if (!try_module_get(dev->ib_dev->owner)) return -ENODEV; - file = kmalloc(sizeof *file + - (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), - GFP_KERNEL); + file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { - ret = -ENOMEM; - goto err; + module_put(dev->ib_dev->owner); + return -ENOMEM; } - file->device = dev; + file->device = dev; + file->ucontext = NULL; kref_init(&file->ref); init_MUTEX(&file->mutex); - file->ucontext = NULL; - - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->async_file, file); - if (ret) - goto err_kref; - - file->async_file.is_async = 1; - - for (i = 0; i < dev->num_comp; ++i) { - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->comp_file[i], file); - if (ret) - goto err_async; - file->comp_file[i].is_async = 0; - } - - filp->private_data = file; - INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, - ib_uverbs_event_handler); - if (ib_register_event_handler(&file->event_handler)) - goto err_async; - return 0; - -err_async: - while (i--) - ib_uverbs_event_release(&file->comp_file[i]); - - ib_uverbs_event_release(&file->async_file); - -err_kref: - /* - * One extra kref_put() because we took a reference before the - * event file creation that failed and got us here. - */ - kref_put(&file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_file); - -err: - module_put(dev->ib_dev->owner); - return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - int i; - ib_unregister_event_handler(&file->event_handler); - ib_uverbs_event_release(&file->async_file); ib_dealloc_ucontext(file->ucontext); - for (i = 0; i < file->device->num_comp; ++i) - ib_uverbs_event_release(&file->comp_file[i]); - + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; @@ -588,6 +617,15 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf) } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); +} +static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); + static void ib_uverbs_release_class_dev(struct class_device *class_dev) { struct ib_uverbs_device *dev = @@ -631,8 +669,8 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp_vectors = 1; if (device->mmap) cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); @@ -652,6 +690,8 @@ static void ib_uverbs_add_one(struct ib_device *device) if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) goto err_class; + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_abi_version)) + goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5081d903e56..72d3ef786db 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->attach_mcast ? - qp->device->attach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->detach_mcast ? - qp->device->detach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f6a8ac02655..1bd7dc8f778 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -933,9 +933,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, goto out; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = 1 << field; + dev_lim->max_srq_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = 1 << field; + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1045,6 +1045,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_lim->max_pds, dev_lim->reserved_mgms); + mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bff5a8425f..f106bac0f92 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -83,6 +83,8 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -128,12 +130,16 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; + int max_wqes; + int max_qp_init_rdma; int reserved_qps; int num_srqs; + int max_srq_wqes; int reserved_srqs; int num_eecs; int reserved_eecs; int num_cqs; + int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; @@ -148,6 +154,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 flags; u8 port_width_cap; }; @@ -447,6 +454,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8dfafda5ed2..e5a047a6dbe 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -83,7 +83,8 @@ enum { MTHCA_EVENT_TYPE_PATH_MIG = 0x01, MTHCA_EVENT_TYPE_COMM_EST = 0x02, MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, @@ -110,8 +111,9 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -142,6 +144,9 @@ struct mthca_eqe { __be32 qpn; } __attribute__((packed)) qp; struct { + __be32 srqn; + } __attribute__((packed)) srq; + struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; @@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) IB_EVENT_SQ_DRAINED); break; + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 23a3f56c789..883d1e5a79b 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; @@ -172,6 +181,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -1186,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { .name = DRV_NAME, + .owner = THIS_MODULE, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a2707605f4c..b47ea7daf08 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -37,10 +37,6 @@ #include "mthca_dev.h" #include "mthca_cmd.h" -enum { - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) -}; - struct mthca_mgm { __be32 next_gid_index; u32 reserved[3]; @@ -189,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { + mthca_dbg(dev, "QP %06x already a member of MGM\n", + ibqp->qp_num); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); break; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 7bd7a4bec7b..d63b1a14710 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db) { int group; int start, end, dir; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index bafa51544aa..29c01a4b265 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 3f5319a4657..9e911a1ea41 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,6 +37,7 @@ */ #include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> #include <linux/mm.h> #include "mthca_dev.h" @@ -90,15 +91,26 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - props->max_qp_wr = 0xffff; + props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = 0xffff; + props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->max_pkeys = mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; err = 0; out: @@ -150,9 +162,13 @@ static int mthca_query_port(struct ib_device *ibdev, props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); @@ -634,6 +650,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int nent; int err; + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); @@ -1058,6 +1077,26 @@ int mthca_register_device(struct mthca_dev *dev) strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -1077,6 +1116,7 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 5fa00669f9b..2ee0a2b0fd4 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1112,8 +1112,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_qp *qp) { /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || - cap->max_send_sge > 64 || cap->max_recv_sge > 64) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 18998d48c53..64f70aa1b3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, int err; /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + if (attr->max_wr > dev->limits.max_srq_wqes || + attr->max_sge > dev->limits.max_sg) return -EINVAL; srq->max = attr->max_wr; @@ -332,6 +333,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { @@ -354,7 +378,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, event.device = &dev->ib_dev; event.event = event_type; - event.element.srq = &srq->ibsrq; + event.element.srq = &srq->ibsrq; srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: @@ -415,6 +439,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + prev_wqe = srq->last; srq->last = wqe; @@ -506,6 +538,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + ((struct mthca_next_seg *) wqe)->nda_op = cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 41613ec8a04..bb015c6494c 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -38,6 +38,12 @@ #include <linux/types.h> /* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + +/* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ea1c1ca85b..6b14bd1c60a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_qp_create(struct net_device *dev); +int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f7440096b5e..02d0e000657 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -387,9 +387,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_qp_create(dev); + ret = ipoib_init_qp(dev); if (ret) { - ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); return -1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6c5bf07489f..ee303859b04 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 79f59d0563e..b5902a7ec24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_qp_create(struct net_device *dev) +int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev) return 0; out_fail: - ib_destroy_qp(priv->qp); - priv->qp = NULL; + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - return -EINVAL; + return ret; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) |