diff options
Diffstat (limited to 'drivers/infiniband')
22 files changed, 498 insertions, 350 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 28058ae33d3..eaec8d7a3b7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -152,13 +152,11 @@ static void set_timeout(unsigned long time) { unsigned long delay; - cancel_delayed_work(&work); - delay = time - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b0d0bc8a6fb..dc3fd1e8af0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2004,7 +2004,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) unsigned long delay; if (list_empty(&mad_agent_priv->wait_list)) { - __cancel_delayed_work(&mad_agent_priv->timed_work); + cancel_delayed_work(&mad_agent_priv->timed_work); } else { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, @@ -2013,13 +2013,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; - __cancel_delayed_work(&mad_agent_priv->timed_work); delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(mad_agent_priv->qp_info-> - port_priv->wq, - &mad_agent_priv->timed_work, delay); + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } } } @@ -2052,11 +2050,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ - if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { - __cancel_delayed_work(&mad_agent_priv->timed_work); - queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timed_work, delay); - } + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 3ae2bfd3101..fe10a949aef 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -177,7 +177,7 @@ int __init ibnl_init(void) .input = ibnl_rcv, }; - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg); + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); if (!nls) { pr_warn("Failed to create netlink socket\n"); return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 51f42061dae..6cfd4d8fd0b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1361,11 +1361,11 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct tid_info *t = dev->rdev.lldi.tids; ep = lookup_tid(t, tid); - PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (!ep) { printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); return 0; } + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 45aedf1d933..5213bab2d19 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1155,7 +1155,7 @@ static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) */ if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { - writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db); + writel(QID(qid) | PIDX(inc), qhp->wq.db); break; } set_current_state(TASK_UNINTERRUPTIBLE); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 53589000fd0..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -42,6 +42,7 @@ */ #include <linux/slab.h> +#include <linux/smpboot.h> #include "ehca_classes.h" #include "ehca_irq.h" @@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), - "ehca_comp/%d", cpu); - - return cct->task; + spin_unlock_irq(&target->task_lock); } -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks)); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -static int __cpuinit comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpumask_any(cpu_online_mask)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -static struct notifier_block comp_pool_callback_nb __cpuinitdata = { - .notifier_call = comp_pool_callback, - .priority = 0, +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, }; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -905,38 +825,46 @@ int ehca_create_comp_pool(void) pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } - register_hotcpu_notifier(&comp_pool_callback_nb); + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; - unregister_hotcpu_notifier(&comp_pool_callback_nb); - - for_each_online_cpu(i) - destroy_comp_task(pool, i); + smpboot_unregister_percpu_thread(&comp_pool_threads); + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea..5370199f08c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; int last_cpu; spinlock_t last_cpu_lock; }; diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 4fa3534ec23..74c6a942604 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -241,16 +241,16 @@ good: if (hca_pcie_cap) { devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL, - devctl)) { + if (pcie_capability_write_word(mdev->pdev, PCI_EXP_DEVCTL, + devctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); goto out; } linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL, - linkctl)) { + if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, + linkctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index d42c9f435b1..9e0895b45eb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2679,11 +2679,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } } if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { - if (nesdev->link_recheck) - cancel_delayed_work(&nesdev->work); nesdev->link_recheck = 1; - schedule_delayed_work(&nesdev->work, - NES_LINK_RECHECK_DELAY); + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); } } diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index f3a3ecf8d09..e43f6e41a6b 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -243,10 +243,9 @@ static int nes_netdev_open(struct net_device *netdev) spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { - if (nesdev->link_recheck) - cancel_delayed_work(&nesdev->work); nesdev->link_recheck = 1; - schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); } spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index cb5b7f7d4d3..b29a4246ef4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2219,7 +2219,6 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, u32 wqe_idx; if (!qp->wqe_wr_id_tbl[tail].signaled) { - expand = true; /* CQE cannot be consumed yet */ *polled = false; /* WC cannot be consumed yet */ } else { ibwc->status = IB_WC_SUCCESS; @@ -2227,10 +2226,11 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, ibwc->qp = &qp->ibqp; ocrdma_update_wc(qp, ibwc, tail); *polled = true; - wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; - if (tail != wqe_idx) - expand = true; /* Coalesced CQE can't be consumed yet */ } + wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; + if (tail != wqe_idx) + expand = true; /* Coalesced CQE can't be consumed yet */ + ocrdma_hwq_inc_tail(&qp->sq); return expand; } diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 7b1b8669002..4d11575c201 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -87,7 +87,7 @@ struct qlogic_ib_stats { }; extern struct qlogic_ib_stats qib_stats; -extern struct pci_error_handlers qib_pci_err_handler; +extern const struct pci_error_handlers qib_pci_err_handler; extern struct pci_driver qib_driver; #define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index cff8a6c3216..65a2a23f6f8 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -61,8 +61,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; inode->i_atime = CURRENT_TIME; inode->i_mtime = inode->i_atime; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 19f1e6c45fb..ccb119143d2 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -471,9 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, if (port_num != port) { ibp = to_iport(ibdev, port_num); ret = check_mkey(ibp, smp, 0); - if (ret) + if (ret) { ret = IB_MAD_RESULT_FAILURE; goto bail; + } } } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 062c301ebf5..c574ec7c85e 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -273,10 +273,9 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, struct qib_msix_entry *entry) { u16 linkstat, speed; - int pos = 0, pose, ret = 1; + int pos = 0, ret = 1; - pose = pci_pcie_cap(dd->pcidev); - if (!pose) { + if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; @@ -298,7 +297,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, if (!pos) qib_enable_intx(dd->pcidev); - pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat); + pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* * speed is bits 0-3, linkwidth is bits 4-8 * no defines for them in headers @@ -516,7 +515,6 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) { int r; struct pci_dev *parent; - int ppos; u16 devid; u32 mask, bits, val; @@ -529,8 +527,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "Parent not root\n"); return 1; } - ppos = pci_pcie_cap(parent); - if (!ppos) + if (!pci_is_pcie(parent)) return 1; if (parent->vendor != 0x8086) return 1; @@ -587,7 +584,6 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) { int ret = 1; /* Assume the worst */ struct pci_dev *parent; - int ppos, epos; u16 pcaps, pctl, ecaps, ectl; int rc_sup, ep_sup; int rc_cur, ep_cur; @@ -598,19 +594,15 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "Parent not root\n"); goto bail; } - ppos = pci_pcie_cap(parent); - if (ppos) { - pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps); - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - } else + + if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) goto bail; + pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps); + pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl); /* Find out supported and configured values for endpoint (us) */ - epos = pci_pcie_cap(dd->pcidev); - if (epos) { - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps); - pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl); - } else - goto bail; + pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps); + pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); + ret = 0; /* Find max payload supported by root, endpoint */ rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); @@ -629,14 +621,14 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) rc_cur = rc_sup; pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); } /* If less than (allowed, supported), bump endpoint payload */ if (rc_sup > ep_cur) { ep_cur = rc_sup; ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); } /* @@ -654,13 +646,13 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd) rc_cur = rc_sup; pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl); + pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); } if (rc_sup > ep_cur) { ep_cur = rc_sup; ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); - pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl); + pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); } bail: return ret; @@ -753,7 +745,7 @@ qib_pci_resume(struct pci_dev *pdev) qib_init(dd, 1); /* same as re-init after reset */ } -struct pci_error_handlers qib_pci_err_handler = { +const struct pci_error_handlers qib_pci_err_handler = { .error_detected = qib_pci_error_detected, .mmio_enabled = qib_pci_mmio_enabled, .link_reset = qib_pci_link_reset, diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 3090100f0de..e5430dd5076 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -5,7 +5,8 @@ ib_ipoib-y := ipoib_main.o \ ipoib_multicast.o \ ipoib_verbs.o \ ipoib_vlan.o \ - ipoib_ethtool.o + ipoib_ethtool.o \ + ipoib_netlink.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca43901ed86..196eb52f003 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -104,6 +104,10 @@ enum { MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, + + IPOIB_NON_CHILD = 0, + IPOIB_LEGACY_CHILD = 1, + IPOIB_RTNL_CHILD = 2, }; #define IPOIB_OP_RECV (1ul << 31) @@ -262,7 +266,10 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_neigh_table; + struct ipoib_neigh_hash { + struct ipoib_neigh_table *ntbl; struct ipoib_neigh __rcu **buckets; struct rcu_head rcu; u32 mask; @@ -271,9 +278,9 @@ struct ipoib_neigh_hash { struct ipoib_neigh_table { struct ipoib_neigh_hash __rcu *htbl; - rwlock_t rwlock; atomic_t entries; struct completion flushed; + struct completion deleted; }; /* @@ -350,6 +357,7 @@ struct ipoib_dev_priv { struct net_device *parent; struct list_head child_intfs; struct list_head list; + int child_type; #ifdef CONFIG_INFINIBAND_IPOIB_CM struct ipoib_cm_dev_priv cm; @@ -509,6 +517,17 @@ void ipoib_event(struct ib_event_handler *handler, int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); +int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, + u16 pkey, int child_type); + +int __init ipoib_netlink_init(void); +void __exit ipoib_netlink_fini(void); + +void ipoib_set_umcast(struct net_device *ndev, int umcast_val); +int ipoib_set_mode(struct net_device *dev, const char *buf); + +void ipoib_setup(struct net_device *dev); + void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 24683fda8e2..175581cf478 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1448,15 +1448,10 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, return sprintf(buf, "datagram\n"); } -static ssize_t set_mode(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) +int ipoib_set_mode(struct net_device *dev, const char *buf) { - struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = netdev_priv(dev); - if (!rtnl_trylock()) - return restart_syscall(); - /* flush paths if we switch modes so that connections are restarted */ if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); @@ -1467,7 +1462,8 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); - return count; + rtnl_lock(); + return 0; } if (!strcmp(buf, "datagram\n")) { @@ -1476,14 +1472,32 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); - - return count; + rtnl_lock(); + return 0; } - rtnl_unlock(); return -EINVAL; } +static ssize_t set_mode(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *dev = to_net_dev(d); + int ret; + + if (!rtnl_trylock()) + return restart_syscall(); + + ret = ipoib_set_mode(dev, buf); + + rtnl_unlock(); + + if (!ret) + return count; + + return ret; +} + static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); int ipoib_cm_add_mode_attr(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2085a3ee4..3f9a9ba2f9e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -173,6 +173,11 @@ static int ipoib_stop(struct net_device *dev) return 0; } +static void ipoib_uninit(struct net_device *dev) +{ + ipoib_dev_cleanup(dev); +} + static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -546,15 +551,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, struct ipoib_neigh *neigh; unsigned long flags; + spin_lock_irqsave(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); if (!neigh) { + spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); return; } - spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, daddr + 4); if (!path) { path = path_rec_create(dev, daddr + 4); @@ -863,10 +868,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -883,16 +888,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -902,7 +905,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_reap_neigh(struct work_struct *work) @@ -947,10 +950,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct ipoib_neigh *neigh; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) { neigh = NULL; goto out_unlock; @@ -961,10 +962,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, */ hash_val = ipoib_addr_hash(htbl, daddr); for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); neigh != NULL; neigh = rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ if (!atomic_inc_not_zero(&neigh->refcnt)) { @@ -987,12 +988,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, /* put in hash */ rcu_assign_pointer(neigh->hnext, rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); rcu_assign_pointer(htbl->buckets[hash_val], neigh); atomic_inc(&ntbl->entries); out_unlock: - write_unlock_bh(&ntbl->rwlock); return neigh; } @@ -1040,35 +1040,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) struct ipoib_neigh *n; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) - goto out_unlock; + return; hash_val = ipoib_addr_hash(htbl, neigh->daddr); np = &htbl->buckets[hash_val]; for (n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); n != NULL; n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (n == neigh) { /* found */ rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); - goto out_unlock; + return; } else { np = &n->hnext; } } - -out_unlock: - write_unlock_bh(&ntbl->rwlock); - } static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) @@ -1080,7 +1074,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); ntbl->htbl = NULL; - rwlock_init(&ntbl->rwlock); htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; @@ -1095,6 +1088,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->mask = (size - 1); htbl->buckets = buckets; ntbl->htbl = htbl; + htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); /* start garbage collection */ @@ -1111,9 +1105,11 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh_hash, rcu); struct ipoib_neigh __rcu **buckets = htbl->buckets; + struct ipoib_neigh_table *ntbl = htbl->ntbl; kfree(buckets); kfree(htbl); + complete(&ntbl->deleted); } void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) @@ -1125,10 +1121,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) int i; /* remove all neigh connected to a given path or mcast */ - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -1138,16 +1134,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* delete neighs belong to this parent */ if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from parent list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -1156,7 +1150,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) } } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) @@ -1164,37 +1158,44 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; unsigned long flags; - int i; + int i, wait_flushed = 0; - write_lock_bh(&ntbl->rwlock); + init_completion(&priv->ntbl.flushed); + + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; + wait_flushed = atomic_read(&priv->ntbl.entries); + if (!wait_flushed) + goto free_htbl; + for (i = 0; i < htbl->size; i++) { struct ipoib_neigh *neigh; struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } } +free_htbl: rcu_assign_pointer(ntbl->htbl, NULL); call_rcu(&htbl->rcu, neigh_hash_free_rcu); out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); + if (wait_flushed) + wait_for_completion(&priv->ntbl.flushed); } static void ipoib_neigh_hash_uninit(struct net_device *dev) @@ -1203,7 +1204,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); - init_completion(&priv->ntbl.flushed); + init_completion(&priv->ntbl.deleted); set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); /* Stop GC if called at init fail need to cancel work */ @@ -1211,10 +1212,9 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) if (!stopped) cancel_delayed_work(&priv->neigh_reap_task); - if (atomic_read(&priv->ntbl.entries)) { - ipoib_flush_neighs(priv); - wait_for_completion(&priv->ntbl.flushed); - } + ipoib_flush_neighs(priv); + + wait_for_completion(&priv->ntbl.deleted); } @@ -1262,6 +1262,9 @@ out: void ipoib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; + LIST_HEAD(head); + + ASSERT_RTNL(); ipoib_delete_debug_files(dev); @@ -1270,10 +1273,9 @@ void ipoib_dev_cleanup(struct net_device *dev) /* Stop GC on child */ set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); cancel_delayed_work(&cpriv->neigh_reap_task); - unregister_netdev(cpriv->dev); - ipoib_dev_cleanup(cpriv->dev); - free_netdev(cpriv->dev); + unregister_netdevice_queue(cpriv->dev, &head); } + unregister_netdevice_many(&head); ipoib_ib_dev_cleanup(dev); @@ -1291,6 +1293,7 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops = { + .ndo_uninit = ipoib_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1300,7 +1303,7 @@ static const struct net_device_ops ipoib_netdev_ops = { .ndo_set_rx_mode = ipoib_set_mcast_list, }; -static void ipoib_setup(struct net_device *dev) +void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1378,12 +1381,9 @@ static ssize_t show_umcast(struct device *dev, return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } -static ssize_t set_umcast(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +void ipoib_set_umcast(struct net_device *ndev, int umcast_val) { - struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); - unsigned long umcast_val = simple_strtoul(buf, NULL, 0); + struct ipoib_dev_priv *priv = netdev_priv(ndev); if (umcast_val > 0) { set_bit(IPOIB_FLAG_UMCAST, &priv->flags); @@ -1391,6 +1391,15 @@ static ssize_t set_umcast(struct device *dev, "by userspace\n"); } else clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); +} + +static ssize_t set_umcast(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long umcast_val = simple_strtoul(buf, NULL, 0); + + ipoib_set_umcast(to_net_dev(dev), umcast_val); return count; } @@ -1662,7 +1671,6 @@ static void ipoib_remove_one(struct ib_device *device) flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); - ipoib_dev_cleanup(priv->dev); free_netdev(priv->dev); } @@ -1714,8 +1722,15 @@ static int __init ipoib_init_module(void) if (ret) goto err_sa; + ret = ipoib_netlink_init(); + if (ret) + goto err_client; + return 0; +err_client: + ib_unregister_client(&ipoib_client); + err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); @@ -1728,6 +1743,7 @@ err_fs: static void __exit ipoib_cleanup_module(void) { + ipoib_netlink_fini(); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); ipoib_unregister_debugfs(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 13f4aa7593c..75367249f44 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -707,9 +707,7 @@ out: neigh = ipoib_neigh_get(dev, daddr); spin_lock_irqsave(&priv->lock, flags); if (!neigh) { - spin_unlock_irqrestore(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); - spin_lock_irqsave(&priv->lock, flags); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c new file mode 100644 index 00000000000..74685936c94 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. - All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/netdevice.h> +#include <linux/module.h> +#include <net/rtnetlink.h> +#include "ipoib.h" + +static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { + [IFLA_IPOIB_PKEY] = { .type = NLA_U16 }, + [IFLA_IPOIB_MODE] = { .type = NLA_U16 }, + [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 }, +}; + +static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + u16 val; + + if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey)) + goto nla_put_failure; + + val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); + if (nla_put_u16(skb, IFLA_IPOIB_MODE, val)) + goto nla_put_failure; + + val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags); + if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int ipoib_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + u16 mode, umcast; + int ret = 0; + + if (data[IFLA_IPOIB_MODE]) { + mode = nla_get_u16(data[IFLA_IPOIB_MODE]); + if (mode == IPOIB_MODE_DATAGRAM) + ret = ipoib_set_mode(dev, "datagram\n"); + else if (mode == IPOIB_MODE_CONNECTED) + ret = ipoib_set_mode(dev, "connected\n"); + else + ret = -EINVAL; + + if (ret < 0) + goto out_err; + } + + if (data[IFLA_IPOIB_UMCAST]) { + umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]); + ipoib_set_umcast(dev, umcast); + } + +out_err: + return ret; +} + +static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *pdev; + struct ipoib_dev_priv *ppriv; + u16 child_pkey; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!pdev) + return -ENODEV; + + ppriv = netdev_priv(pdev); + + if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { + ipoib_warn(ppriv, "child creation disallowed for child devices\n"); + return -EINVAL; + } + + if (!data || !data[IFLA_IPOIB_PKEY]) { + ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n"); + child_pkey = ppriv->pkey; + } else + child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); + + err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD); + + if (!err && data) + err = ipoib_changelink(dev, tb, data); + return err; +} + +static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head) +{ + struct ipoib_dev_priv *priv, *ppriv; + + priv = netdev_priv(dev); + ppriv = netdev_priv(priv->parent); + + mutex_lock(&ppriv->vlan_mutex); + unregister_netdevice_queue(dev, head); + list_del(&priv->list); + mutex_unlock(&ppriv->vlan_mutex); +} + +static size_t ipoib_get_size(const struct net_device *dev) +{ + return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ + nla_total_size(2) + /* IFLA_IPOIB_MODE */ + nla_total_size(2); /* IFLA_IPOIB_UMCAST */ +} + +static struct rtnl_link_ops ipoib_link_ops __read_mostly = { + .kind = "ipoib", + .maxtype = IFLA_IPOIB_MAX, + .policy = ipoib_policy, + .priv_size = sizeof(struct ipoib_dev_priv), + .setup = ipoib_setup, + .newlink = ipoib_new_child_link, + .changelink = ipoib_changelink, + .dellink = ipoib_unregister_child_dev, + .get_size = ipoib_get_size, + .fill_info = ipoib_fill_info, +}; + +int __init ipoib_netlink_init(void) +{ + return rtnl_link_register(&ipoib_link_ops); +} + +void __exit ipoib_netlink_fini(void) +{ + rtnl_link_unregister(&ipoib_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("ipoib"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index d7e9740c724..8292554bccb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -49,47 +49,11 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); -int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, + u16 pkey, int type) { - struct ipoib_dev_priv *ppriv, *priv; - char intf_name[IFNAMSIZ]; int result; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - ppriv = netdev_priv(pdev); - - if (!rtnl_trylock()) - return restart_syscall(); - mutex_lock(&ppriv->vlan_mutex); - - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the child interfaces to make sure the Pkey doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - priv = NULL; - goto err; - } - - list_for_each_entry(priv, &ppriv->child_intfs, list) { - if (priv->pkey == pkey) { - result = -ENOTUNIQ; - priv = NULL; - goto err; - } - } - - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); - priv = ipoib_intf_alloc(intf_name); - if (!priv) { - result = -ENOMEM; - goto err; - } - priv->max_ib_mtu = ppriv->max_ib_mtu; /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); @@ -124,24 +88,27 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ipoib_create_debug_files(priv->dev); - if (ipoib_cm_add_mode_attr(priv->dev)) - goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) - goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) - goto sysfs_failed; - - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) - goto sysfs_failed; + /* RTNL childs don't need proprietary sysfs entries */ + if (type == IPOIB_LEGACY_CHILD) { + if (ipoib_cm_add_mode_attr(priv->dev)) + goto sysfs_failed; + if (ipoib_add_pkey_attr(priv->dev)) + goto sysfs_failed; + if (ipoib_add_umcast_attr(priv->dev)) + goto sysfs_failed; + + if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + goto sysfs_failed; + } + priv->child_type = type; + priv->dev->iflink = ppriv->dev->ifindex; list_add_tail(&priv->list, &ppriv->child_intfs); - mutex_unlock(&ppriv->vlan_mutex); - rtnl_unlock(); - return 0; sysfs_failed: + result = -ENOMEM; ipoib_delete_debug_files(priv->dev); unregister_netdevice(priv->dev); @@ -149,11 +116,60 @@ register_failed: ipoib_dev_cleanup(priv->dev); err: + return result; +} + +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +{ + struct ipoib_dev_priv *ppriv, *priv; + char intf_name[IFNAMSIZ]; + struct ipoib_dev_priv *tpriv; + int result; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + ppriv = netdev_priv(pdev); + + snprintf(intf_name, sizeof intf_name, "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(intf_name); + if (!priv) + return -ENOMEM; + + if (!rtnl_trylock()) + return restart_syscall(); + + mutex_lock(&ppriv->vlan_mutex); + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. + */ + if (ppriv->pkey == pkey) { + result = -ENOTUNIQ; + goto out; + } + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) { + result = -ENOTUNIQ; + goto out; + } + } + + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + +out: mutex_unlock(&ppriv->vlan_mutex); - rtnl_unlock(); - if (priv) + + if (result) free_netdev(priv->dev); + rtnl_unlock(); + return result; } @@ -171,9 +187,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) return restart_syscall(); mutex_lock(&ppriv->vlan_mutex); list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { - if (priv->pkey == pkey) { + if (priv->pkey == pkey && + priv->child_type == IPOIB_LEGACY_CHILD) { unregister_netdevice(priv->dev); - ipoib_dev_cleanup(priv->dev); list_del(&priv->list); dev = priv->dev; break; |