diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca/ehca_irq.c')
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_irq.c | 147 |
1 files changed, 84 insertions, 63 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 96eba383075..3f617b27b95 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -49,29 +49,26 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; -#ifdef CONFIG_HOTPLUG_CPU -static struct notifier_block comp_pool_callback_nb; -#endif +static struct ehca_comp_pool *pool; static inline void comp_event_callback(struct ehca_cq *cq) { @@ -85,8 +82,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -94,7 +91,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) @@ -107,7 +104,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, "CQ 0x%x (resource=%lx) has errors.", @@ -116,7 +113,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } default: ehca_err(&shca->ib_device, - "Unknown errror type: %lx on %s.", + "Unknown error type: %lx on %s.", type, shca->ib_device.name); break; } @@ -175,11 +172,32 @@ error_data1: } -static void qp_event_callback(struct ehca_shca *shca, - u64 eqe, +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, enum ib_event_type event_type) { struct ib_event event; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ struct ehca_qp *qp; u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); @@ -187,20 +205,22 @@ static void qp_event_callback(struct ehca_shca *shca, qp = idr_find(&ehca_qp_idr, token); read_unlock(&ehca_qp_idr_lock); - if (!qp) return; - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - if (!qp->ib_qp.event_handler) - return; + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); - event.device = &shca->ib_device; - event.event = event_type; - event.element.qp = &qp->ib_qp; - - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); return; } @@ -234,17 +254,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) switch (identifier) { case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); break; case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); break; case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); break; case 0x05: /* QP error */ case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); break; case 0x07: /* CQ error */ case 0x08: /* CQ error */ @@ -271,13 +291,18 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) case 0x11: /* unaffiliated access error */ ehca_err(&shca->ib_device, "Unaffiliated access error."); break; - case 0x12: /* path migrating error */ - ehca_err(&shca->ib_device, "Path migration error."); + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); break; case 0x13: /* interface trace stopped */ ehca_err(&shca->ib_device, "Interface trace stopped."); break; case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; default: ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", identifier, shca->ib_device.name); @@ -572,7 +597,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -636,7 +661,7 @@ static void queue_comp_task(struct ehca_cq *__cq) __queue_comp_task(__cq, cct); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; unsigned long flags; @@ -666,12 +691,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) static int comp_task(void *__cct) { - struct ehca_cpu_comp_task* cct = __cct; + struct ehca_cpu_comp_task *cct = __cct; int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { + while (!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); spin_lock_irq(&cct->task_lock); @@ -732,9 +757,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool, kthread_stop(task); } -#ifdef CONFIG_HOTPLUG_CPU -static void take_over_work(struct ehca_comp_pool *pool, - int cpu) +static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); LIST_HEAD(list); @@ -745,7 +768,7 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); - while(!list_empty(&list)) { + while (!list_empty(&list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); list_del(&cq->entry); @@ -757,9 +780,9 @@ static void take_over_work(struct ehca_comp_pool *pool, } -static int comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int __cpuinit comp_pool_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct ehca_cpu_comp_task *cct; @@ -768,7 +791,7 @@ static int comp_pool_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { + if (!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); return NOTIFY_BAD; } @@ -805,7 +828,11 @@ static int comp_pool_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif + +static struct notifier_block comp_pool_callback_nb __cpuinitdata = { + .notifier_call = comp_pool_callback, + .priority = 0, +}; int ehca_create_comp_pool(void) { @@ -836,11 +863,7 @@ int ehca_create_comp_pool(void) } } -#ifdef CONFIG_HOTPLUG_CPU - comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; - register_cpu_notifier(&comp_pool_callback_nb); -#endif + register_hotcpu_notifier(&comp_pool_callback_nb); printk(KERN_INFO "eHCA scaling code enabled\n"); @@ -854,9 +877,7 @@ void ehca_destroy_comp_pool(void) if (!ehca_scaling_code) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&comp_pool_callback_nb); -#endif + unregister_hotcpu_notifier(&comp_pool_callback_nb); for (i = 0; i < NR_CPUS; i++) { if (cpu_online(i)) |