diff options
author | John Gregor <john.gregor@qlogic.com> | 2008-04-16 21:09:24 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2008-04-16 21:09:24 -0700 |
commit | 58411d1c012dca53ec9107bd98acb63f648e2435 (patch) | |
tree | d48edc5c3c64d91311bb4134b83bfe7b62b10ec4 /drivers/infiniband/hw/ipath/ipath_driver.c | |
parent | 6be979d71a5e8720c8560cc58713407947e5f691 (diff) |
IB/ipath: Head of Line blocking vs forward progress of user apps
There's a conflict between our need to quiesce PSM-based applications
to avoid HoL blocking when the IB link goes down and the apps' desire
to remain running so that their quiescence timout mechanism can keep
running.
The compromise is to STOP the processes for a fixed period of time and
then alternate between CONT and STOP until the link is again active.
If there are poor interactions with subnet manager configuration at a
given site, the interval can be adjusted via a module paramter.
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_driver.c')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_driver.c | 129 |
1 files changed, 121 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 7121fe84ff8..5605f4f2752 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -77,6 +77,11 @@ unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); +static unsigned ipath_hol_timeout_ms = 13000; +module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); +MODULE_PARM_DESC(hol_timeout_ms, + "duration of user app suspension after link failure"); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("QLogic <support@pathscale.com>"); MODULE_DESCRIPTION("QLogic InfiniPath driver"); @@ -1670,11 +1675,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " "is %s\n", dd->ipath_unit, what[linkcmd], - ipath_ibcstatus_str[ - (ipath_read_kreg64 - (dd, dd->ipath_kregs->kr_ibcstatus) >> - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); + ipath_ibcstatus_str[ipath_ib_linktrstate(dd, + ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); /* flush all queued sends when going to DOWN to be sure that * they don't block MAD packets */ if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN) @@ -1925,9 +1927,8 @@ static void ipath_run_led_override(unsigned long opaque) */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; - lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & - INFINIPATH_IBCS_LINKSTATE_MASK; + dd->ibcs_lts_mask; + lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK; dd->ipath_f_setextled(dd, lstate, ltstate); mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); @@ -1988,6 +1989,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_dbg("Shutting down the device\n"); + ipath_hol_up(dd); /* make sure user processes aren't suspended */ + dd->ipath_flags |= IPATH_LINKUNK; dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | IPATH_LINKINIT | IPATH_LINKARMED | @@ -2037,6 +2040,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ dd->ipath_f_quiet_serdes(dd); + /* stop all the timers that might still be running */ + del_timer_sync(&dd->ipath_hol_timer); if (dd->ipath_stats_timer_active) { del_timer_sync(&dd->ipath_stats_timer); dd->ipath_stats_timer_active = 0; @@ -2252,6 +2257,114 @@ bail: return ret; } +/* + * send a signal to all the processes that have the driver open + * through the normal interfaces (i.e., everything other than diags + * interface). Returns number of signalled processes. + */ +static int ipath_signal_procs(struct ipath_devdata *dd, int sig) +{ + int i, sub, any = 0; + pid_t pid; + + if (!dd->ipath_pd) + return 0; + for (i = 1; i < dd->ipath_cfgports; i++) { + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || + !dd->ipath_pd[i]->port_pid) + continue; + pid = dd->ipath_pd[i]->port_pid; + dev_info(&dd->pcidev->dev, "context %d in use " + "(PID %u), sending signal %d\n", + i, pid, sig); + kill_proc(pid, sig, 1); + any++; + for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { + pid = dd->ipath_pd[i]->port_subpid[sub]; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "sub-context " + "%d:%d in use (PID %u), sending " + "signal %d\n", i, sub, pid, sig); + kill_proc(pid, sig, 1); + any++; + } + } + return any; +} + +static void ipath_hol_signal_down(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGSTOP)) + ipath_dbg("Stopped some processes\n"); + ipath_cancel_sends(dd, 1); +} + + +static void ipath_hol_signal_up(struct ipath_devdata *dd) +{ + if (ipath_signal_procs(dd, SIGCONT)) + ipath_dbg("Continued some processes\n"); +} + +/* + * link is down, stop any users processes, and flush pending sends + * to prevent HoL blocking, then start the HoL timer that + * periodically continues, then stop procs, so they can detect + * link down if they want, and do something about it. + * Timer may already be running, so use __mod_timer, not add_timer. + */ +void ipath_hol_down(struct ipath_devdata *dd) +{ + dd->ipath_hol_state = IPATH_HOL_DOWN; + ipath_hol_signal_down(dd); + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); +} + +/* + * link is up, continue any user processes, and ensure timer + * is a nop, if running. Let timer keep running, if set; it + * will nop when it sees the link is up + */ +void ipath_hol_up(struct ipath_devdata *dd) +{ + ipath_hol_signal_up(dd); + dd->ipath_hol_state = IPATH_HOL_UP; +} + +/* + * toggle the running/not running state of user proceses + * to prevent HoL blocking on chip resources, but still allow + * user processes to do link down special case handling. + * Should only be called via the timer + */ +void ipath_hol_event(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + + if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP + && dd->ipath_hol_state != IPATH_HOL_UP) { + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + ipath_dbg("Stopping processes\n"); + ipath_hol_signal_down(dd); + } else { /* may do "extra" if also in ipath_hol_up() */ + dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; + ipath_dbg("Continuing processes\n"); + ipath_hol_signal_up(dd); + } + if (dd->ipath_hol_state == IPATH_HOL_UP) + ipath_dbg("link's up, don't resched timer\n"); + else { + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); + __mod_timer(&dd->ipath_hol_timer, + dd->ipath_hol_timer.expires); + } +} + int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) { u64 val; |