From cd1334f03f7b799bc6893b511daf2080e8f73863 Mon Sep 17 00:00:00 2001 From: Jack Steiner <steiner@sgi.com> Date: Wed, 17 Jun 2009 16:28:19 -0700 Subject: gru: bug fixes for GRU exception handling Bug fixes for GRU exception handling. Additional fields from the CBR must be returned to the user to allow the user to correctly diagnose GRU exceptions. Handle endcase in TFH TLB miss handling. Verify that TFH actually indicates a pending exception. Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/misc/sgi-gru/gru_instructions.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/misc/sgi-gru/gru_instructions.h') diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 3fde33c1e8f..2feb885ca96 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -81,6 +81,8 @@ struct control_block_extended_exc_detail { int exopc; long exceptdet0; int exceptdet1; + int cbrstate; + int cbrexecstatus; }; /* @@ -107,7 +109,8 @@ struct gru_instruction_bits { unsigned char reserved2: 2; unsigned char istatus: 2; unsigned char isubstatus:4; - unsigned char reserved3: 2; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; /* DW 1 */ unsigned long idef4; /* 42 bits: TRi1, BufSize */ /* DW 2-6 */ @@ -253,6 +256,21 @@ struct gru_instruction { #define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLBHW_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define 
CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) + /* * Exceptions are retried for the following cases. If any OTHER bits are set * in ecause, the exception is not retryable. -- cgit v1.2.3-70-g09d2 From 270952a907220c0331fdaecbb55df892921c5e2d Mon Sep 17 00:00:00 2001 From: Jack Steiner <steiner@sgi.com> Date: Wed, 17 Jun 2009 16:28:27 -0700 Subject: gru: update to rev 0.9 of gru spec Update GRU driver to the latest version of the GRU spec. This consists of minor updates: - changes & additions to error status bits - new restriction on handling of TLB misses while in FMM mode - new field (not used by software) in TFH Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/misc/sgi-gru/gru_instructions.h | 19 ++++++++++++------- drivers/misc/sgi-gru/grufault.c | 8 ++++++-- drivers/misc/sgi-gru/gruhandles.h | 3 ++- drivers/misc/sgi-gru/grukservices.c | 3 ++- drivers/misc/sgi-gru/gruprocfs.c | 1 + drivers/misc/sgi-gru/grutables.h | 1 + 6 files changed, 24 insertions(+), 11 deletions(-) (limited to 'drivers/misc/sgi-gru/gru_instructions.h') diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 2feb885ca96..eb9140d32e6 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -253,32 +253,37 @@ struct gru_instruction { #define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) #define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) #define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) -#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) -#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) /* CBE cbrexecstatus bits */ #define 
CBR_EXS_ABORT_OCC_BIT 0 #define CBR_EXS_INT_OCC_BIT 1 #define CBR_EXS_PENDING_BIT 2 #define CBR_EXS_QUEUED_BIT 3 -#define CBR_EXS_TLBHW_BIT 4 +#define CBR_EXS_TLB_INVAL_BIT 4 #define CBR_EXS_EXCEPTION_BIT 5 #define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) #define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) #define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) #define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) -#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) +#define CBR_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) #define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) /* * Exceptions are retried for the following cases. If any OTHER bits are set * in ecause, the exception is not retryable. */ -#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \ - CBE_CAUSE_RA_REQUEST_TIMEOUT | \ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ CBE_CAUSE_TLBHW_ERROR | \ - CBE_CAUSE_HA_REQUEST_TIMEOUT) + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) /* Message queue head structure */ union gru_mesqhead { diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 8443e90f9f6..a489807613f 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -339,8 +339,12 @@ static int gru_try_dropin(struct gru_thread_state *gts, * Might be a hardware race OR a stupid user. Ignore FMM because FMM * is a transient state. 
*/ - if (tfh->status != TFHSTATUS_EXCEPTION) - goto failnoexception; + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } if (tfh->state == TFHSTATE_IDLE) goto failidle; if (tfh->state == TFHSTATE_MISS_FMM && cb) diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index 9f41e2cc09d..99ec82678f5 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -255,7 +255,8 @@ struct gru_tlb_fault_handle { unsigned int state:3; unsigned int fill3:1; - unsigned int cause:7; + unsigned int cause:6; + unsigned int cb_int:1; unsigned int fill4:1; unsigned int indexway:12; /* DW 0 - high 32 */ diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 7d7952b27e0..ba6fcd963f3 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -406,7 +406,8 @@ static int gru_retry_exception(void *cb) return CBS_IDLE; gru_get_cb_exception_detail(cb, &excdet); - if (excdet.ecause & ~EXCEPTION_RETRY_BITS) + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) break; if (retry-- == 0) break; diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 6ef4cb4b84c..b5b9cf5c182 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -89,6 +89,7 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, tlb_dropin_fail_fmm); printstat(s, tlb_dropin_fail_no_exception); printstat(s, tlb_dropin_fail_no_exception_war); + printstat(s, tfh_stale_on_fault); printstat(s, mmu_invalidate_range); printstat(s, mmu_invalidate_page); printstat(s, mmu_clear_flush_young); diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 6dfb3e69411..246c63883eb 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ 
b/drivers/misc/sgi-gru/grutables.h @@ -212,6 +212,7 @@ struct gru_stats_s { atomic_long_t tlb_dropin_fail_fmm; atomic_long_t tlb_dropin_fail_no_exception; atomic_long_t tlb_dropin_fail_no_exception_war; + atomic_long_t tfh_stale_on_fault; atomic_long_t mmu_invalidate_range; atomic_long_t mmu_invalidate_page; atomic_long_t mmu_clear_flush_young; -- cgit v1.2.3-70-g09d2 From 9f2501142bd3c496afa7efdf18583aab56fe3134 Mon Sep 17 00:00:00 2001 From: Jack Steiner <steiner@sgi.com> Date: Wed, 17 Jun 2009 16:28:31 -0700 Subject: gru: cleanup gru inline functions Cleanup of GRU inline functions to eliminate unnecessary inline code. Update function descriptions. Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/misc/sgi-gru/gru_instructions.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'drivers/misc/sgi-gru/gru_instructions.h') diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index eb9140d32e6..3c9c06618e6 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -623,9 +623,11 @@ static inline int gru_get_cb_substatus(void *cb) return cbs->isubstatus; } -/* Check the status of a CB. If the CB is in UPM mode, call the - * OS to handle the UPM status. - * Returns the CB status field value (0 for normal completion) +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. + * */ static inline int gru_check_status(void *cb) { @@ -633,34 +635,31 @@ static inline int gru_check_status(void *cb) int ret; ret = cbs->istatus; - if (ret == CBS_CALL_OS) + if (ret != CBS_ACTIVE) ret = gru_check_status_proc(cb); return ret; } -/* Wait for CB to complete. 
- * Returns the CB status field value (0 for normal completion) +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION) is returned + * to the user. Exceptions due to hardware errors are automatically + * retried before returning an exception. + * */ static inline int gru_wait(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - int ret = cbs->istatus; - - if (ret != CBS_IDLE) - ret = gru_wait_proc(cb); - return ret; + return gru_wait_proc(cb); } -/* Wait for CB to complete. Aborts program if error. (Note: error does NOT +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT * mean TLB mis - only fatal errors such as memory parity error or user * bugs will cause termination. */ static inline void gru_wait_abort(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - - if (cbs->istatus != CBS_IDLE) - gru_wait_abort_proc(cb); + gru_wait_abort_proc(cb); } -- cgit v1.2.3-70-g09d2