16 files changed, 1170 insertions, 326 deletions
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
index d38a7acdb6e..d019746551f 100644
--- a/drivers/misc/cb710/sgbuf2.c
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -114,7 +114,6 @@ static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
 		if (!left)
 			return;
 		addr += len;
-		flush_kernel_dcache_page(miter->page);
 	} while (sg_dwiter_next(miter));
 }
 
@@ -142,9 +141,6 @@ void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t da
 			return;
 	} else
 		sg_dwiter_write_slow(miter, data);
-
-	if (miter->length == miter->consumed)
-		flush_kernel_dcache_page(miter->page);
 }
 EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
 
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index b34cb5f79ee..2e535a0ccd5 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -173,6 +173,7 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
 		unsigned	segment;
 		unsigned	offset = (unsigned) off;
 		u8		*cp = bounce + 1;
+		int		sr;
 
 		*cp = AT25_WREN;
 		status = spi_write(at25->spi, cp, 1);
@@ -214,7 +215,6 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
 		timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
 		retries = 0;
 		do {
-			int	sr;
 
 			sr = spi_w8r8(at25->spi, AT25_RDSR);
 			if (sr < 0 || (sr & AT25_SR_nRDY)) {
@@ -228,7 +228,7 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
 				break;
 		} while (retries++ < 3 || time_before_eq(jiffies, timeout));
 
-		if (time_after(jiffies, timeout)) {
+		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
 			dev_err(&at25->spi->dev,
 				"write %d bytes offset %d, "
 				"timeout after %u msecs\n",
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
index bcd8136d2f9..7c4c306dfa8 100644
--- a/drivers/misc/sgi-gru/Makefile
+++ b/drivers/misc/sgi-gru/Makefile
@@ -3,5 +3,5 @@ ifdef CONFIG_SGI_GRU_DEBUG
 endif
 
 obj-$(CONFIG_SGI_GRU) := gru.o
-gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
 
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 3fde33c1e8f..3c9c06618e6 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -81,6 +81,8 @@ struct control_block_extended_exc_detail {
 	int		exopc;
 	long		exceptdet0;
 	int		exceptdet1;
+	int		cbrstate;
+	int		cbrexecstatus;
 };
 
 /*
@@ -107,7 +109,8 @@ struct gru_instruction_bits {
     unsigned char		reserved2: 2;
     unsigned char		istatus:   2;
     unsigned char		isubstatus:4;
-    unsigned char		reserved3: 2;
+    unsigned char		reserved3: 1;
+    unsigned char		tlb_fault_color: 1;
     /* DW 1 */
     unsigned long		idef4;		/* 42 bits: TRi1, BufSize */
     /* DW 2-6 */
@@ -250,17 +253,37 @@ struct gru_instruction {
 #define CBE_CAUSE_HA_RESPONSE_FATAL		(1 << 13)
 #define CBE_CAUSE_HA_RESPONSE_NON_FATAL		(1 << 14)
 #define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR	(1 << 15)
-#define CBE_CAUSE_RESPONSE_DATA_ERROR		(1 << 16)
-#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 17)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR	(1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR	(1 << 18)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT			0
+#define CBR_EXS_INT_OCC_BIT			1
+#define CBR_EXS_PENDING_BIT			2
+#define CBR_EXS_QUEUED_BIT			3
+#define CBR_EXS_TLB_INVAL_BIT			4
+#define CBR_EXS_EXCEPTION_BIT			5
+
+#define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
+#define CBR_TLB_INVAL				(1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
 
 /*
  * Exceptions are retried for the following cases. If any OTHER bits are set
  * in ecause, the exception is not retryable.
  */
-#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR |		\
-			      CBE_CAUSE_RA_REQUEST_TIMEOUT |		\
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR |		\
 			      CBE_CAUSE_TLBHW_ERROR |			\
-			      CBE_CAUSE_HA_REQUEST_TIMEOUT)
+			      CBE_CAUSE_RA_REQUEST_TIMEOUT |		\
+			      CBE_CAUSE_RA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_HA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_RA_RESPONSE_DATA_ERROR |	\
+			      CBE_CAUSE_HA_RESPONSE_DATA_ERROR		\
+			      )
 
 /* Message queue head structure */
 union gru_mesqhead {
@@ -600,9 +623,11 @@ static inline int gru_get_cb_substatus(void *cb)
 	return cbs->isubstatus;
 }
 
-/* Check the status of a CB. If the CB is in UPM mode, call the
- * OS to handle the UPM status.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
  */
 static inline int gru_check_status(void *cb)
 {
@@ -610,34 +635,31 @@ static inline int gru_check_status(void *cb)
 	int ret;
 
 	ret = cbs->istatus;
-	if (ret == CBS_CALL_OS)
+	if (ret != CBS_ACTIVE)
 		ret = gru_check_status_proc(cb);
 	return ret;
 }
 
-/* Wait for CB to complete.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
  */
 static inline int gru_wait(void *cb)
 {
-	struct gru_control_block_status *cbs = (void *)cb;
-	int ret = cbs->istatus;
-
-	if (ret != CBS_IDLE)
-		ret = gru_wait_proc(cb);
-	return ret;
+	return gru_wait_proc(cb);
 }
 
-/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
  * mean TLB mis - only fatal errors such as memory parity error or user
  * bugs will cause termination.
  */
 static inline void gru_wait_abort(void *cb)
 {
-	struct gru_control_block_status *cbs = (void *)cb;
-
-	if (cbs->istatus != CBS_IDLE)
-		gru_wait_abort_proc(cb);
+	gru_wait_abort_proc(cb);
 }
 
 
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index ab118558552..679e0177828 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -166,7 +166,8 @@ static inline struct gru_state *irq_to_gru(int irq)
  * the GRU, atomic operations must be used to clear bits.
  */
 static void get_clear_fault_map(struct gru_state *gru,
-				struct gru_tlb_fault_map *map)
+				struct gru_tlb_fault_map *imap,
+				struct gru_tlb_fault_map *dmap)
 {
 	unsigned long i, k;
 	struct gru_tlb_fault_map *tfm;
@@ -177,7 +178,11 @@ static void get_clear_fault_map(struct gru_state *gru,
 		k = tfm->fault_bits[i];
 		if (k)
 			k = xchg(&tfm->fault_bits[i], 0UL);
-		map->fault_bits[i] = k;
+		imap->fault_bits[i] = k;
+		k = tfm->done_bits[i];
+		if (k)
+			k = xchg(&tfm->done_bits[i], 0UL);
+		dmap->fault_bits[i] = k;
 	}
 
 	/*
@@ -334,6 +339,12 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
 	 * is a transient state.
 	 */
+	if (tfh->status != TFHSTATUS_EXCEPTION) {
+		gru_flush_cache(tfh);
+		if (tfh->status != TFHSTATUS_EXCEPTION)
+			goto failnoexception;
+		STAT(tfh_stale_on_fault);
+	}
 	if (tfh->state == TFHSTATE_IDLE)
 		goto failidle;
 	if (tfh->state == TFHSTATE_MISS_FMM && cb)
@@ -401,8 +412,17 @@ failfmm:
 	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
 	return 0;
 
+failnoexception:
+	/* TFH status did not show exception pending */
+	gru_flush_cache(tfh);
+	if (cb)
+		gru_flush_cache(cb);
+	STAT(tlb_dropin_fail_no_exception);
+	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state);
+	return 0;
+
 failidle:
-	/* TFH was idle  - no miss pending */
+	/* TFH state was idle  - no miss pending */
 	gru_flush_cache(tfh);
 	if (cb)
 		gru_flush_cache(cb);
@@ -438,7 +458,7 @@ failactive:
 irqreturn_t gru_intr(int irq, void *dev_id)
 {
 	struct gru_state *gru;
-	struct gru_tlb_fault_map map;
+	struct gru_tlb_fault_map imap, dmap;
 	struct gru_thread_state *gts;
 	struct gru_tlb_fault_handle *tfh = NULL;
 	int cbrnum, ctxnum;
@@ -451,11 +471,15 @@ irqreturn_t gru_intr(int irq, void *dev_id)
 			raw_smp_processor_id(), irq);
 		return IRQ_NONE;
 	}
-	get_clear_fault_map(gru, &map);
-	gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
-		map.fault_bits[0]);
+	get_clear_fault_map(gru, &imap, &dmap);
+
+	for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+		complete(gru->gs_blade->bs_async_wq);
+		gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+			gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done);
+	}
 
-	for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+	for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
 		tfh = get_tfh_by_index(gru, cbrnum);
 		prefetchw(tfh);	/* Helps on hdw, required for emulator */
 
@@ -472,7 +496,9 @@ irqreturn_t gru_intr(int irq, void *dev_id)
 		 * This is running in interrupt context. Trylock the mmap_sem.
 		 * If it fails, retry the fault in user context.
 		 */
-		if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+		if (!gts->ts_force_cch_reload &&
+					down_read_trylock(&gts->ts_mm->mmap_sem)) {
+			gts->ustats.fmm_tlbdropin++;
 			gru_try_dropin(gts, tfh, NULL);
 			up_read(&gts->ts_mm->mmap_sem);
 		} else {
@@ -491,6 +517,7 @@ static int gru_user_dropin(struct gru_thread_state *gts,
 	struct gru_mm_struct *gms = gts->ts_gms;
 	int ret;
 
+	gts->ustats.upm_tlbdropin++;
 	while (1) {
 		wait_event(gms->ms_wait_queue,
 			   atomic_read(&gms->ms_range_active) == 0);
@@ -546,8 +573,8 @@ int gru_handle_user_call_os(unsigned long cb)
 	 * CCH may contain stale data if ts_force_cch_reload is set.
 	 */
 	if (gts->ts_gru && gts->ts_force_cch_reload) {
-		gru_update_cch(gts, 0);
 		gts->ts_force_cch_reload = 0;
+		gru_update_cch(gts, 0);
 	}
 
 	ret = -EAGAIN;
@@ -589,20 +616,26 @@ int gru_get_exception_detail(unsigned long arg)
 	} else if (gts->ts_gru) {
 		cbrnum = thread_cbr_number(gts, ucbnum);
 		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
-		prefetchw(cbe);/* Harmless on hardware, required for emulator */
+		gru_flush_cache(cbe);	/* CBE not coherent */
 		excdet.opc = cbe->opccpy;
 		excdet.exopc = cbe->exopccpy;
 		excdet.ecause = cbe->ecause;
 		excdet.exceptdet0 = cbe->idef1upd;
 		excdet.exceptdet1 = cbe->idef3upd;
+		excdet.cbrstate = cbe->cbrstate;
+		excdet.cbrexecstatus = cbe->cbrexecstatus;
+		gru_flush_cache(cbe);
 		ret = 0;
 	} else {
 		ret = -EAGAIN;
 	}
 	gru_unlock_gts(gts);
 
-	gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
-		excdet.ecause);
+	gru_dbg(grudev,
+		"cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+		"exdet0 0x%lx, exdet1 0x%x\n",
+		excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+		excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
 	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
 		ret = -EFAULT;
 	return ret;
@@ -627,7 +660,7 @@ static int gru_unload_all_contexts(void)
 			if (gts && mutex_trylock(&gts->ts_ctxlock)) {
 				spin_unlock(&gru->gs_lock);
 				gru_unload_context(gts, 1);
-				gru_unlock_gts(gts);
+				mutex_unlock(&gts->ts_ctxlock);
 				spin_lock(&gru->gs_lock);
 			}
 		}
@@ -669,6 +702,7 @@ int gru_user_flush_tlb(unsigned long arg)
 {
 	struct gru_thread_state *gts;
 	struct gru_flush_tlb_req req;
+	struct gru_mm_struct *gms;
 
 	STAT(user_flush_tlb);
 	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
@@ -681,8 +715,34 @@ int gru_user_flush_tlb(unsigned long arg)
 	if (!gts)
 		return -EINVAL;
 
-	gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len);
+	gms = gts->ts_gms;
 	gru_unlock_gts(gts);
+	gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+	return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_get_gseg_statistics_req req;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (gts) {
+		memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
+		gru_unlock_gts(gts);
+	} else {
+		memset(&req.stats, 0, sizeof(gts->ustats));
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
 
 	return 0;
 }
@@ -691,18 +751,34 @@ int gru_user_flush_tlb(unsigned long arg)
  * Register the current task as the user of the GSEG slice.
  * Needed for TLB fault interrupt targeting.
  */
-int gru_set_task_slice(long address)
+int gru_set_context_option(unsigned long arg)
 {
 	struct gru_thread_state *gts;
+	struct gru_set_context_option_req req;
+	int ret = 0;
+
+	STAT(set_context_option);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
 
-	STAT(set_task_slice);
-	gru_dbg(grudev, "address 0x%lx\n", address);
-	gts = gru_alloc_locked_gts(address);
+	gts = gru_alloc_locked_gts(req.gseg);
 	if (!gts)
 		return -EINVAL;
 
-	gts->ts_tgid_owner = current->tgid;
+	switch (req.op) {
+	case sco_gseg_owner:
+ 		/* Register the current task as the GSEG owner */
+		gts->ts_tgid_owner = current->tgid;
+		break;
+	case sco_cch_req_slice:
+ 		/* Set the CCH slice option */
+		gts->ts_cch_req_slice = req.val1 & 3;
+		break;
+	default:
+		ret = -EINVAL;
+	}
 	gru_unlock_gts(gts);
 
-	return 0;
+	return ret;
 }
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 3ce2920e2bf..aed609832bc 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/io.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/miscdevice.h>
@@ -46,6 +45,7 @@
 
 struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
 unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
 unsigned long gru_end_paddr __read_mostly;
 unsigned int gru_max_gids __read_mostly;
 struct gru_stats_s gru_stats;
@@ -135,11 +135,9 @@ static int gru_create_new_context(unsigned long arg)
 	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
 		return -EFAULT;
 
-	if (req.data_segment_bytes == 0 ||
-				req.data_segment_bytes > max_user_dsr_bytes)
+	if (req.data_segment_bytes > max_user_dsr_bytes)
 		return -EINVAL;
-	if (!req.control_blocks || !req.maximum_thread_count ||
-				req.control_blocks > max_user_cbrs)
+	if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
 		return -EINVAL;
 
 	if (!(req.options & GRU_OPT_MISS_MASK))
@@ -184,41 +182,6 @@ static long gru_get_config_info(unsigned long arg)
 }
 
 /*
- * Get GRU chiplet status
- */
-static long gru_get_chiplet_status(unsigned long arg)
-{
-	struct gru_state *gru;
-	struct gru_chiplet_info info;
-
-	if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-		return -EFAULT;
-
-	if (info.node == -1)
-		info.node = numa_node_id();
-	if (info.node >= num_possible_nodes() ||
-			info.chiplet >= GRU_CHIPLETS_PER_HUB ||
-			info.node < 0 || info.chiplet < 0)
-		return -EINVAL;
-
-	info.blade = uv_node_to_blade_id(info.node);
-	gru = get_gru(info.blade, info.chiplet);
-
-	info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
-	info.total_cbr = GRU_NUM_CB;
-	info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
-		gru->gs_reserved_dsr_bytes;
-	info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
-	info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
-			GRU_DSR_AU_BYTES;
-	info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
-
-	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
-}
-
-/*
  * gru_file_unlocked_ioctl
  *
  * Called to update file attributes via IOCTL calls.
@@ -234,8 +197,8 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
 	case GRU_CREATE_CONTEXT:
 		err = gru_create_new_context(arg);
 		break;
-	case GRU_SET_TASK_SLICE:
-		err = gru_set_task_slice(arg);
+	case GRU_SET_CONTEXT_OPTION:
+		err = gru_set_context_option(arg);
 		break;
 	case GRU_USER_GET_EXCEPTION_DETAIL:
 		err = gru_get_exception_detail(arg);
@@ -243,18 +206,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
 	case GRU_USER_UNLOAD_CONTEXT:
 		err = gru_user_unload_context(arg);
 		break;
-	case GRU_GET_CHIPLET_STATUS:
-		err = gru_get_chiplet_status(arg);
-		break;
 	case GRU_USER_FLUSH_TLB:
 		err = gru_user_flush_tlb(arg);
 		break;
 	case GRU_USER_CALL_OS:
 		err = gru_handle_user_call_os(arg);
 		break;
+	case GRU_GET_GSEG_STATISTICS:
+		err = gru_get_gseg_statistics(arg);
+		break;
+	case GRU_KTEST:
+		err = gru_ktest(arg);
+		break;
 	case GRU_GET_CONFIG_INFO:
 		err = gru_get_config_info(arg);
 		break;
+	case GRU_DUMP_CHIPLET_STATE:
+		err = gru_dump_chiplet_request(arg);
+		break;
 	}
 	return err;
 }
@@ -282,7 +251,6 @@ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
 	gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
 		bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
 		gru->gs_gru_base_paddr);
-	gru_kservices_init(gru);
 }
 
 static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
@@ -309,6 +277,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
 		memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
 		gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
 		spin_lock_init(&gru_base[bid]->bs_lock);
+		init_rwsem(&gru_base[bid]->bs_kgts_sema);
 
 		dsrbytes = 0;
 		cbrs = 0;
@@ -372,7 +341,6 @@ static int __init gru_init(void)
 {
 	int ret, irq, chip;
 	char id[10];
-	void *gru_start_vaddr;
 
 	if (!is_uv_system())
 		return 0;
@@ -422,6 +390,7 @@ static int __init gru_init(void)
 		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
 		goto exit3;
 	}
+	gru_kservices_init();
 
 	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
 	       GRU_DRIVER_VERSION_STR);
@@ -440,7 +409,7 @@ exit1:
 
 static void __exit gru_exit(void)
 {
-	int i, bid, gid;
+	int i, bid;
 	int order = get_order(sizeof(struct gru_state) *
 			      GRU_CHIPLETS_PER_BLADE);
 
@@ -449,10 +418,7 @@ static void __exit gru_exit(void)
 
 	for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
 		free_irq(IRQ_GRU + i, NULL);
-
-	foreach_gid(gid)
-		gru_kservices_exit(GID_TO_GRU(gid));
-
+	gru_kservices_exit();
 	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
 		free_pages((unsigned long)gru_base[bid], order);
 
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 9b7ccb32869..37e7cfc53b9 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -57,7 +57,7 @@ static void start_instruction(void *h)
 static int wait_instruction_complete(void *h, enum mcs_op opc)
 {
 	int status;
-	cycles_t start_time = get_cycles();
+	unsigned long start_time = get_cycles();
 
 	while (1) {
 		cpu_relax();
@@ -65,25 +65,16 @@ static int wait_instruction_complete(void *h, enum mcs_op opc)
 		if (status != CCHSTATUS_ACTIVE)
 			break;
 		if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time))
-			panic("GRU %p is malfunctioning\n", h);
+			panic("GRU %p is malfunctioning: start %ld, end %ld\n",
+			      h, start_time, (unsigned long)get_cycles());
 	}
 	if (gru_options & OPT_STATS)
 		update_mcs_stats(opc, get_cycles() - start_time);
 	return status;
 }
 
-int cch_allocate(struct gru_context_configuration_handle *cch,
-		int asidval, int sizeavail, unsigned long cbrmap,
-		unsigned long dsrmap)
+int cch_allocate(struct gru_context_configuration_handle *cch)
 {
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		cch->asid[i] = (asidval++);
-		cch->sizeavail[i] = sizeavail;
-	}
-	cch->dsr_allocation_map = dsrmap;
-	cch->cbr_allocation_map = cbrmap;
 	cch->opc = CCHOP_ALLOCATE;
 	start_instruction(cch);
 	return wait_instruction_complete(cch, cchop_allocate);
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 1ed74d7508c..f44112242d0 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -39,7 +39,6 @@
 #define GRU_NUM_CBE		128
 #define GRU_NUM_TFH		128
 #define GRU_NUM_CCH		16
-#define GRU_NUM_GSH		1
 
 /* Maximum resource counts that can be reserved by user programs */
 #define GRU_NUM_USER_CBR	GRU_NUM_CBE
@@ -56,7 +55,6 @@
 #define GRU_CBE_BASE		(GRU_MCS_BASE + 0x10000)
 #define GRU_TFH_BASE		(GRU_MCS_BASE + 0x18000)
 #define GRU_CCH_BASE		(GRU_MCS_BASE + 0x20000)
-#define GRU_GSH_BASE		(GRU_MCS_BASE + 0x30000)
 
 /* User gseg constants */
 #define GRU_GSEG_STRIDE		(4 * 1024 * 1024)
@@ -251,15 +249,15 @@ struct gru_tlb_fault_handle {
 	unsigned int fill1:9;
 
 	unsigned int status:2;
-	unsigned int fill2:1;
-	unsigned int color:1;
+	unsigned int fill2:2;
 	unsigned int state:3;
 	unsigned int fill3:1;
 
-	unsigned int cause:7;		/* DW 0 - high 32 */
+	unsigned int cause:6;
+	unsigned int cb_int:1;
 	unsigned int fill4:1;
 
-	unsigned int indexway:12;
+	unsigned int indexway:12;	/* DW 0 - high 32 */
 	unsigned int fill5:4;
 
 	unsigned int ctxnum:4;
@@ -457,21 +455,7 @@ enum gru_cbr_state {
 	CBRSTATE_BUSY_INTERRUPT,
 };
 
-/* CBE cbrexecstatus bits */
-#define CBR_EXS_ABORT_OCC_BIT			0
-#define CBR_EXS_INT_OCC_BIT			1
-#define CBR_EXS_PENDING_BIT			2
-#define CBR_EXS_QUEUED_BIT			3
-#define CBR_EXS_TLBHW_BIT			4
-#define CBR_EXS_EXCEPTION_BIT			5
-
-#define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
-#define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
-#define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
-#define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
-#define CBR_EXS_TLBHW				(1 << CBR_EXS_TLBHW_BIT)
-#define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
-
+/* CBE cbrexecstatus bits  - defined in gru_instructions.h*/
 /* CBE ecause bits  - defined in gru_instructions.h */
 
 /*
@@ -495,9 +479,7 @@ enum gru_cbr_state {
 /* minimum TLB purge count to ensure a full purge */
 #define GRUMAXINVAL		1024UL
 
-int cch_allocate(struct gru_context_configuration_handle *cch,
-       int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap);
-
+int cch_allocate(struct gru_context_configuration_handle *cch);
 int cch_start(struct gru_context_configuration_handle *cch);
 int cch_interrupt(struct gru_context_configuration_handle *cch);
 int cch_deallocate(struct gru_context_configuration_handle *cch);
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 00000000000..55eabfa8558
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,232 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            Dump GRU State
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS	10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+	if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+		return -1;
+	*dp += GRU_HANDLE_BYTES;
+	return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+			struct gru_context_configuration_handle *cch,
+			void __user *ubuf, int ctxnum, int dsrcnt)
+{
+	void *cb, *cbe, *tfh, *gseg;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	tfh = grubase + GRU_TFH_BASE;
+
+	for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+		if (gru_user_copy_handle(&ubuf, cb))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		cb += GRU_HANDLE_STRIDE;
+	}
+	if (dsrcnt)
+		memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+	return 0;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_fault_map *tfm;
+	int i, ret, bytes;
+
+	bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+	if (bytes > ubufend - ubuf)
+		ret = -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TFM; i++) {
+		tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tfm))
+			goto fail;
+	}
+	return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_global_handle *tgh;
+	int i, ret, bytes;
+
+	bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+	if (bytes > ubufend - ubuf)
+		ret = -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TGH; i++) {
+		tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tgh))
+			goto fail;
+	}
+	return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+		void __user *ubuf, void __user *ubufend, char data_opt,
+		char lock_cch)
+{
+	struct gru_dump_context_header hdr;
+	struct gru_dump_context_header __user *uhdr = ubuf;
+	struct gru_context_configuration_handle *cch, *ubufcch;
+	struct gru_thread_state *gts;
+	int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+	void *grubase;
+
+	memset(&hdr, 0, sizeof(hdr));
+	grubase = gru->gs_gru_base_vaddr;
+	cch = get_cch(grubase, ctxnum);
+	for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+		cch_locked =  trylock_cch_handle(cch);
+		if (cch_locked)
+			break;
+		msleep(1);
+	}
+
+	ubuf += sizeof(hdr);
+	ubufcch = ubuf;
+	if (gru_user_copy_handle(&ubuf, cch))
+		goto fail;
+	if (cch_locked)
+		ubufcch->delresp = 0;
+	bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+	if (cch_locked || !lock_cch) {
+		gts = gru->gs_gts[ctxnum];
+		if (gts && gts->ts_vma) {
+			hdr.pid = gts->ts_tgid_owner;
+			hdr.vaddr = gts->ts_vma->vm_start;
+		}
+		if (cch->state != CCHSTATE_INACTIVE) {
+			cbrcnt = hweight64(cch->cbr_allocation_map) *
+						GRU_CBR_AU_SIZE;
+			dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+						GRU_DSR_AU_CL : 0;
+		}
+		bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+		if (bytes > ubufend - ubuf)
+			ret = -EFBIG;
+		else
+			ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+							dsrcnt);
+
+	}
+	if (cch_locked)
+		unlock_cch_handle(cch);
+	if (ret)
+		return ret;
+
+	hdr.magic = GRU_DUMP_MAGIC;
+	hdr.gid = gru->gs_gid;
+	hdr.ctxnum = ctxnum;
+	hdr.cbrcnt = cbrcnt;
+	hdr.dsrcnt = dsrcnt;
+	hdr.cch_locked = cch_locked;
+	if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr)))
+		ret = -EFAULT;
+
+	return ret ? ret : bytes;
+
+fail:
+	unlock_cch_handle(cch);
+	return -EFAULT;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+	struct gru_state *gru;
+	struct gru_dump_chiplet_state_req req;
+	void __user *ubuf;
+	void __user *ubufend;
+	int ctxnum, ret, cnt = 0;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/* Currently, only dump by gid is implemented */
+	if (req.gid >= gru_max_gids || req.gid < 0)
+		return -EINVAL;
+
+	gru = GID_TO_GRU(req.gid);
+	ubuf = req.buf;
+	ubufend = req.buf + req.buflen;
+
+	ret = gru_dump_tfm(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	ret = gru_dump_tgh(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+		if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+			ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+						req.data_opt, req.lock_cch);
+			if (ret < 0)
+				goto fail;
+			ubuf += ret;
+			cnt++;
+		}
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
+	return cnt;
+
+fail:
+	return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index d8bd7d84a7c..79689b10f93 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -24,13 +24,13 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/miscdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/interrupt.h>
 #include <linux/uaccess.h>
+#include <linux/delay.h>
 #include "gru.h"
 #include "grulib.h"
 #include "grutables.h"
@@ -45,18 +45,66 @@
  * resources. This will likely be replaced when we better understand the
  * kernel/user requirements.
  *
- * At boot time, the kernel permanently reserves a fixed number of
- * CBRs/DSRs for each cpu to use. The resources are all taken from
- * the GRU chiplet 1 on the blade. This leaves the full set of resources
- * of chiplet 0 available to be allocated to a single user.
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * 	Each blade has one "kernel context" that owns GRU kernel resources
+ * 	located on the blade. Kernel drivers use GRU resources in this context
+ * 	for sending messages, zeroing memory, etc.
+ *
+ * 	The kernel context is dynamically loaded on demand. If it is not in
+ * 	use by the kernel, the kernel context can be unloaded & given to a user.
+ * 	The kernel context will be reloaded when needed. This may require that
+ * 	a context be stolen from a user.
+ * 		NOTE: frequent unloading/reloading of the kernel context is
+ * 		expensive. We are depending on batch schedulers, cpusets, sane
+ * 		drivers or some other mechanism to prevent the need for frequent
+ *	 	stealing/reloading.
+ *
+ * 	The kernel context consists of two parts:
+ * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * 		  Each cpu has it's own private resources & does not share them
+ * 		  with other cpus. These resources are used serially, ie,
+ * 		  locked, used & unlocked  on each call to a function in
+ * 		  grukservices.
+ * 		  	(Now that we have dynamic loading of kernel contexts, I
+ * 		  	 may rethink this & allow sharing between cpus....)
+ *
+ *		- Additional resources can be reserved long term & used directly
+ *		  by UV drivers located in the kernel. Drivers using these GRU
+ *		  resources can use asynchronous GRU instructions that send
+ *		  interrupts on completion.
+ *		  	- these resources must be explicitly locked/unlocked
+ *		  	- locked resources prevent (obviously) the kernel
+ *		  	  context from being unloaded.
+ *			- drivers using these resource directly issue their own
+ *			  GRU instruction and must wait/check completion.
+ *
+ * 		  When these resources are reserved, the caller can optionally
+ * 		  associate a wait_queue with the resources and use asynchronous
+ * 		  GRU instructions. When an async GRU instruction completes, the
+ * 		  driver will do a wakeup on the event.
+ *
  */
 
-/* Blade percpu resources PERMANENTLY reserved for kernel use */
+
+#define ASYNC_HAN_TO_BID(h)	((h) - 1)
+#define ASYNC_BID_TO_HAN(b)	((b) + 1)
+#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
+#define KCB_TO_GID(cb)		((cb - gru_start_vaddr) /		\
+					(GRU_SIZE * GRU_CHIPLETS_PER_BLADE))
+#define KCB_TO_BS(cb)		gru_base[KCB_TO_GID(cb)]
+
 #define GRU_NUM_KERNEL_CBR	1
 #define GRU_NUM_KERNEL_DSR_BYTES 256
 #define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
 					GRU_CACHE_LINE_BYTES)
-#define KERNEL_CTXNUM           15
 
 /* GRU instruction attributes for all instructions */
 #define IMA			IMA_CB_DELAY
@@ -98,6 +146,108 @@ struct message_header {
 
 #define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
 
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+	struct gru_state *gru;
+	struct gru_thread_state *kgts;
+	void *vaddr;
+	int ctxnum, ncpus;
+
+	up_read(&bs->bs_kgts_sema);
+	down_write(&bs->bs_kgts_sema);
+
+	if (!bs->bs_kgts)
+		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
+	kgts = bs->bs_kgts;
+
+	if (!kgts->ts_gru) {
+		STAT(load_kernel_context);
+		ncpus = uv_blade_nr_possible_cpus(blade_id);
+		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+				bs->bs_async_dsr_bytes);
+		while (!gru_assign_gru_context(kgts, blade_id)) {
+			msleep(1);
+			gru_steal_context(kgts, blade_id);
+		}
+		gru_load_context(kgts);
+		gru = bs->bs_kgts->ts_gru;
+		vaddr = gru->gs_gru_base_vaddr;
+		ctxnum = kgts->ts_ctxnum;
+		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+	}
+	downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ *   Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int bid, ret = 0;
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+		bs = gru_base[bid];
+		if (!bs)
+			continue;
+		if (down_write_trylock(&bs->bs_kgts_sema)) {
+			kgts = bs->bs_kgts;
+			if (kgts && kgts->ts_gru)
+				gru_unload_context(kgts, 0);
+			kfree(kgts);
+			bs->bs_kgts = NULL;
+			up_write(&bs->bs_kgts_sema);
+		} else {
+			ret++;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+
+	STAT(lock_kernel_context);
+	bs = gru_base[blade_id];
+
+	down_read(&bs->bs_kgts_sema);
+	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+		gru_load_kernel_context(bs, blade_id);
+	return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+
+	bs = gru_base[blade_id];
+	up_read(&bs->bs_kgts_sema);
+	STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * 	- returns with preemption disabled
+ */
 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
 {
 	struct gru_blade_state *bs;
@@ -105,30 +255,148 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
 
 	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
 	preempt_disable();
-	bs = gru_base[uv_numa_blade_id()];
+	bs = gru_lock_kernel_context(uv_numa_blade_id());
 	lcpu = uv_blade_processor_id();
 	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
 	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
 	return 0;
 }
 
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
 static void gru_free_cpu_resources(void *cb, void *dsr)
 {
+	gru_unlock_kernel_context(uv_numa_blade_id());
 	preempt_enable();
 }
 
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *   Note: currently supports only 1 reservation per blade.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ *	output:
+ *		handle to identify resource
+ *		(0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+			struct completion *cmp)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int ret = 0;
+
+	bs = gru_base[blade_id];
+
+	down_write(&bs->bs_kgts_sema);
+
+	/* Verify no resources already reserved */
+	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+		goto done;
+	bs->bs_async_dsr_bytes = dsr_bytes;
+	bs->bs_async_cbrs = cbrs;
+	bs->bs_async_wq = cmp;
+	kgts = bs->bs_kgts;
+
+	/* Resources changed. Unload context if already loaded */
+	if (kgts && kgts->ts_gru)
+		gru_unload_context(kgts, 0);
+	ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+	up_write(&bs->bs_kgts_sema);
+	return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	down_write(&bs->bs_kgts_sema);
+	bs->bs_async_dsr_bytes = 0;
+	bs->bs_async_cbrs = 0;
+	bs->bs_async_wq = NULL;
+	up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	wait_for_completion(bs->bs_async_wq);
+	mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+	int blade_id = ASYNC_HAN_TO_BID(han);
+	int ncpus;
+
+	gru_lock_kernel_context(blade_id);
+	ncpus = uv_blade_nr_possible_cpus(blade_id);
+	if (cb)
+		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+	if (dsr)
+		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+	int blade_id = ASYNC_HAN_TO_BID(han);
+
+	gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
 int gru_get_cb_exception_detail(void *cb,
 		struct control_block_extended_exc_detail *excdet)
 {
 	struct gru_control_block_extended *cbe;
+	struct gru_blade_state *bs;
+	int cbrnum;
 
-	cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
-	prefetchw(cbe);	/* Harmless on hardware, required for emulator */
+	bs = KCB_TO_BS(cb);
+	cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
+	cbe = get_cbe(GRUBASE(cb), cbrnum);
+	gru_flush_cache(cbe);	/* CBE not coherent */
 	excdet->opc = cbe->opccpy;
 	excdet->exopc = cbe->exopccpy;
 	excdet->ecause = cbe->ecause;
 	excdet->exceptdet0 = cbe->idef1upd;
 	excdet->exceptdet1 = cbe->idef3upd;
+	gru_flush_cache(cbe);
 	return 0;
 }
 
@@ -167,13 +435,13 @@ static int gru_retry_exception(void *cb)
 	int retry = EXCEPTION_RETRY_LIMIT;
 
 	while (1)  {
-		if (gru_get_cb_message_queue_substatus(cb))
-			break;
 		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
 			return CBS_IDLE;
-
+		if (gru_get_cb_message_queue_substatus(cb))
+			return CBS_EXCEPTION;
 		gru_get_cb_exception_detail(cb, &excdet);
-		if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
 			break;
 		if (retry-- == 0)
 			break;
@@ -416,6 +684,29 @@ static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
 				mqd->interrupt_vector);
 }
 
+/*
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+			void *mesg, int lines)
+{
+	unsigned long m;
+
+	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+	if (lines == 2) {
+		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			return MQE_UNEXPECTED_CB_ERR;
+	}
+	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+	send_message_queue_interrupt(mqd);
+	return MQE_OK;
+}
 
 /*
  * Handle a gru_mesq failure. Some of these failures are software recoverable
@@ -425,7 +716,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 				void *mesg, int lines)
 {
 	int substatus, ret = 0;
-	unsigned long m;
 
 	substatus = gru_get_cb_message_queue_substatus(cb);
 	switch (substatus) {
@@ -447,14 +737,7 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
 		break;
 	case CBSS_PUT_NACKED:
 		STAT(mesq_send_put_nacked);
-		m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
-		gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
-		if (gru_wait(cb) == CBS_IDLE) {
-			ret = MQE_OK;
-			send_message_queue_interrupt(mqd);
-		} else {
-			ret = MQE_UNEXPECTED_CB_ERR;
-		}
+		ret = send_message_put_nacked(cb, mqd, mesg, lines);
 		break;
 	default:
 		BUG();
@@ -597,115 +880,177 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa);
 
 /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
 /* 	Temp - will delete after we gain confidence in the GRU		*/
-static __cacheline_aligned unsigned long word0;
-static __cacheline_aligned unsigned long word1;
 
-static int quicktest(struct gru_state *gru)
+static int quicktest0(unsigned long arg)
 {
+	unsigned long word0;
+	unsigned long word1;
 	void *cb;
-	void *ds;
+	void *dsr;
 	unsigned long *p;
+	int ret = -EIO;
 
-	cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
-	ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
-	p = ds;
+	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	p = dsr;
 	word0 = MAGIC;
+	word1 = 0;
 
-	gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
-	if (gru_wait(cb) != CBS_IDLE)
-		BUG();
+	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n");
+		goto done;
+	}
 
-	if (*(unsigned long *)ds != MAGIC)
-		BUG();
-	gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
-	if (gru_wait(cb) != CBS_IDLE)
-		BUG();
+	if (*p != MAGIC) {
+		printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p);
+		goto done;
+	}
+	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n");
+		goto done;
+	}
 
-	if (word0 != word1 || word0 != MAGIC) {
-		printk
-		    ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
-		     gru->gs_gid, word1, MAGIC);
-		BUG();		/* ZZZ should not be fatal */
+	if (word0 != word1 || word1 != MAGIC) {
+		printk(KERN_DEBUG
+		       "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n",
+		     word1, MAGIC);
+		goto done;
 	}
+	ret = 0;
 
-	return 0;
+done:
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
 }
 
+#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
 
-int gru_kservices_init(struct gru_state *gru)
+static int quicktest1(unsigned long arg)
 {
-	struct gru_blade_state *bs;
-	struct gru_context_configuration_handle *cch;
-	unsigned long cbr_map, dsr_map;
-	int err, num, cpus_possible;
-
-	/*
-	 * Currently, resources are reserved ONLY on the second chiplet
-	 * on each blade. This leaves ALL resources on chiplet 0 available
-	 * for user code.
-	 */
-	bs = gru->gs_blade;
-	if (gru != &bs->bs_grus[1])
-		return 0;
-
-	cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
-
-	num = GRU_NUM_KERNEL_CBR * cpus_possible;
-	cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
-	gru->gs_reserved_cbrs += num;
-
-	num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
-	dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
-	gru->gs_reserved_dsr_bytes += num;
-
-	gru->gs_active_contexts++;
-	__set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
-	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
-
-	bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
-					KERNEL_CTXNUM, 0);
-	bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
-					KERNEL_CTXNUM, 0);
-
-	lock_cch_handle(cch);
-	cch->tfm_fault_bit_enable = 0;
-	cch->tlb_int_enable = 0;
-	cch->tfm_done_bit_enable = 0;
-	cch->unmap_enable = 1;
-	err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
-	if (err) {
-		gru_dbg(grudev,
-			"Unable to allocate kernel CCH: gid %d, err %d\n",
-			gru->gs_gid, err);
-		BUG();
+	struct gru_message_queue_desc mqd;
+	void *p, *mq;
+	unsigned long *dw;
+	int i, ret = -EIO;
+	char mes[GRU_CACHE_LINE_BYTES], *m;
+
+	/* Need  1K cacheline aligned that does not cross page boundary */
+	p = kmalloc(4096, 0);
+	mq = ALIGNUP(p, 1024);
+	memset(mes, 0xee, sizeof(mes));
+	dw = mq;
+
+	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+	for (i = 0; i < 6; i++) {
+		mes[8] = i;
+		do {
+			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+		} while (ret == MQE_CONGESTION);
+		if (ret)
+			break;
 	}
-	if (cch_start(cch)) {
-		gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
-			gru->gs_gid, err);
-		BUG();
+	if (ret != MQE_QUEUE_FULL || i != 4)
+		goto done;
+
+	for (i = 0; i < 6; i++) {
+		m = gru_get_next_message(&mqd);
+		if (!m || m[8] != i)
+			break;
+		gru_free_message(&mqd, m);
 	}
-	unlock_cch_handle(cch);
+	ret = (i == 4) ? 0 : -EIO;
 
-	if (gru_options & GRU_QUICKLOOK)
-		quicktest(gru);
-	return 0;
+done:
+	kfree(p);
+	return ret;
 }
 
-void gru_kservices_exit(struct gru_state *gru)
+static int quicktest2(unsigned long arg)
 {
-	struct gru_context_configuration_handle *cch;
-	struct gru_blade_state *bs;
+	static DECLARE_COMPLETION(cmp);
+	unsigned long han;
+	int blade_id = 0;
+	int numcb = 4;
+	int ret = 0;
+	unsigned long *buf;
+	void *cb0, *cb;
+	int i, k, istatus, bytes;
+
+	bytes = numcb * 4 * 8;
+	buf = kmalloc(bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = -EBUSY;
+	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+	if (!han)
+		goto done;
+
+	gru_lock_async_resource(han, &cb0, NULL);
+	memset(buf, 0xee, bytes);
+	for (i = 0; i < numcb; i++)
+		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+				XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+	ret = 0;
+	for (k = 0; k < numcb; k++) {
+		gru_wait_async_cbr(han);
+		for (i = 0; i < numcb; i++) {
+			cb = cb0 + i * GRU_HANDLE_STRIDE;
+			istatus = gru_check_status(cb);
+			if (istatus == CBS_ACTIVE)
+				continue;
+			if (istatus == CBS_EXCEPTION)
+				ret = -EFAULT;
+			else if (buf[i] || buf[i + 1] || buf[i + 2] ||
+					buf[i + 3])
+				ret = -EIO;
+		}
+	}
+	BUG_ON(cmp.done);
 
-	bs = gru->gs_blade;
-	if (gru != &bs->bs_grus[1])
-		return;
+	gru_unlock_async_resource(han);
+	gru_release_async_resources(han);
+done:
+	kfree(buf);
+	return ret;
+}
 
-	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
-	lock_cch_handle(cch);
-	if (cch_interrupt_sync(cch))
-		BUG();
-	if (cch_deallocate(cch))
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+	int ret = -EINVAL;
+
+	switch (arg & 0xff) {
+	case 0:
+		ret = quicktest0(arg);
+		break;
+	case 1:
+		ret = quicktest1(arg);
+		break;
+	case 2:
+		ret = quicktest2(arg);
+		break;
+	case 99:
+		ret = gru_free_kernel_contexts();
+		break;
+	}
+	return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+	return 0;
+}
+
+void gru_kservices_exit(void)
+{
+	if (gru_free_kernel_contexts())
 		BUG();
-	unlock_cch_handle(cch);
 }
 
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
index 747ed315d56..d60d34bca44 100644
--- a/drivers/misc/sgi-gru/grukservices.h
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -146,4 +146,55 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
 extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 							unsigned int bytes);
 
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ * 		cmp	  - completion structure for waiting for
+ * 			    async completions
+ *	output:
+ *		handle to identify resource
+ *		(0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+				struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
 #endif 		/* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e56e196a699..889bc442a3e 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -32,8 +32,8 @@
 /* Set Number of Request Blocks */
 #define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
 
-/* Register task as using the slice */
-#define GRU_SET_TASK_SLICE		_IOWR(GRU_IOCTL_NUM, 5, void *)
+/*  Set Context Options */
+#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)
 
 /* Fetch exception detail */
 #define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
@@ -44,8 +44,11 @@
 /* For user unload context */
 #define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
 
-/* For fetching GRU chiplet status */
-#define GRU_GET_CHIPLET_STATUS		_IOWR(GRU_IOCTL_NUM, 10, void *)
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)
 
 /* For user TLB flushing (primarily for tests) */
 #define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
@@ -53,8 +56,26 @@
 /* Get some config options (primarily for tests & emulator) */
 #define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
 
+/* Various kernel self-tests */
+#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)
+
 #define CONTEXT_WINDOW_BYTES(th)        (GRU_GSEG_PAGESIZE * (th))
 #define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+/*
+ * Statictics kept on a per-GTS basis.
+ */
+struct gts_statistics {
+	unsigned long	fmm_tlbdropin;
+	unsigned long	upm_tlbdropin;
+	unsigned long	context_stolen;
+};
+
+struct gru_get_gseg_statistics_req {
+	unsigned long		gseg;
+	struct gts_statistics	stats;
+};
 
 /*
  * Structure used to pass TLB flush parameters to the driver
@@ -75,6 +96,16 @@ struct gru_unload_context_req {
 };
 
 /*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice};
+struct gru_set_context_option_req {
+	unsigned long	gseg;
+	int		op;
+	unsigned long	val1;
+};
+
+/*
  * Structure used to pass TLB flush parameters to the driver
  */
 struct gru_flush_tlb_req {
@@ -84,6 +115,36 @@ struct gru_flush_tlb_req {
 };
 
 /*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+	unsigned int	op;
+	unsigned int	gid;
+	int		ctxnum;
+	char		data_opt;
+	char		lock_cch;
+	pid_t		pid;
+	void		*buf;
+	size_t		buflen;
+	/* ---- output --- */
+	unsigned int	num_contexts;
+};
+
+#define GRU_DUMP_MAGIC	0x3474ab6c
+struct gru_dump_context_header {
+	unsigned int	magic;
+	unsigned int	gid;
+	unsigned char	ctxnum;
+	unsigned char	cbrcnt;
+	unsigned char	dsrcnt;
+	pid_t		pid;
+	unsigned long	vaddr;
+	int		cch_locked;
+	unsigned long	data[0];
+};
+
+/*
  * GRU configuration info (temp - for testing)
  */
 struct gru_config_info {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ec3f7a17d22..3bc643dad60 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -3,11 +3,21 @@
  *
  *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
  *
- * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
 #include <linux/kernel.h>
@@ -96,7 +106,7 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid)
 	gid = gru->gs_gid;
 again:
 	for (i = 0; i < GRU_NUM_CCH; i++) {
-		if (!gru->gs_gts[i])
+		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
 			continue;
 		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
 		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
@@ -150,7 +160,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
 	unsigned long bits = 0;
 	int i;
 
-	do {
+	while (n--) {
 		i = find_first_bit(p, mmax);
 		if (i == mmax)
 			BUG();
@@ -158,7 +168,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
 		__set_bit(i, &bits);
 		if (idx)
 			*idx++ = i;
-	} while (--n);
+	}
 	return bits;
 }
 
@@ -299,38 +309,39 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
 /*
  * Allocate a thread state structure.
  */
-static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
-					      struct gru_vma_data *vdata,
-					      int tsid)
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count, int options, int tsid)
 {
 	struct gru_thread_state *gts;
 	int bytes;
 
-	bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
-				CBR_BYTES(vdata->vd_cbr_au_count);
+	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
 	bytes += sizeof(struct gru_thread_state);
-	gts = kzalloc(bytes, GFP_KERNEL);
+	gts = kmalloc(bytes, GFP_KERNEL);
 	if (!gts)
 		return NULL;
 
 	STAT(gts_alloc);
+	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
 	atomic_set(&gts->ts_refcnt, 1);
 	mutex_init(&gts->ts_ctxlock);
-	gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
-	gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
-	gts->ts_user_options = vdata->vd_user_options;
+	gts->ts_cbr_au_count = cbr_au_count;
+	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_user_options = options;
 	gts->ts_tsid = tsid;
-	gts->ts_user_options = vdata->vd_user_options;
 	gts->ts_ctxnum = NULLCTX;
-	gts->ts_mm = current->mm;
-	gts->ts_vma = vma;
 	gts->ts_tlb_int_select = -1;
-	gts->ts_gms = gru_register_mmu_notifier();
+	gts->ts_cch_req_slice = -1;
 	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
-	if (!gts->ts_gms)
-		goto err;
+	if (vma) {
+		gts->ts_mm = current->mm;
+		gts->ts_vma = vma;
+		gts->ts_gms = gru_register_mmu_notifier();
+		if (!gts->ts_gms)
+			goto err;
+	}
 
-	gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+	gru_dbg(grudev, "alloc gts %p\n", gts);
 	return gts;
 
 err:
@@ -381,7 +392,8 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
 	struct gru_vma_data *vdata = vma->vm_private_data;
 	struct gru_thread_state *gts, *ngts;
 
-	gts = gru_alloc_gts(vma, vdata, tsid);
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
+			    vdata->vd_user_options, tsid);
 	if (!gts)
 		return NULL;
 
@@ -458,7 +470,8 @@ static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
 }
 
 static void gru_load_context_data(void *save, void *grubase, int ctxnum,
-				  unsigned long cbrmap, unsigned long dsrmap)
+				  unsigned long cbrmap, unsigned long dsrmap,
+				  int data_valid)
 {
 	void *gseg, *cb, *cbe;
 	unsigned long length;
@@ -471,12 +484,22 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
 	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
 
 	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
-		save += gru_copy_handle(cb, save);
-		save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+		if (data_valid) {
+			save += gru_copy_handle(cb, save);
+			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+						save);
+		} else {
+			memset(cb, 0, GRU_CACHE_LINE_BYTES);
+			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+						GRU_CACHE_LINE_BYTES);
+		}
 		cb += GRU_HANDLE_STRIDE;
 	}
 
-	memcpy(gseg + GRU_DS_BASE, save, length);
+	if (data_valid)
+		memcpy(gseg + GRU_DS_BASE, save, length);
+	else
+		memset(gseg + GRU_DS_BASE, 0, length);
 }
 
 static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
@@ -506,7 +529,8 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
 	struct gru_context_configuration_handle *cch;
 	int ctxnum = gts->ts_ctxnum;
 
-	zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+	if (!is_kernel_context(gts))
+		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
 	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
 
 	gru_dbg(grudev, "gts %p\n", gts);
@@ -514,11 +538,14 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
 	if (cch_interrupt_sync(cch))
 		BUG();
 
-	gru_unload_mm_tracker(gru, gts);
-	if (savestate)
+	if (!is_kernel_context(gts))
+		gru_unload_mm_tracker(gru, gts);
+	if (savestate) {
 		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
 					ctxnum, gts->ts_cbr_map,
 					gts->ts_dsr_map);
+		gts->ts_data_valid = 1;
+	}
 
 	if (cch_deallocate(cch))
 		BUG();
@@ -526,24 +553,22 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
 	unlock_cch_handle(cch);
 
 	gru_free_gru_context(gts);
-	STAT(unload_context);
 }
 
 /*
  * Load a GRU context by copying it from the thread data structure in memory
  * to the GRU.
  */
-static void gru_load_context(struct gru_thread_state *gts)
+void gru_load_context(struct gru_thread_state *gts)
 {
 	struct gru_state *gru = gts->ts_gru;
 	struct gru_context_configuration_handle *cch;
-	int err, asid, ctxnum = gts->ts_ctxnum;
+	int i, err, asid, ctxnum = gts->ts_ctxnum;
 
 	gru_dbg(grudev, "gts %p\n", gts);
 	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
 
 	lock_cch_handle(cch);
-	asid = gru_load_mm_tracker(gru, gts);
 	cch->tfm_fault_bit_enable =
 	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
 	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
@@ -552,9 +577,32 @@ static void gru_load_context(struct gru_thread_state *gts)
 		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
 		cch->tlb_int_select = gts->ts_tlb_int_select;
 	}
+	if (gts->ts_cch_req_slice >= 0) {
+		cch->req_slice_set_enable = 1;
+		cch->req_slice = gts->ts_cch_req_slice;
+	} else {
+		cch->req_slice_set_enable =0;
+	}
 	cch->tfm_done_bit_enable = 0;
-	err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map,
-				gts->ts_dsr_map);
+	cch->dsr_allocation_map = gts->ts_dsr_map;
+	cch->cbr_allocation_map = gts->ts_cbr_map;
+
+	if (is_kernel_context(gts)) {
+		cch->unmap_enable = 1;
+		cch->tfm_done_bit_enable = 1;
+		cch->cb_int_enable = 1;
+	} else {
+		cch->unmap_enable = 0;
+		cch->tfm_done_bit_enable = 0;
+		cch->cb_int_enable = 0;
+		asid = gru_load_mm_tracker(gru, gts);
+		for (i = 0; i < 8; i++) {
+			cch->asid[i] = asid + i;
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		}
+	}
+
+	err = cch_allocate(cch);
 	if (err) {
 		gru_dbg(grudev,
 			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
@@ -563,13 +611,11 @@ static void gru_load_context(struct gru_thread_state *gts)
 	}
 
 	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
-			      gts->ts_cbr_map, gts->ts_dsr_map);
+			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
 
 	if (cch_start(cch))
 		BUG();
 	unlock_cch_handle(cch);
-
-	STAT(load_context);
 }
 
 /*
@@ -599,6 +645,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
 				cch->sizeavail[i] = gts->ts_sizeavail;
 			gts->ts_tlb_int_select = gru_cpu_fault_map_id();
 			cch->tlb_int_select = gru_cpu_fault_map_id();
+			cch->tfm_fault_bit_enable =
+			  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+			    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
 		} else {
 			for (i = 0; i < 8; i++)
 				cch->asid[i] = 0;
@@ -642,7 +691,28 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
 #define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
 				 ((g)+1) : &(b)->bs_grus[0])
 
-static void gru_steal_context(struct gru_thread_state *gts)
+static int is_gts_stealable(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts))
+		return down_write_trylock(&bs->bs_kgts_sema);
+	else
+		return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts)) {
+		up_write(&bs->bs_kgts_sema);
+		STAT(steal_kernel_context);
+	} else {
+		mutex_unlock(&gts->ts_ctxlock);
+		STAT(steal_user_context);
+	}
+}
+
+void gru_steal_context(struct gru_thread_state *gts, int blade_id)
 {
 	struct gru_blade_state *blade;
 	struct gru_state *gru, *gru0;
@@ -652,8 +722,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
 	cbr = gts->ts_cbr_au_count;
 	dsr = gts->ts_dsr_au_count;
 
-	preempt_disable();
-	blade = gru_base[uv_numa_blade_id()];
+	blade = gru_base[blade_id];
 	spin_lock(&blade->bs_lock);
 
 	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
@@ -676,7 +745,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
 			 * success are high. If trylock fails, try to steal a
 			 * different GSEG.
 			 */
-			if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+			if (ngts && is_gts_stealable(ngts, blade))
 				break;
 			ngts = NULL;
 			flag = 1;
@@ -690,13 +759,12 @@ static void gru_steal_context(struct gru_thread_state *gts)
 	blade->bs_lru_gru = gru;
 	blade->bs_lru_ctxnum = ctxnum;
 	spin_unlock(&blade->bs_lock);
-	preempt_enable();
 
 	if (ngts) {
-		STAT(steal_context);
+		gts->ustats.context_stolen++;
 		ngts->ts_steal_jiffies = jiffies;
-		gru_unload_context(ngts, 1);
-		mutex_unlock(&ngts->ts_ctxlock);
+		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+		gts_stolen(ngts, blade);
 	} else {
 		STAT(steal_context_failed);
 	}
@@ -710,17 +778,17 @@ static void gru_steal_context(struct gru_thread_state *gts)
 /*
  * Scan the GRUs on the local blade & assign a GRU context.
  */
-static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
+						int blade)
 {
 	struct gru_state *gru, *grux;
 	int i, max_active_contexts;
 
-	preempt_disable();
 
 again:
 	gru = NULL;
 	max_active_contexts = GRU_NUM_CCH;
-	for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+	for_each_gru_on_blade(grux, blade, i) {
 		if (check_gru_resources(grux, gts->ts_cbr_au_count,
 					gts->ts_dsr_au_count,
 					max_active_contexts)) {
@@ -760,7 +828,6 @@ again:
 		STAT(assign_context_failed);
 	}
 
-	preempt_enable();
 	return gru;
 }
 
@@ -775,6 +842,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct gru_thread_state *gts;
 	unsigned long paddr, vaddr;
+	int blade_id;
 
 	vaddr = (unsigned long)vmf->virtual_address;
 	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -789,8 +857,10 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 again:
 	mutex_lock(&gts->ts_ctxlock);
 	preempt_disable();
+	blade_id = uv_numa_blade_id();
+
 	if (gts->ts_gru) {
-		if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
+		if (gts->ts_gru->gs_blade_id != blade_id) {
 			STAT(migrated_nopfn_unload);
 			gru_unload_context(gts, 1);
 		} else {
@@ -800,12 +870,15 @@ again:
 	}
 
 	if (!gts->ts_gru) {
-		if (!gru_assign_gru_context(gts)) {
-			mutex_unlock(&gts->ts_ctxlock);
+		STAT(load_user_context);
+		if (!gru_assign_gru_context(gts, blade_id)) {
 			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			blade_id = uv_numa_blade_id();
 			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
-				gru_steal_context(gts);
+				gru_steal_context(gts, blade_id);
 			goto again;
 		}
 		gru_load_context(gts);
@@ -815,8 +888,8 @@ again:
 				vma->vm_page_prot);
 	}
 
-	mutex_unlock(&gts->ts_ctxlock);
 	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
 
 	return VM_FAULT_NOPAGE;
 }
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index ee74821b171..9cbf95bedce 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -51,9 +51,12 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, assign_context);
 	printstat(s, assign_context_failed);
 	printstat(s, free_context);
-	printstat(s, load_context);
-	printstat(s, unload_context);
-	printstat(s, steal_context);
+	printstat(s, load_user_context);
+	printstat(s, load_kernel_context);
+	printstat(s, lock_kernel_context);
+	printstat(s, unlock_kernel_context);
+	printstat(s, steal_user_context);
+	printstat(s, steal_kernel_context);
 	printstat(s, steal_context_failed);
 	printstat(s, nopfn);
 	printstat(s, break_cow);
@@ -70,7 +73,7 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, user_flush_tlb);
 	printstat(s, user_unload_context);
 	printstat(s, user_exception);
-	printstat(s, set_task_slice);
+	printstat(s, set_context_option);
 	printstat(s, migrate_check);
 	printstat(s, migrated_retarget);
 	printstat(s, migrated_unload);
@@ -84,6 +87,9 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, tlb_dropin_fail_range_active);
 	printstat(s, tlb_dropin_fail_idle);
 	printstat(s, tlb_dropin_fail_fmm);
+	printstat(s, tlb_dropin_fail_no_exception);
+	printstat(s, tlb_dropin_fail_no_exception_war);
+	printstat(s, tfh_stale_on_fault);
 	printstat(s, mmu_invalidate_range);
 	printstat(s, mmu_invalidate_page);
 	printstat(s, mmu_clear_flush_young);
@@ -158,8 +164,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
 	unsigned long val;
 	char buf[80];
 
-	if (copy_from_user
-	    (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
+	if (strncpy_from_user(buf, userbuf, sizeof(buf) - 1) < 0)
 		return -EFAULT;
 	buf[count - 1] = '\0';
 	if (!strict_strtoul(buf, 10, &val))
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index bf1eeb7553e..34ab3d45391 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -148,11 +148,13 @@
 #include <linux/wait.h>
 #include <linux/mmu_notifier.h>
 #include "gru.h"
+#include "grulib.h"
 #include "gruhandles.h"
 
 extern struct gru_stats_s gru_stats;
 extern struct gru_blade_state *gru_base[];
 extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
 extern unsigned int gru_max_gids;
 
 #define GRU_MAX_BLADES		MAX_NUMNODES
@@ -174,9 +176,12 @@ struct gru_stats_s {
 	atomic_long_t assign_context;
 	atomic_long_t assign_context_failed;
 	atomic_long_t free_context;
-	atomic_long_t load_context;
-	atomic_long_t unload_context;
-	atomic_long_t steal_context;
+	atomic_long_t load_user_context;
+	atomic_long_t load_kernel_context;
+	atomic_long_t lock_kernel_context;
+	atomic_long_t unlock_kernel_context;
+	atomic_long_t steal_user_context;
+	atomic_long_t steal_kernel_context;
 	atomic_long_t steal_context_failed;
 	atomic_long_t nopfn;
 	atomic_long_t break_cow;
@@ -193,7 +198,7 @@ struct gru_stats_s {
 	atomic_long_t user_flush_tlb;
 	atomic_long_t user_unload_context;
 	atomic_long_t user_exception;
-	atomic_long_t set_task_slice;
+	atomic_long_t set_context_option;
 	atomic_long_t migrate_check;
 	atomic_long_t migrated_retarget;
 	atomic_long_t migrated_unload;
@@ -207,6 +212,9 @@ struct gru_stats_s {
 	atomic_long_t tlb_dropin_fail_range_active;
 	atomic_long_t tlb_dropin_fail_idle;
 	atomic_long_t tlb_dropin_fail_fmm;
+	atomic_long_t tlb_dropin_fail_no_exception;
+	atomic_long_t tlb_dropin_fail_no_exception_war;
+	atomic_long_t tfh_stale_on_fault;
 	atomic_long_t mmu_invalidate_range;
 	atomic_long_t mmu_invalidate_page;
 	atomic_long_t mmu_clear_flush_young;
@@ -253,7 +261,6 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
 
 #define OPT_DPRINT	1
 #define OPT_STATS	2
-#define GRU_QUICKLOOK	4
 
 
 #define IRQ_GRU			110	/* Starting IRQ number for interrupts */
@@ -373,6 +380,7 @@ struct gru_thread_state {
 						   required for contest */
 	unsigned char		ts_cbr_au_count;/* Number of CBR resources
 						   required for contest */
+	char			ts_cch_req_slice;/* CCH packet slice */
 	char			ts_blade;	/* If >= 0, migrate context if
 						   ref from diferent blade */
 	char			ts_force_cch_reload;
@@ -380,6 +388,9 @@ struct gru_thread_state {
 						   after migration */
 	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
 							  allocated CB */
+	int			ts_data_valid;	/* Indicates if ts_gdata has
+						   valid data */
+	struct gts_statistics	ustats;		/* User statistics */
 	unsigned long		ts_gdata[0];	/* save area for GRU data (CB,
 						   DS, CBE) */
 };
@@ -452,6 +463,14 @@ struct gru_blade_state {
 							   reserved cb */
 	void			*kernel_dsr;		/* First kernel
 							   reserved DSR */
+	struct rw_semaphore	bs_kgts_sema;		/* lock for kgts */
+	struct gru_thread_state *bs_kgts;		/* GTS for kernel use */
+
+	/* ---- the following are used for managing kernel async GRU CBRs --- */
+	int			bs_async_dsr_bytes;	/* DSRs for async */
+	int			bs_async_cbrs;		/* CBRs AU for async */
+	struct completion	*bs_async_wq;
+
 	/* ---- the following are protected by the bs_lock spinlock ---- */
 	spinlock_t		bs_lock;		/* lock used for
 							   stealing contexts */
@@ -552,6 +571,12 @@ struct gru_blade_state {
 
 /* Lock hierarchy checking enabled only in emulator */
 
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+	return !test_and_set_bit(1, h);
+}
+
 static inline void __lock_handle(void *h)
 {
 	while (test_and_set_bit(1, h))
@@ -563,6 +588,11 @@ static inline void __unlock_handle(void *h)
 	clear_bit(1, h);
 }
 
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	return __trylock_handle(cch);
+}
+
 static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
 {
 	__lock_handle(cch);
@@ -584,6 +614,11 @@ static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
 	__unlock_handle(tgh);
 }
 
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+	return !gts->ts_mm;
+}
+
 /*-----------------------------------------------------------------------------
  * Function prototypes & externs
  */
@@ -598,24 +633,32 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
 				*vma, int tsid);
 extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
 				*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
+		int blade);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts, int blade_id);
 extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
 extern int gru_update_cch(struct gru_thread_state *gts, int force_unload);
 extern void gts_drop(struct gru_thread_state *gts);
 extern void gru_tgh_flush_init(struct gru_state *gru);
-extern int gru_kservices_init(struct gru_state *gru);
-extern void gru_kservices_exit(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
 extern irqreturn_t gru_intr(int irq, void *dev_id);
 extern int gru_handle_user_call_os(unsigned long address);
 extern int gru_user_flush_tlb(unsigned long arg);
 extern int gru_user_unload_context(unsigned long arg);
 extern int gru_get_exception_detail(unsigned long arg);
-extern int gru_set_task_slice(long address);
+extern int gru_set_context_option(unsigned long address);
 extern int gru_cpu_fault_map_id(void);
 extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
 extern void gru_flush_all_tlb(struct gru_state *gru);
 extern int gru_proc_init(void);
 extern void gru_proc_exit(void);
 
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count, int options, int tsid);
 extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
 		int cbr_au_count, char *cbmap);
 extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
@@ -624,6 +667,7 @@ extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
 extern struct gru_mm_struct *gru_register_mmu_notifier(void);
 extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
 
+extern int gru_ktest(unsigned long arg);
 extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
 					unsigned long len);
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 8d1c60a3f0d..5d778ec8cdb 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -235,7 +235,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	dev_dbg(xpnet, "passing skb to network layer\n"
-		KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n",
 		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
 		skb_end_pointer(skb), skb->len);
@@ -399,7 +399,7 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
 	msg->buf_pa = xp_pa((void *)start_addr);
 
 	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
-		KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
+		"msg->buf_pa=0x%lx, msg->size=%u, "
 		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
 		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
 		msg->leadin_ignore, msg->tailout_ignore);