From 4ef110141b3e0758fe30d686417b5686b87eb25b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 19 Feb 2008 10:05:35 +1100 Subject: [POWERPC] spufs: fix scheduler starvation by idle contexts 2.6.25 has a regression where we can starve the scheduler by creating (N_SPES+1) contexts, then running them one at a time. The final context will never be run, as the other contexts are loaded on the SPEs, none of which are repoted as free (ie, spu->alloc_state != SPU_FREE), so spu_get_idle() doesn't give us a spu to run on. Because all of the contexts are stopped, none are descheduled by the scheduler tick, as spusched_tick returns if spu_stopped(ctx). This change replaces the spu_stopped() check with checking for SCHED_IDLE in ctx->policy. We set a context's policy to SCHED_IDLE when we're not in spu_run(). We also favour SCHED_IDLE contexts when looking for contexts to unbind, but leave their timeslice intact for later resumption. This patch fixes the following test in the spufs-testsuite: tests/20-scheduler/02-yield-starvation Signed-off-by: Jeremy Kerr --- arch/powerpc/platforms/cell/spufs/run.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index fca22e18069..6221968c2a3 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -234,6 +234,7 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, *npc = ctx->ops->npc_read(ctx); spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->policy = SCHED_IDLE; spu_release(ctx); if (signal_pending(current)) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 5915343e259..3a5972117de 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -856,21 +856,18 @@ static noinline void spusched_tick(struct spu_context *ctx) { struct spu_context *new = NULL; struct spu *spu = NULL; - u32 status; if (spu_acquire(ctx)) BUG(); /* a kernel thread never has signals pending */ if (ctx->state != SPU_STATE_RUNNABLE) goto out; - if (spu_stopped(ctx, &status)) - goto out; if (ctx->flags & SPU_CREATE_NOSCHED) goto out; if (ctx->policy == SCHED_FIFO) goto out; - if (--ctx->time_slice) + if (--ctx->time_slice && ctx->policy != SCHED_IDLE) goto out; spu = ctx->spu; @@ -880,7 +877,8 @@ static noinline void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { spu_unschedule(spu, ctx); - spu_add_to_rq(ctx); + if (ctx->policy != SCHED_IDLE) + spu_add_to_rq(ctx); } else { spu_context_nospu_trace(spusched_tick__newslice, ctx); ctx->time_slice++; -- cgit v1.2.3-70-g09d2 From 61b36fc1f7d511132b1dd1422c29c7a8f26d77db Mon Sep 17 00:00:00 2001 From: Andre Detsch Date: Tue, 19 Feb 2008 10:06:15 -0300 Subject: [POWERPC] cell: fix spurious false return from spu_trap_data_{map,seg} At present, the __spufs_trap_data_map and __spu_trap_data_seq functions exit if spu->flags has the SPU_CONTEXT_SWITCH_ACTIVE set. This was resulting in suprious returns from these functions, as they may be legitimately called when we have this bit set. We only use it in these two sanity checks, so this change removes the flag completely. This fixes hangs in the page-fault path of SPE apps. Signed-off-by: Andre Detsch Signed-off-by: Jeremy Kerr --- arch/powerpc/platforms/cell/spu_base.c | 12 ------------ arch/powerpc/platforms/cell/spufs/switch.c | 6 +++--- include/asm-powerpc/spu.h | 3 +-- 3 files changed, 4 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index e45cfa84911..87eb07f94c5 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -160,13 +160,6 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) pr_debug("%s\n", __FUNCTION__); - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - /* SLBs are pre-loaded for context switch, so - * we should never get here! - */ - printk("%s: invalid access during switch!\n", __func__); - return 1; - } slb.esid = (ea & ESID_MASK) | SLB_ESID_V; switch(REGION_ID(ea)) { @@ -226,11 +219,6 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - printk("%s: invalid access during switch!\n", __func__); - return 1; - } - spu->class_0_pending = 0; spu->dar = ea; spu->dsisr = dsisr; diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 6063c88c26d..6f5886c7b1f 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -720,8 +720,9 @@ static inline void set_switch_active(struct spu_state *csa, struct spu *spu) * Restore, Step 23. * Change the software context switch pending flag * to context switch active. + * + * This implementation does not uses a switch active flag. */ - set_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); mb(); } @@ -1739,9 +1740,8 @@ static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) { /* Restore, Step 74: * Reset the "context switch active" flag. + * Not performed by this implementation. */ - clear_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); - mb(); } static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index f07c99ba5d1..e3c845b0f76 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -98,9 +98,8 @@ #define MFC_PRIV_ATTN_EVENT 0x00000800 #define MFC_MULTI_SRC_EVENT 0x00001000 -/* Flags indicating progress during context switch. */ +/* Flag indicating progress during context switch. */ #define SPU_CONTEXT_SWITCH_PENDING 0UL -#define SPU_CONTEXT_SWITCH_ACTIVE 1UL struct spu_context; struct spu_runqueue; -- cgit v1.2.3-70-g09d2