Diffstat (limited to 'arch/powerpc/platforms/cell/spufs/sched.c')
-rw-r--r--   arch/powerpc/platforms/cell/spufs/sched.c   461
1 files changed, 461 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 00000000000..963182fbd1a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,461 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * SPU scheduler, based on Linux thread priority. For now use
+ * a simple "cooperative" yield model with no preemption. SPU
+ * scheduling will eventually be preemptive: When a thread with
+ * a higher static priority gets ready to run, then an active SPU
+ * context will be preempted and returned to the waitq.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+
+#define SPU_MIN_TIMESLICE	(100 * HZ / 1000)
+
+#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
+struct spu_prio_array {
+	atomic_t nr_blocked;
+	unsigned long bitmap[SPU_BITMAP_SIZE];
+	wait_queue_head_t waitq[MAX_PRIO];
+};
+
+/* spu_runqueue - This is the main runqueue data structure for SPUs. */
+struct spu_runqueue {
+	struct semaphore sem;
+	unsigned long nr_active;
+	unsigned long nr_idle;
+	unsigned long nr_switches;
+	struct list_head active_list;
+	struct list_head idle_list;
+	struct spu_prio_array prio;
+};
+
+static struct spu_runqueue *spu_runqueues = NULL;
+
+static inline struct spu_runqueue *spu_rq(void)
+{
+	/* Future: make this a per-NODE array,
+	 * and use cpu_to_node(smp_processor_id())
+	 */
+	return spu_runqueues;
+}
+
+static inline struct spu *del_idle(struct spu_runqueue *rq)
+{
+	struct spu *spu;
+
+	BUG_ON(rq->nr_idle <= 0);
+	BUG_ON(list_empty(&rq->idle_list));
+	/* Future: Move SPU out of low-power SRI state. */
+	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
+	list_del_init(&spu->sched_list);
+	rq->nr_idle--;
+	return spu;
+}
+
+static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
+{
+	BUG_ON(rq->nr_active <= 0);
+	BUG_ON(list_empty(&rq->active_list));
+	list_del_init(&spu->sched_list);
+	rq->nr_active--;
+}
+
+static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
+{
+	/* Future: Put SPU into low-power SRI state. */
+	list_add_tail(&spu->sched_list, &rq->idle_list);
+	rq->nr_idle++;
+}
+
+static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
+{
+	rq->nr_active++;
+	rq->nr_switches++;
+	list_add_tail(&spu->sched_list, &rq->active_list);
+}
+
+static void prio_wakeup(struct spu_runqueue *rq)
+{
+	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
+		int best = sched_find_first_bit(rq->prio.bitmap);
+		if (best < MAX_PRIO) {
+			wait_queue_head_t *wq = &rq->prio.waitq[best];
+			wake_up_interruptible_nr(wq, 1);
+		}
+	}
+}
+
+static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
+		      u64 flags)
+{
+	int prio = current->prio;
+	wait_queue_head_t *wq = &rq->prio.waitq[prio];
+	DEFINE_WAIT(wait);
+
+	__set_bit(prio, rq->prio.bitmap);
+	atomic_inc(&rq->prio.nr_blocked);
+	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
+	if (!signal_pending(current)) {
+		up(&rq->sem);
+		up_write(&ctx->state_sema);
+		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
+			 current->pid, current->prio);
+		schedule();
+		down_write(&ctx->state_sema);
+		down(&rq->sem);
+	}
+	finish_wait(wq, &wait);
+	atomic_dec(&rq->prio.nr_blocked);
+	if (!waitqueue_active(wq))
+		__clear_bit(prio, rq->prio.bitmap);
+}
+
+static inline int is_best_prio(struct spu_runqueue *rq)
+{
+	int best_prio;
+
+	best_prio = sched_find_first_bit(rq->prio.bitmap);
+	return (current->prio < best_prio) ? 1 : 0;
+}
+
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	/* Global TLBIE broadcast required with SPEs. */
+#if (NR_CPUS > 1)
+	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
+#else
+	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
+#endif
+}
+
+static inline void bind_context(struct spu *spu, struct spu_context *ctx)
+{
+	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
+		 spu->number);
+	spu->ctx = ctx;
+	spu->flags = 0;
+	ctx->flags = 0;
+	ctx->spu = spu;
+	ctx->ops = &spu_hw_ops;
+	spu->pid = current->pid;
+	spu->prio = current->prio;
+	spu->mm = ctx->owner;
+	mm_needs_global_tlbie(spu->mm);
+	spu->ibox_callback = spufs_ibox_callback;
+	spu->wbox_callback = spufs_wbox_callback;
+	spu->stop_callback = spufs_stop_callback;
+	mb();
+	spu_unmap_mappings(ctx);
+	spu_restore(&ctx->csa, spu);
+	spu->timestamp = jiffies;
+}
+
+static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
+		 spu->pid, spu->number);
+	spu_unmap_mappings(ctx);
+	spu_save(&ctx->csa, spu);
+	spu->timestamp = jiffies;
+	ctx->state = SPU_STATE_SAVED;
+	spu->ibox_callback = NULL;
+	spu->wbox_callback = NULL;
+	spu->stop_callback = NULL;
+	spu->mm = NULL;
+	spu->pid = 0;
+	spu->prio = MAX_PRIO;
+	ctx->ops = &spu_backing_ops;
+	ctx->spu = NULL;
+	ctx->flags = 0;
+	spu->flags = 0;
+	spu->ctx = NULL;
+}
+
+static void spu_reaper(void *data)
+{
+	struct spu_context *ctx = data;
+	struct spu *spu;
+
+	down_write(&ctx->state_sema);
+	spu = ctx->spu;
+	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
+		if (atomic_read(&spu->rq->prio.nr_blocked)) {
+			pr_debug("%s: spu=%d\n", __func__, spu->number);
+			ctx->ops->runcntl_stop(ctx);
+			spu_deactivate(ctx);
+			wake_up_all(&ctx->stop_wq);
+		} else {
+			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
+		}
+	}
+	up_write(&ctx->state_sema);
+	put_spu_context(ctx);
+}
+
+static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
+{
+	struct spu_context *ctx = get_spu_context(spu->ctx);
+	unsigned long now = jiffies;
+	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
+
+	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
+	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
+	if (time_after(now, expire))
+		schedule_work(&ctx->reap_work);
+	else
+		schedule_delayed_work(&ctx->reap_work, expire - now);
+}
+
+static void check_preempt_active(struct spu_runqueue *rq)
+{
+	struct list_head *p;
+	struct spu *worst = NULL;
+
+	list_for_each(p, &rq->active_list) {
+		struct spu *spu = list_entry(p, struct spu, sched_list);
+		struct spu_context *ctx = spu->ctx;
+		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
+			if (!worst || (spu->prio > worst->prio)) {
+				worst = spu;
+			}
+		}
+	}
+	if (worst && (current->prio < worst->prio))
+		schedule_spu_reaper(rq, worst);
+}
+
+static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
+{
+	struct spu_runqueue *rq;
+	struct spu *spu = NULL;
+
+	rq = spu_rq();
+	down(&rq->sem);
+	for (;;) {
+		if (rq->nr_idle > 0) {
+			if (is_best_prio(rq)) {
+				/* Fall through. */
+				spu = del_idle(rq);
+				break;
+			} else {
+				prio_wakeup(rq);
+				up(&rq->sem);
+				yield();
+				if (signal_pending(current)) {
+					return NULL;
+				}
+				rq = spu_rq();
+				down(&rq->sem);
+				continue;
+			}
+		} else {
+			check_preempt_active(rq);
+			prio_wait(rq, ctx, flags);
+			if (signal_pending(current)) {
+				prio_wakeup(rq);
+				spu = NULL;
+				break;
+			}
+			continue;
+		}
+	}
+	up(&rq->sem);
+	return spu;
+}
+
+static void put_idle_spu(struct spu *spu)
+{
+	struct spu_runqueue *rq = spu->rq;
+
+	down(&rq->sem);
+	add_idle(rq, spu);
+	prio_wakeup(rq);
+	up(&rq->sem);
+}
+
+static int get_active_spu(struct spu *spu)
+{
+	struct spu_runqueue *rq = spu->rq;
+	struct list_head *p;
+	struct spu *tmp;
+	int rc = 0;
+
+	down(&rq->sem);
+	list_for_each(p, &rq->active_list) {
+		tmp = list_entry(p, struct spu, sched_list);
+		if (tmp == spu) {
+			del_active(rq, spu);
+			rc = 1;
+			break;
+		}
+	}
+	up(&rq->sem);
+	return rc;
+}
+
+static void put_active_spu(struct spu *spu)
+{
+	struct spu_runqueue *rq = spu->rq;
+
+	down(&rq->sem);
+	add_active(rq, spu);
+	up(&rq->sem);
+}
+
+/* Lock order:
+ *	spu_activate() & spu_deactivate() require the
+ *	caller to have down_write(&ctx->state_sema).
+ *
+ *	The rq->sem is briefly held (inside or outside a
+ *	given ctx lock) for list management, but is never
+ *	held during save/restore.
+ */
+
+int spu_activate(struct spu_context *ctx, u64 flags)
+{
+	struct spu *spu;
+
+	if (ctx->spu)
+		return 0;
+	spu = get_idle_spu(ctx, flags);
+	if (!spu)
+		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
+	bind_context(spu, ctx);
+	/*
+	 * We're likely to wait for interrupts on the same
+	 * CPU that we are now on, so send them here.
+	 */
+	spu_irq_setaffinity(spu, raw_smp_processor_id());
+	put_active_spu(spu);
+	return 0;
+}
+
+void spu_deactivate(struct spu_context *ctx)
+{
+	struct spu *spu;
+	int needs_idle;
+
+	spu = ctx->spu;
+	if (!spu)
+		return;
+	needs_idle = get_active_spu(spu);
+	unbind_context(spu, ctx);
+	if (needs_idle)
+		put_idle_spu(spu);
+}
+
+void spu_yield(struct spu_context *ctx)
+{
+	struct spu *spu;
+	int need_yield = 0;
+
+	down_write(&ctx->state_sema);
+	spu = ctx->spu;
+	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
+		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
+		spu_deactivate(ctx);
+		ctx->state = SPU_STATE_SAVED;
+		need_yield = 1;
+	} else if (spu) {
+		spu->prio = MAX_PRIO;
+	}
+	up_write(&ctx->state_sema);
+	if (unlikely(need_yield))
+		yield();
+}
+
+int __init spu_sched_init(void)
+{
+	struct spu_runqueue *rq;
+	struct spu *spu;
+	int i;
+
+	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
+	if (!rq) {
+		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
+		       __FUNCTION__);
+		return 1;
+	}
+	memset(rq, 0, sizeof(struct spu_runqueue));
+	init_MUTEX(&rq->sem);
+	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->idle_list);
+	rq->nr_active = 0;
+	rq->nr_idle = 0;
+	rq->nr_switches = 0;
+	atomic_set(&rq->prio.nr_blocked, 0);
+	for (i = 0; i < MAX_PRIO; i++) {
+		init_waitqueue_head(&rq->prio.waitq[i]);
+		__clear_bit(i, rq->prio.bitmap);
+	}
+	__set_bit(MAX_PRIO, rq->prio.bitmap);
+	for (;;) {
+		spu = spu_alloc();
+		if (!spu)
+			break;
+		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
+		add_idle(rq, spu);
+		spu->rq = rq;
+		spu->timestamp = jiffies;
+	}
+	if (!rq->nr_idle) {
+		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
+		kfree(rq);
+		return 1;
+	}
+	return 0;
+}
+
+void __exit spu_sched_exit(void)
+{
+	struct spu_runqueue *rq = spu_rq();
+	struct spu *spu;
+
+	if (!rq) {
+		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
+		return;
+	}
+	while (rq->nr_idle > 0) {
+		spu = del_idle(rq);
+		if (!spu)
+			break;
+		spu_free(spu);
+	}
+	kfree(rq);
+}
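
For reference, the "Lock order" comment above implies a calling pattern like the sketch below. This is a hypothetical caller, not part of the commit: spu_run_sketch() is an invented name and the real run-control path lives elsewhere in spufs; only the entry points (spu_activate, spu_yield) and the state_sema rule come from this file.

#include "spufs.h"

/*
 * Hypothetical caller sketch (not in this commit).  It only illustrates the
 * documented contract: spu_activate()/spu_deactivate() are called with the
 * context write-locked, while spu_yield() takes state_sema itself.
 */
static int spu_run_sketch(struct spu_context *ctx)
{
	int ret;

	down_write(&ctx->state_sema);
	ret = spu_activate(ctx, 0);	/* binds an idle SPU or sleeps by priority */
	up_write(&ctx->state_sema);
	if (ret)
		return ret;		/* -ERESTARTSYS on a signal, -EAGAIN otherwise */

	/* ... run the context through ctx->ops here ... */

	spu_yield(ctx);			/* gives the SPU back if another thread is waiting */
	return 0;
}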