diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2007-10-12 21:27:47 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2007-10-12 21:27:47 -0400 |
commit | b981d8b3f5e008ff10d993be633ad00564fc22cd (patch) | |
tree | e292dc07b22308912cf6a58354a608b9e5e8e1fd /arch/powerpc/platforms/cell/spufs | |
parent | b11d2127c4893a7315d1e16273bc8560049fa3ca (diff) | |
parent | 2b9e0aae1d50e880c58d46788e5e3ebd89d75d62 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
drivers/macintosh/adbhid.c
Diffstat (limited to 'arch/powerpc/platforms/cell/spufs')
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/backing_ops.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/context.c | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/coredump.c | 236 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/fault.c | 46 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/file.c | 343 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/gang.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/inode.c | 149 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/run.c | 50 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 393 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_restore.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped | 480 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spufs.h | 103 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/switch.c | 99 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/syscalls.c | 30 |
14 files changed, 1174 insertions, 815 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 07a0e815abf..ec01214e51e 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -162,7 +162,8 @@ static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) BUG_ON(avail != (4 - slot)); ctx->csa.spu_mailbox_data[slot] = data; ctx->csa.spu_chnlcnt_RW[29] = ++slot; - ctx->csa.prob.mb_stat_R = (((4 - slot) & 0xff) << 8); + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); ret = 4; } else { diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 6d7bd60f538..9cb081c26e7 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/slab.h> #include <asm/atomic.h> #include <asm/spu.h> @@ -55,12 +56,13 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); if (gang) spu_gang_add_ctx(gang, ctx); - ctx->cpus_allowed = current->cpus_allowed; + + __spu_update_sched_info(ctx); spu_set_timeslice(ctx); - ctx->stats.execution_state = SPUCTX_UTIL_USER; - ctx->stats.tstamp = jiffies; + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; atomic_inc(&nr_spu_contexts); goto out; @@ -81,6 +83,8 @@ void destroy_spu_context(struct kref *kref) spu_fini_csa(&ctx->csa); if (ctx->gang) spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); BUG_ON(!list_empty(&ctx->rq)); atomic_dec(&nr_spu_contexts); kfree(ctx); @@ -166,6 +170,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) void spu_acquire_saved(struct spu_context *ctx) { spu_acquire(ctx); - if (ctx->state != SPU_STATE_SAVED) + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); spu_deactivate(ctx); + } +} + +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) +{ + BUG_ON(ctx->state != SPU_STATE_SAVED); + + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) + spu_activate(ctx, 0); + + spu_release(ctx); } + +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + void ( * prof_info_release) (struct kref *kref)) +{ + ctx->prof_priv_kref = prof_info_kref; + ctx->prof_priv_release = prof_info_release; +} +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); + +void *spu_get_profile_private_kref(struct spu_context *ctx) +{ + return ctx->prof_priv_kref; +} +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); + + diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 5d9ad5a0307..80f62363e1c 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -31,16 +31,7 @@ #include "spufs.h" -struct spufs_ctx_info { - struct list_head list; - int dfd; - int memsize; /* in bytes */ - struct spu_context *ctx; -}; - -static LIST_HEAD(ctx_info_list); - -static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *buffer, +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, size_t size, loff_t *off) { u64 data; @@ -50,49 +41,57 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *b return spufs_coredump_read[num].read(ctx, buffer, size, off); data = spufs_coredump_read[num].get(ctx); - ret = copy_to_user(buffer, &data, 8); - return ret ? -EFAULT : 8; + ret = snprintf(buffer, size, "0x%.16lx", data); + if (ret >= size) + return size; + return ++ret; /* count trailing NULL */ } /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. */ -static int spufs_dump_write(struct file *file, const void *addr, int nr) +static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset) { - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} + unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; + ssize_t written; -static int spufs_dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek) { - if (file->f_op->llseek(file, off, 0) != off) - return 0; - } else - file->f_pos = off; - return 1; + if (*foffset + nr > limit) + return -EIO; + + written = file->f_op->write(file, addr, nr, &file->f_pos); + *foffset += written; + + if (written != nr) + return -EIO; + + return 0; } -static void spufs_fill_memsize(struct spufs_ctx_info *ctx_info) +static int spufs_dump_align(struct file *file, char *buf, loff_t new_off, + loff_t *foffset) { - struct spu_context *ctx; - unsigned long long lslr; + int rc, size; + + size = min((loff_t)PAGE_SIZE, new_off - *foffset); + memset(buf, 0, size); + + rc = 0; + while (rc == 0 && new_off > *foffset) { + size = min((loff_t)PAGE_SIZE, new_off - *foffset); + rc = spufs_dump_write(file, buf, size, foffset); + } - ctx = ctx_info->ctx; - lslr = ctx->csa.priv2.spu_lslr_RW; - ctx_info->memsize = lslr + 1; + return rc; } -static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info) +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) { - int dfd, memsize, i, sz, total = 0; + int i, sz, total = 0; char *name; char fullname[80]; - dfd = ctx_info->dfd; - memsize = ctx_info->memsize; - - for (i = 0; spufs_coredump_read[i].name; i++) { + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { name = spufs_coredump_read[i].name; sz = spufs_coredump_read[i].size; @@ -100,39 +99,12 @@ static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info) total += sizeof(struct elf_note); total += roundup(strlen(fullname) + 1, 4); - if (!strcmp(name, "mem")) - total += roundup(memsize, 4); - else - total += roundup(sz, 4); + total += roundup(sz, 4); } return total; } -static int spufs_add_one_context(struct file *file, int dfd) -{ - struct spu_context *ctx; - struct spufs_ctx_info *ctx_info; - int size; - - ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; - if (ctx->flags & SPU_CREATE_NOSCHED) - return 0; - - ctx_info = kzalloc(sizeof(*ctx_info), GFP_KERNEL); - if (unlikely(!ctx_info)) - return -ENOMEM; - - ctx_info->dfd = dfd; - ctx_info->ctx = ctx; - - spufs_fill_memsize(ctx_info); - - size = spufs_ctx_note_size(ctx_info); - list_add(&ctx_info->list, &ctx_info_list); - return size; -} - /* * The additional architecture-specific notes for Cell are various * context files in the spu context. @@ -142,33 +114,57 @@ static int spufs_add_one_context(struct file *file, int dfd) * internal functionality to dump them without needing to actually * open the files. */ -static int spufs_arch_notes_size(void) +static struct spu_context *coredump_next_context(int *fd) { struct fdtable *fdt = files_fdtable(current->files); - int size = 0, fd; + struct file *file; + struct spu_context *ctx = NULL; - for (fd = 0; fd < fdt->max_fds; fd++) { - if (FD_ISSET(fd, fdt->open_fds)) { - struct file *file = fcheck(fd); + for (; *fd < fdt->max_fds; (*fd)++) { + if (!FD_ISSET(*fd, fdt->open_fds)) + continue; - if (file && file->f_op == &spufs_context_fops) { - int rval = spufs_add_one_context(file, fd); - if (rval < 0) - break; - size += rval; - } - } + file = fcheck(*fd); + + if (!file || file->f_op != &spufs_context_fops) + continue; + + ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + continue; + + /* start searching the next fd next time we're called */ + (*fd)++; + break; } - return size; + return ctx; } -static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i, - struct file *file) +int spufs_coredump_extra_notes_size(void) { struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + spu_acquire_saved(ctx); + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) + break; + + size += rc; + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct file *file, int dfd, loff_t *foffset) +{ loff_t pos = 0; - int sz, dfd, rc, total = 0; + int sz, rc, nread, total = 0; const int bufsz = PAGE_SIZE; char *name; char fullname[80], *buf; @@ -176,64 +172,70 @@ static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i, buf = (void *)get_zeroed_page(GFP_KERNEL); if (!buf) - return; + return -ENOMEM; - dfd = ctx_info->dfd; name = spufs_coredump_read[i].name; - - if (!strcmp(name, "mem")) - sz = ctx_info->memsize; - else - sz = spufs_coredump_read[i].size; - - ctx = ctx_info->ctx; - if (!ctx) - goto out; + sz = spufs_coredump_read[i].size; sprintf(fullname, "SPU/%d/%s", dfd, name); en.n_namesz = strlen(fullname) + 1; en.n_descsz = sz; en.n_type = NT_SPU; - if (!spufs_dump_write(file, &en, sizeof(en))) + rc = spufs_dump_write(file, &en, sizeof(en), foffset); + if (rc) goto out; - if (!spufs_dump_write(file, fullname, en.n_namesz)) + + rc = spufs_dump_write(file, fullname, en.n_namesz, foffset); + if (rc) goto out; - if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4))) + + rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset); + if (rc) goto out; do { - rc = do_coredump_read(i, ctx, buf, bufsz, &pos); - if (rc > 0) { - if (!spufs_dump_write(file, buf, rc)) + nread = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (nread > 0) { + rc = spufs_dump_write(file, buf, nread, foffset); + if (rc) goto out; - total += rc; + total += nread; } - } while (rc == bufsz && total < sz); + } while (nread == bufsz && total < sz); + + if (nread < 0) { + rc = nread; + goto out; + } + + rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4), + foffset); - spufs_dump_seek(file, roundup((unsigned long)file->f_pos - - total + sz, 4)); out: free_page((unsigned long)buf); + return rc; } -static void spufs_arch_write_notes(struct file *file) +int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset) { - int j; - struct spufs_ctx_info *ctx_info, *next; - - list_for_each_entry_safe(ctx_info, next, &ctx_info_list, list) { - spu_acquire_saved(ctx_info->ctx); - for (j = 0; j < spufs_coredump_num_notes; j++) - spufs_arch_write_note(ctx_info, j, file); - spu_release(ctx_info->ctx); - list_del(&ctx_info->list); - kfree(ctx_info); + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + spu_acquire_saved(ctx); + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, file, fd, foffset); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); } -} -struct spu_coredump_calls spufs_coredump_calls = { - .arch_notes_size = spufs_arch_notes_size, - .arch_write_notes = spufs_arch_write_notes, - .owner = THIS_MODULE, -}; + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e064d0c0d80..917eab4be48 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -75,22 +75,20 @@ good_area: } ret = 0; *flt = handle_mm_fault(mm, vma, ea, is_write); - switch (*flt) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - ret = -EFAULT; - goto bad_area; - case VM_FAULT_OOM: - ret = -ENOMEM; - goto bad_area; - default: + if (unlikely(*flt & VM_FAULT_ERROR)) { + if (*flt & VM_FAULT_OOM) { + ret = -ENOMEM; + goto bad_area; + } else if (*flt & VM_FAULT_SIGBUS) { + ret = -EFAULT; + goto bad_area; + } BUG(); } + if (*flt & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return ret; @@ -181,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; - spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); ctx->stats.hash_flt++; - if (ctx->state == SPU_STATE_RUNNABLE) { + if (ctx->state == SPU_STATE_RUNNABLE) ctx->spu->stats.hash_flt++; - spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); - } /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -212,15 +208,15 @@ int spufs_handle_class1(struct spu_context *ctx) * In case of unhandled error report the problem to user space. */ if (!ret) { - if (flt == VM_FAULT_MINOR) - ctx->stats.min_flt++; - else + if (flt & VM_FAULT_MAJOR) ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; if (ctx->state == SPU_STATE_RUNNABLE) { - if (flt == VM_FAULT_MINOR) - ctx->spu->stats.min_flt++; - else + if (flt & VM_FAULT_MAJOR) ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; } if (ctx->spu) @@ -228,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx) } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); return ret; } EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c2814ea96af..d72b16d6816 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -199,9 +199,9 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) } #ifdef CONFIG_SPU_FS_64K_LS -unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) +static unsigned long spufs_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) { struct spu_context *ctx = file->private_data; struct spu_state *csa = &ctx->csa; @@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer, spu_acquire_saved(ctx); ret = __spufs_regs_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer, ret = copy_from_user(lscsa->gprs + *pos - size, buffer, size) ? -EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer, spu_acquire_saved(ctx); ret = __spufs_fpcr_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, buffer, size) ? -EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal1_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = { .mmap = spufs_signal1_mmap, }; +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, +}; + static int spufs_signal2_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); @@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal2_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1062,6 +1069,43 @@ static const struct file_operations spufs_signal2_fops = { .mmap = spufs_signal2_mmap, }; +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, +}; + +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. + */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static u64 __##__get(void *data) \ +{ \ + struct spu_context *ctx = data; \ + u64 ret; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + spu_acquire(ctx); \ + ret = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + spu_acquire_saved(ctx); \ + ret = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + ret = __get(ctx); \ + \ + return ret; \ +} \ +DEFINE_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + static void spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; @@ -1071,25 +1115,13 @@ static void spufs_signal1_type_set(void *data, u64 val) spu_release(ctx); } -static u64 __spufs_signal1_type_get(void *data) +static u64 spufs_signal1_type_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->ops->signal1_type_get(ctx); } +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu", SPU_ATTR_ACQUIRE); -static u64 spufs_signal1_type_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire(ctx); - ret = __spufs_signal1_type_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, - spufs_signal1_type_set, "%llu"); static void spufs_signal2_type_set(void *data, u64 val) { @@ -1100,25 +1132,12 @@ static void spufs_signal2_type_set(void *data, u64 val) spu_release(ctx); } -static u64 __spufs_signal2_type_get(void *data) +static u64 spufs_signal2_type_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->ops->signal2_type_get(ctx); } - -static u64 spufs_signal2_type_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire(ctx); - ret = __spufs_signal2_type_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, - spufs_signal2_type_set, "%llu"); +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu", SPU_ATTR_ACQUIRE); #if SPUFS_MMAP_4K static unsigned long spufs_mss_mmap_nopfn(struct vm_area_struct *vma, @@ -1594,17 +1613,12 @@ static void spufs_npc_set(void *data, u64 val) spu_release(ctx); } -static u64 spufs_npc_get(void *data) +static u64 spufs_npc_get(struct spu_context *ctx) { - struct spu_context *ctx = data; - u64 ret; - spu_acquire(ctx); - ret = ctx->ops->npc_read(ctx); - spu_release(ctx); - return ret; + return ctx->ops->npc_read(ctx); } -DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); static void spufs_decr_set(void *data, u64 val) { @@ -1612,55 +1626,38 @@ static void spufs_decr_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->decr.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 __spufs_decr_get(void *data) +static u64 spufs_decr_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; return lscsa->decr.slot[0]; } - -static u64 spufs_decr_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_decr_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); static void spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); - lscsa->decr_status.slot[0] = (u32) val; - spu_release(ctx); -} - -static u64 __spufs_decr_status_get(void *data) -{ - struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return lscsa->decr_status.slot[0]; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); } -static u64 spufs_decr_status_get(void *data) +static u64 spufs_decr_status_get(struct spu_context *ctx) { - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_decr_status_get(data); - spu_release(ctx); - return ret; + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, - spufs_decr_status_set, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); static void spufs_event_mask_set(void *data, u64 val) { @@ -1668,31 +1665,21 @@ static void spufs_event_mask_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->event_mask.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 __spufs_event_mask_get(void *data) +static u64 spufs_event_mask_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; return lscsa->event_mask.slot[0]; } -static u64 spufs_event_mask_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_event_mask_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, - spufs_event_mask_set, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); -static u64 __spufs_event_status_get(void *data) +static u64 spufs_event_status_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_state *state = &ctx->csa; u64 stat; stat = state->spu_chnlcnt_RW[0]; @@ -1700,19 +1687,8 @@ static u64 __spufs_event_status_get(void *data) return state->spu_chnldata_RW[0]; return 0; } - -static u64 spufs_event_status_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret = 0; - - spu_acquire_saved(ctx); - ret = __spufs_event_status_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, - NULL, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) static void spufs_srr0_set(void *data, u64 val) { @@ -1720,48 +1696,35 @@ static void spufs_srr0_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->srr0.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 spufs_srr0_get(void *data) +static u64 spufs_srr0_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->srr0.slot[0]; - spu_release(ctx); - return ret; + return lscsa->srr0.slot[0]; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) -static u64 spufs_id_get(void *data) +static u64 spufs_id_get(struct spu_context *ctx) { - struct spu_context *ctx = data; u64 num; - spu_acquire(ctx); if (ctx->state == SPU_STATE_RUNNABLE) num = ctx->spu->number; else num = (unsigned int)-1; - spu_release(ctx); return num; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n") - -static u64 __spufs_object_id_get(void *data) -{ - struct spu_context *ctx = data; - return ctx->object_id; -} +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) -static u64 spufs_object_id_get(void *data) +static u64 spufs_object_id_get(struct spu_context *ctx) { /* FIXME: Should there really be no locking here? */ - return __spufs_object_id_get(data); + return ctx->object_id; } static void spufs_object_id_set(void *data, u64 id) @@ -1770,27 +1733,15 @@ static void spufs_object_id_set(void *data, u64 id) ctx->object_id = id; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, - spufs_object_id_set, "0x%llx\n"); +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); -static u64 __spufs_lslr_get(void *data) +static u64 spufs_lslr_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->csa.priv2.spu_lslr_RW; } - -static u64 spufs_lslr_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire_saved(ctx); - ret = __spufs_lslr_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); static int spufs_info_open(struct inode *inode, struct file *file) { @@ -1850,7 +1801,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_mbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1888,7 +1839,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_ibox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1929,7 +1880,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_wbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1979,7 +1930,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_dma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -2030,7 +1981,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_proxydma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -2065,14 +2016,26 @@ static const char *ctx_state_names[] = { }; static unsigned long long spufs_acct_time(struct spu_context *ctx, - enum spuctx_execution_state state) + enum spu_utilization_state state) { - unsigned long time = ctx->stats.times[state]; + struct timespec ts; + unsigned long long time = ctx->stats.times[state]; - if (ctx->stats.execution_state == state) - time += jiffies - ctx->stats.tstamp; + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. + */ + if (ctx->spu && ctx->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - ctx->stats.tstamp; + } - return jiffies_to_msecs(time); + return time / NSEC_PER_MSEC; } static unsigned long long spufs_slb_flts(struct spu_context *ctx) @@ -2107,11 +2070,11 @@ static int spufs_show_stat(struct seq_file *s, void *private) spu_acquire(ctx); seq_printf(s, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", - ctx_state_names[ctx->stats.execution_state], - spufs_acct_time(ctx, SPUCTX_UTIL_USER), - spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), - spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), - spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), ctx->stats.vol_ctx_switch, ctx->stats.invol_ctx_switch, spufs_slb_flts(ctx), @@ -2184,8 +2147,8 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, - { "signal1", &spufs_signal1_fops, 0666, }, - { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, { "mss", &spufs_mss_fops, 0666, }, @@ -2201,25 +2164,25 @@ struct tree_descr spufs_dir_nosched_contents[] = { }; struct spufs_coredump_reader spufs_coredump_read[] = { - { "regs", __spufs_regs_read, NULL, 128 * 16 }, - { "fpcr", __spufs_fpcr_read, NULL, 16 }, - { "lslr", NULL, __spufs_lslr_get, 11 }, - { "decr", NULL, __spufs_decr_get, 11 }, - { "decr_status", NULL, __spufs_decr_status_get, 11 }, - { "mem", __spufs_mem_read, NULL, 256 * 1024, }, - { "signal1", __spufs_signal1_read, NULL, 4 }, - { "signal1_type", NULL, __spufs_signal1_type_get, 2 }, - { "signal2", __spufs_signal2_read, NULL, 4 }, - { "signal2_type", NULL, __spufs_signal2_type_get, 2 }, - { "event_mask", NULL, __spufs_event_mask_get, 8 }, - { "event_status", NULL, __spufs_event_status_get, 8 }, - { "mbox_info", __spufs_mbox_info_read, NULL, 4 }, - { "ibox_info", __spufs_ibox_info_read, NULL, 4 }, - { "wbox_info", __spufs_wbox_info_read, NULL, 16 }, - { "dma_info", __spufs_dma_info_read, NULL, 69 * 8 }, - { "proxydma_info", __spufs_proxydma_info_read, NULL, 35 * 8 }, - { "object-id", NULL, __spufs_object_id_get, 19 }, - { }, + { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", __spufs_mem_read, NULL, LS_SIZE, }, + { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, + { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, + { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, + { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", __spufs_proxydma_info_read, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, }; -int spufs_coredump_num_notes = ARRAY_SIZE(spufs_coredump_read) - 1; - diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c index 212ea78f905..71a44325302 100644 --- a/arch/powerpc/platforms/cell/spufs/gang.c +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void) kref_init(&gang->kref); mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); out: return gang; @@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) { mutex_lock(&gang->mutex); WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } list_del_init(&ctx->gang_list); gang->contexts--; mutex_unlock(&gang->mutex); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index f37460e5bfd..11098747d09 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -43,6 +43,7 @@ static struct kmem_cache *spufs_inode_cache; char *isolated_loader; +static int isolated_loader_size; static struct inode * spufs_alloc_inode(struct super_block *sb) @@ -316,11 +317,107 @@ out: return ret; } -static int spufs_create_context(struct inode *inode, - struct dentry *dentry, - struct vfsmount *mnt, int flags, int mode) +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(filp->f_dentry->d_inode)->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) + return ERR_PTR(-EEXIST); + + if (gang != neighbor->gang) + return ERR_PTR(-EINVAL); + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) + return ERR_PTR(-EEXIST); + } + + return neighbor; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} + +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, int mode, + struct file *aff_filp) { int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; ret = -EPERM; if ((flags & SPU_CREATE_NOSCHED) && @@ -336,9 +433,29 @@ static int spufs_create_context(struct inode *inode, if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) goto out_unlock; + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + ret = -EINVAL; + if (!gang) + goto out_unlock; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); if (ret) - goto out_unlock; + goto out_aff_unlock; + + if (affinity) + spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, + neighbor); /* * get references for dget and mntget, will be released @@ -352,6 +469,9 @@ static int spufs_create_context(struct inode *inode, goto out; } +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); out_unlock: mutex_unlock(&inode->i_mutex); out: @@ -450,7 +570,8 @@ out: static struct file_system_type spufs_type; -long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) +long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, + struct file *filp) { struct dentry *dentry; int ret; @@ -487,7 +608,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) dentry, nd->mnt, mode); else return spufs_create_context(nd->dentry->d_inode, - dentry, nd->mnt, flags, mode); + dentry, nd->mnt, flags, mode, filp); out_dput: dput(dentry); @@ -547,7 +668,8 @@ spufs_parse_options(char *options, struct inode *root) static void spufs_exit_isolated_loader(void) { - kfree(isolated_loader); + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); } static void @@ -565,11 +687,12 @@ spufs_init_isolated_loader(void) if (!loader) return; - /* kmalloc should align on a 16 byte boundary..* */ - isolated_loader = kmalloc(size, GFP_KERNEL); + /* the loader must be align on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); if (!isolated_loader) return; + isolated_loader_size = size; memcpy(isolated_loader, loader, size); printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); } @@ -654,7 +777,7 @@ static int __init spufs_init(void) ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, - SLAB_HWCACHE_ALIGN, spufs_init_once, NULL); + SLAB_HWCACHE_ALIGN, spufs_init_once); if (!spufs_inode_cache) goto out; @@ -667,16 +790,11 @@ static int __init spufs_init(void) ret = register_spu_syscalls(&spufs_calls); if (ret) goto out_fs; - ret = register_arch_coredump_calls(&spufs_coredump_calls); - if (ret) - goto out_syscalls; spufs_init_isolated_loader(); return 0; -out_syscalls: - unregister_spu_syscalls(&spufs_calls); out_fs: unregister_filesystem(&spufs_type); out_sched: @@ -692,7 +810,6 @@ static void __exit spufs_exit(void) { spu_sched_exit(); spufs_exit_isolated_loader(); - unregister_arch_coredump_calls(&spufs_coredump_calls); unregister_spu_syscalls(&spufs_calls); unregister_filesystem(&spufs_type); kmem_cache_destroy(spufs_inode_cache); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 58ae13b7de8..1ce5e22ea5f 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu) wake_up_all(&ctx->stop_wq); } -static inline int spu_stopped(struct spu_context *ctx, u32 * stat) +static inline int spu_stopped(struct spu_context *ctx, u32 *stat) { struct spu *spu; u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || + test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? @@ -124,8 +126,10 @@ out: return ret; } -static int spu_run_init(struct spu_context *ctx, u32 * npc) +static int spu_run_init(struct spu_context *ctx, u32 *npc) { + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if (ctx->flags & SPU_CREATE_ISOLATE) { unsigned long runcntl; @@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc) ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } + spuctx_switch_state(ctx, SPU_UTIL_USER); + return 0; } -static int spu_run_fini(struct spu_context *ctx, u32 * npc, - u32 * status) +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) { int ret = 0; *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); spu_release(ctx); if (signal_pending(current)) @@ -185,11 +193,7 @@ static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, if (ret) return ret; - ret = spu_run_init(ctx, npc); - if (ret) { - spu_release(ctx); - return ret; - } + spuctx_switch_state(ctx, SPU_UTIL_USER); return 0; } @@ -201,7 +205,7 @@ static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, * This means we can only do a very rough approximation of POSIX * signal semantics. */ -int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, unsigned int *npc) { int ret; @@ -237,7 +241,7 @@ int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, return ret; } -int spu_process_callback(struct spu_context *ctx) +static int spu_process_callback(struct spu_context *ctx) { struct spu_syscall_block s; u32 ls_pointer, npc; @@ -289,10 +293,10 @@ static inline int spu_process_events(struct spu_context *ctx) return ret; } -long spufs_run_spu(struct file *file, struct spu_context *ctx, - u32 *npc, u32 *event) +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; + struct spu *spu; u32 status; if (mutex_lock_interruptible(&ctx->run_mutex)) @@ -304,6 +308,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, spu_acquire(ctx); if (ctx->state == SPU_STATE_SAVED) { __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); ret = spu_activate(ctx, 0); if (ret) { @@ -314,6 +319,9 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, /* * We have to update the scheduling priority under active_mutex * to protect against find_victim(). + * + * No need to update the timeslice ASAP, it will get updated + * once the current one has expired. */ spu_update_sched_info(ctx); } @@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); if (unlikely(ret)) break; + spu = ctx->spu; + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); @@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, (ctx->state == SPU_STATE_RUNNABLE)) ctx->stats.libassist++; + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e5b4dd1db28..4d257b3f933 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,9 +51,6 @@ struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; - struct list_head active_list[MAX_NUMNODES]; - struct mutex active_mutex[MAX_NUMNODES]; - int nr_active[MAX_NUMNODES]; int nr_waiting; }; @@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold active_mutex poke into + * A lot of places that don't hold list_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. @@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx) { int node = ctx->spu->node; - mutex_lock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } static int __node_allowed(struct spu_context *ctx, int node) @@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -/** - * spu_add_to_active_list - add spu to active list - * @spu: spu to add to the active list - */ -static void spu_add_to_active_list(struct spu *spu) -{ - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - spu_prio->nr_active[node]++; - list_add_tail(&spu->list, &spu_prio->active_list[node]); - mutex_unlock(&spu_prio->active_mutex[node]); -} +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void __spu_remove_from_active_list(struct spu *spu) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { - list_del_init(&spu->list); - spu_prio->nr_active[spu->node]--; + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); } -/** - * spu_remove_from_active_list - remove spu from active list - * @spu: spu to remove from the active list - */ -static void spu_remove_from_active_list(struct spu *spu) +static void notify_spus_active(void) { - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - __spu_remove_from_active_list(spu); - mutex_unlock(&spu_prio->active_mutex[node]); -} + int node; -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(); + */ + for_each_online_node(node) { + struct spu *spu; -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) -{ - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? ctx->object_id : 0, spu); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(&spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(&spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); /** * spu_bind_context - bind spu context to physical spu @@ -229,6 +226,10 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; @@ -238,6 +239,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; + spu->tgid = current->tgid; spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; @@ -251,7 +253,152 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; - spu_switch_state(spu, SPU_UTIL_SYSTEM); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, + lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. + */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + + return gang->aff_ref_spu != NULL; } /** @@ -263,8 +410,19 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); - spu_switch_state(spu, SPU_UTIL_IDLE); + if (ctx->gang){ + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; + } + mutex_unlock(&ctx->gang->aff_mutex); + } spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); @@ -278,8 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->dma_callback = NULL; spu_associate_mm(spu, NULL); spu->pid = 0; + spu->tgid = 0; ctx->ops = &spu_backing_ops; - ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; @@ -287,6 +445,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) (spu->stats.slb_flt - ctx->stats.slb_flt_base); ctx->stats.class2_intr += (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; } /** @@ -352,18 +514,53 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu = NULL; - int node = cpu_to_node(raw_smp_processor_id()); - int n; + struct spu *spu, *aff_ref_spu; + int node, n; + + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + mutex_lock(&ctx->gang->aff_mutex); + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; + mutex_unlock(&ctx->gang->aff_mutex); + return NULL; + } + mutex_unlock(&ctx->gang->aff_mutex); + } + node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; - spu = spu_alloc_node(node); - if (spu) - break; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); } + + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_init_channels(spu); return spu; } @@ -393,15 +590,15 @@ static struct spu *find_victim(struct spu_context *ctx) if (!node_allowed(ctx, node)) continue; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; - if (tmp->prio > ctx->prio && + if (tmp && tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { /* @@ -426,8 +623,12 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } - spu_remove_from_active_list(spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; spu_unbind_context(spu, victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); @@ -455,8 +656,6 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); - do { struct spu *spu; @@ -477,8 +676,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); - spu_add_to_active_list(spu); + cbe_spu_info[node].nr_active++; + mutex_unlock(&cbe_spu_info[node].list_mutex); return 0; } @@ -500,7 +703,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) int best; spin_lock(&spu_prio->runq_lock); - best = sched_find_first_bit(spu_prio->bitmap); + best = find_first_bit(spu_prio->bitmap, prio); while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; @@ -527,11 +730,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - spu_remove_from_active_list(spu); + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_unbind_context(spu, ctx); + spu->alloc_state = SPU_FREE; + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + ctx->stats.vol_ctx_switch++; spu->stats.vol_ctx_switch++; - spu_free(spu); + if (new) wake_up(&new->stop_wq); } @@ -550,21 +759,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { - /* - * We must never reach this for a nosched context, - * but handle the case gracefull instead of panicing. - */ - if (ctx->flags & SPU_CREATE_NOSCHED) { - WARN_ON(1); - return; - } - __spu_deactivate(ctx, 1, MAX_PRIO); - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** - * spu_yield - yield a physical spu if others are waiting + * spu_yield - yield a physical spu if others are waiting * @ctx: spu context to yield * * Check if there is a higher priority context waiting and if yes @@ -575,17 +774,12 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - if (__spu_deactivate(ctx, 0, MAX_PRIO)) - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); - else { - spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); - spu_switch_state(ctx->spu, SPU_UTIL_USER); - } + __spu_deactivate(ctx, 0, MAX_PRIO); mutex_unlock(&ctx->state_mutex); } } -static void spusched_tick(struct spu_context *ctx) +static noinline void spusched_tick(struct spu_context *ctx) { if (ctx->flags & SPU_CREATE_NOSCHED) return; @@ -596,7 +790,7 @@ static void spusched_tick(struct spu_context *ctx) return; /* - * Unfortunately active_mutex ranks outside of state_mutex, so + * Unfortunately list_mutex ranks outside of state_mutex, so * we have to trylock here. If we fail give the context another * tick and try again. */ @@ -606,12 +800,11 @@ static void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - - __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; - spu_free(spu); + spu->alloc_state = SPU_FREE; + cbe_spu_info[spu->node].nr_active--; wake_up(&new->stop_wq); /* * We need to break out of the wait loop in @@ -632,7 +825,7 @@ static void spusched_tick(struct spu_context *ctx) * * Return the number of tasks currently running or waiting to run. * - * Note that we don't take runq_lock / active_mutex here. Reading + * Note that we don't take runq_lock / list_mutex here. Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here. */ @@ -641,7 +834,7 @@ static unsigned long count_active_contexts(void) int nr_active = 0, node; for (node = 0; node < MAX_NUMNODES; node++) - nr_active += spu_prio->nr_active[node]; + nr_active += cbe_spu_info[node].nr_active; nr_active += spu_prio->nr_waiting; return nr_active; @@ -681,19 +874,18 @@ static void spusched_wake(unsigned long data) static int spusched_thread(void *unused) { - struct spu *spu, *next; + struct spu *spu; int node; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, next, - &spu_prio->active_list[node], - list) - spusched_tick(spu->ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->ctx) + spusched_tick(spu->ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -751,11 +943,6 @@ int __init spu_sched_init(void) INIT_LIST_HEAD(&spu_prio->runq[i]); __clear_bit(i, spu_prio->bitmap); } - __set_bit(MAX_PRIO, spu_prio->bitmap); - for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&spu_prio->active_mutex[i]); - INIT_LIST_HEAD(&spu_prio->active_list[i]); - } spin_lock_init(&spu_prio->runq_lock); setup_timer(&spusched_timer, spusched_wake, 0); @@ -783,9 +970,9 @@ int __init spu_sched_init(void) return err; } -void __exit spu_sched_exit(void) +void spu_sched_exit(void) { - struct spu *spu, *tmp; + struct spu *spu; int node; remove_proc_entry("spu_loadavg", NULL); @@ -794,13 +981,11 @@ void __exit spu_sched_exit(void) kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], - list) { - list_del_init(&spu->list); - spu_free(spu); - } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } kfree(spu_prio); } diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 4e19ed7a075..21a9c952d88 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -84,13 +84,13 @@ static inline void restore_decr(void) unsigned int decr_running; unsigned int decr; - /* Restore, Step 6: + /* Restore, Step 6(moved): * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); - decr_running = regs_spill[offset].slot[0]; + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; @@ -318,10 +318,10 @@ int main() build_dma_list(lscsa_ea); /* Step 3. */ restore_upper_240kb(lscsa_ea); /* Step 4. */ /* Step 5: done by 'exit'. */ - restore_decr(); /* Step 6. */ enqueue_putllc(lscsa_ea); /* Step 7. */ set_tag_update(); /* Step 8. */ read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ read_llar_status(); /* Step 10. */ write_ppu_mb(); /* Step 11. */ write_ppuint_mb(); /* Step 12. */ diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index 15183d209b5..f383b027e8b 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x24fd8081, 0x1cd80081, 0x33001180, -0x42030003, +0x42034003, 0x33800284, 0x1c010204, 0x40200000, @@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x23fffd84, 0x1c100183, 0x217ffa85, -0x3080a000, -0x3080a201, -0x3080a402, -0x3080a603, -0x3080a804, -0x3080aa05, -0x3080ac06, -0x3080ae07, -0x3080b008, -0x3080b209, -0x3080b40a, -0x3080b60b, -0x3080b80c, -0x3080ba0d, -0x3080bc0e, -0x3080be0f, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, 0x00003ffc, 0x00000000, 0x00000000, @@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x3ec00083, 0xb0a14103, 0x01a00204, -0x3ec10082, -0x4202800e, -0x04000703, -0xb0a14202, -0x21a00803, -0x3fbf028d, -0x3f20068d, -0x3fbe0682, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, 0x3fe30102, 0x21a00882, -0x3f82028f, -0x3fe3078f, -0x3fbf0784, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, 0x3f200204, 0x3fbe0204, 0x3fe30204, @@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x21a00083, 0x40800082, 0x21a00b02, -0x10002818, -0x42a00002, -0x32800007, -0x4207000c, -0x18008208, -0x40a0000b, -0x4080020a, -0x40800709, -0x00200000, -0x42070002, -0x3ac30384, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, 0x1cffc489, -0x00200000, -0x18008383, -0x38830382, -0x4cffc486, -0x3ac28185, -0xb0408584, -0x28830382, -0x1c020387, -0x38828182, -0xb0408405, -0x1802c408, -0x28828182, -0x217ff886, -0x04000583, -0x21a00803, -0x3fbe0682, -0x3fe30102, -0x04000106, -0x21a00886, -0x04000603, -0x21a00903, -0x40803c02, -0x21a00982, -0x40800003, -0x04000184, -0x21a00a04, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, 0x40802202, 0x21a00a82, -0x42028005, -0x34208702, -0x21002282, -0x21a00804, -0x21a00886, -0x3fbf0782, +0x21a00805, +0x21a00884, +0x3fbf0582, 0x3f200102, 0x3fbe0102, 0x3fe30102, 0x21a00902, 0x40804003, 0x21a00983, -0x21a00a04, +0x21a00a05, 0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, 0x01a00c02, -0x01a00d83, -0x3420c282, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, 0x21a00e02, -0x34210283, -0x21a00f03, -0x34200284, -0x77400200, -0x3421c282, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, 0x21a00702, -0x34218283, -0x21a00083, -0x34214282, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, 0x21a00b02, -0x4200480c, -0x00200000, -0x1c010286, -0x34220284, -0x34220302, -0x0f608203, -0x5c024204, -0x3b81810b, -0x42013c02, -0x00200000, -0x18008185, -0x38808183, -0x3b814182, -0x21004e84, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, 0x4020007f, 0x35000100, -0x000004e0, -0x000002a0, -0x000002e8, -0x00000428, +0x00000470, +0x000002f8, +0x00000430, 0x00000360, -0x000002e8, -0x000004a0, -0x00000468, +0x000002f8, 0x000003c8, +0x000004a8, +0x00000298, 0x00000360, +0x00200000, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40085, -0x10009c09, -0x3ac10606, -0xb060c105, -0x4020007f, -0x4020007f, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408586, -0x28810602, -0x32004180, -0x34204702, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, 0x21a00382, 0x4020007f, -0x327fdc80, +0x327fde00, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x00200000, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, +0x38818282, 0x41004003, -0x38810602, -0x4020007f, -0xb0408188, -0x4020007f, -0x28810602, -0x41201002, -0x38814603, -0x10009c09, -0xb060c109, -0x4020007f, -0x28814603, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818a, -0x28818602, -0x32003080, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x41201008, -0x10009c14, -0x40800405, -0x3ac10609, -0x40800606, -0x3ac1460a, -0xb060c107, -0x3ac1860b, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, 0x20801203, -0x38810602, -0xb0408409, -0x28810602, -0x38814603, -0xb060c40a, -0x4020007f, -0x28814603, +0x3881c282, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818b, -0x28818602, -0x32002380, -0x409ffe02, -0x30801204, -0x40800205, -0x3ec40083, -0x40800406, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x41004002, -0x20801204, -0x4020007f, -0x38814603, -0x10009c0b, -0xb060c107, -0x4020007f, -0x4020007f, -0x28814603, -0x38818602, -0x4020007f, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, 0x4020007f, -0xb0408588, -0x28818602, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, 0x4020007f, -0x32001780, +0x327fd580, 0x409ffe02, -0x1000640e, -0x40800204, +0x1000658e, +0x40800206, 0x30801203, -0x40800405, -0x3ec40087, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, 0x413d8003, -0x38810602, +0x38818282, 0x4020007f, -0x327fd780, -0x409ffe02, -0x10007f0c, -0x40800205, -0x30801204, -0x40800406, -0x3ec40083, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x413d8002, -0x20801204, -0x38814603, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, 0x4020007f, -0x327feb80, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, 0x409ffe02, +0x1000588b, +0x40800208, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x1000650a, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408588, -0x4020007f, -0x327fc980, -0x00400000, -0x40800003, -0x4020007f, -0x35000000, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, 0x00000000, +0x00000e00, 0x00000000, 0x00000000, 0x00000000, diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 08b3530288a..ca47b991bda 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -40,17 +40,10 @@ enum { struct spu_context_ops; struct spu_gang; -/* - * This is the state for spu utilization reporting to userspace. - * Because this state is visible to userspace it must never change and needs - * to be kept strictly separate from any internal state kept by the kernel. - */ -enum spuctx_execution_state { - SPUCTX_UTIL_USER = 0, - SPUCTX_UTIL_SYSTEM, - SPUCTX_UTIL_IOWAIT, - SPUCTX_UTIL_LOADED, - SPUCTX_UTIL_MAX +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ }; struct spu_context { @@ -89,6 +82,8 @@ struct spu_context { struct list_head gang_list; struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); /* owner thread */ pid_t tid; @@ -104,9 +99,9 @@ struct spu_context { /* statistics */ struct { /* updates protected by ctx->state_mutex */ - enum spuctx_execution_state execution_state; - unsigned long tstamp; /* time of last ctx switch */ - unsigned long times[SPUCTX_UTIL_MAX]; + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; unsigned long long vol_ctx_switch; unsigned long long invol_ctx_switch; unsigned long long min_flt; @@ -118,6 +113,10 @@ struct spu_context { unsigned long long class2_intr_base; /* # at last ctx switch */ unsigned long long libassist; } stats; + + struct list_head aff_list; + int aff_head; + int aff_offset; }; struct spu_gang { @@ -125,8 +124,19 @@ struct spu_gang { struct mutex mutex; struct kref kref; int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; }; +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define AFF_MERGED 2 + struct mfc_dma_command { int32_t pad; /* reserved */ uint32_t lsa; /* local storage address */ @@ -190,10 +200,14 @@ extern struct tree_descr spufs_dir_contents[]; extern struct tree_descr spufs_dir_nosched_contents[]; /* system call implementation */ -long spufs_run_spu(struct file *file, - struct spu_context *ctx, u32 *npc, u32 *status); -long spufs_create(struct nameidata *nd, - unsigned int flags, mode_t mode); +extern struct spufs_calls spufs_calls; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct nameidata *nd, unsigned int flags, + mode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset); + extern const struct file_operations spufs_context_fops; /* gang management */ @@ -206,6 +220,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); /* fault handling */ int spufs_handle_class1(struct spu_context *ctx); +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); + /* context management */ extern atomic_t nr_spu_contexts; static inline void spu_acquire(struct spu_context *ctx) @@ -227,15 +244,17 @@ void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); void spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); void spu_set_timeslice(struct spu_context *ctx); void spu_update_sched_info(struct spu_context *ctx); void __spu_update_sched_info(struct spu_context *ctx); int __init spu_sched_init(void); -void __exit spu_sched_exit(void); +void spu_sched_exit(void); extern char *isolated_loader; @@ -281,7 +300,7 @@ struct spufs_coredump_reader { char *name; ssize_t (*read)(struct spu_context *ctx, char __user *buffer, size_t size, loff_t *pos); - u64 (*get)(void *data); + u64 (*get)(struct spu_context *ctx); size_t size; }; extern struct spufs_coredump_reader spufs_coredump_read[]; @@ -293,30 +312,34 @@ extern int spufs_coredump_num_notes; * line. */ static inline void spuctx_switch_state(struct spu_context *ctx, - enum spuctx_execution_state new_state) + enum spu_utilization_state new_state) { - WARN_ON(!mutex_is_locked(&ctx->state_mutex)); - - if (ctx->stats.execution_state != new_state) { - unsigned long curtime = jiffies; + unsigned long long curtime; + signed long long delta; + struct timespec ts; + struct spu *spu; + enum spu_utilization_state old_state; - ctx->stats.times[ctx->stats.execution_state] += - curtime - ctx->stats.tstamp; - ctx->stats.tstamp = curtime; - ctx->stats.execution_state = new_state; - } -} + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; -static inline void spu_switch_state(struct spu *spu, - enum spuctx_execution_state new_state) -{ - if (spu->stats.utilization_state != new_state) { - unsigned long curtime = jiffies; - - spu->stats.times[spu->stats.utilization_state] += - curtime - spu->stats.tstamp; + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; spu->stats.tstamp = curtime; - spu->stats.utilization_state = new_state; } } diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 9c506ba08cd..3d64c81cc6e 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) case MFC_CNTL_SUSPEND_COMPLETE: if (csa) { csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_MASK | MFC_CNTL_SUSPEND_DMA_QUEUE; } break; @@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); if (csa) { - csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) & - ~MFC_CNTL_SUSPEND_DMA_QUEUE; + csa->priv2.mfc_control_RW = 0; } break; } @@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) * Read MFC_CNTL[Ds]. Update saved copy of * CSA.MFC_CNTL[Ds]. */ - if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { - csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; - csa->suspend_time = get_cycles(); - out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); - eieio(); - csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); - eieio(); - } else { - csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; - } + csa->priv2.mfc_control_RW |= + in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING; } static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) @@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) * Write MFC_CNTL[Dh] set to a '1' to halt * the decrementer. */ - out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); eieio(); } @@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Save, Step 42: @@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); /* Save the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -708,7 +699,7 @@ static inline void get_kernel_slb(u64 ea, u64 slb[2]) llp = mmu_psize_defs[mmu_linear_psize].sllp; else llp = mmu_psize_defs[mmu_virtual_psize].sllp; - slb[0] = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | + slb[0] = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | SLB_VSID_KERNEL | llp; slb[1] = (ea & ESID_MASK) | SLB_ESID_V; } @@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) */ } -static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; /* Restore, Step 7: - * Restore, Step 47. - * Write MFC_Cntl[Dh,Sc]='1','1' to suspend + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend * the queue and halt the decrementer. */ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | @@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; u64 idx; int i; @@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) out_be64(&priv2->spu_chnldata_RW, 0UL); /* Reset the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu) cycles_t resume_time = get_cycles(); cycles_t delta_time = resume_time - csa->suspend_time; + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; } } @@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); } +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) { /* Restore, Step 49: @@ -1548,32 +1559,27 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) * "wrapped" flag is set, OR in a '1' to * CSA.SPU_Event_Status[Tm]. */ - if (csa->lscsa->decr_status.slot[0] == 1) { - csa->spu_chnldata_RW[0] |= 0x20; - } - if ((csa->lscsa->decr_status.slot[0] == 1) && - (csa->spu_chnlcnt_RW[0] == 0 && - ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && - ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) csa->spu_chnlcnt_RW[0] = 1; - } + + csa->spu_chnldata_RW[0] |= 0x20; } static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] */ - - /* Restore CH 1 without count */ - out_be64(&priv2->spu_chnlcntptr_RW, 1); - out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]); - - /* Restore the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) set_switch_pending(prev, spu); /* Step 5. */ stop_spu_isolate(spu); /* NEW. */ remove_other_spu_access(prev, spu); /* Step 6. */ - suspend_mfc(prev, spu); /* Step 7. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ wait_suspend_mfc_complete(prev, spu); /* Step 8. */ if (!suspend_spe(prev, spu)) /* Step 9. */ clear_spu_status(prev, spu); /* Step 10. */ @@ -2140,19 +2146,6 @@ int spu_restore(struct spu_state *new, struct spu *spu) } EXPORT_SYMBOL_GPL(spu_restore); -/** - * spu_harvest - SPU harvest (reset) operation - * @spu: pointer to SPU iomem structure. - * - * Perform SPU harvest (reset) operation. - */ -void spu_harvest(struct spu *spu) -{ - acquire_spu_lock(spu); - harvest(NULL, spu); - release_spu_lock(spu); -} - static void init_prob(struct spu_state *csa) { csa->spu_chnlcnt_RW[9] = 1; diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 8e37bdf4dfd..2c34f717019 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp, goto out; i = SPUFS_I(filp->f_path.dentry->d_inode); - ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); + ret = spufs_run_spu(i->i_ctx, &npc, &status); if (put_user(npc, unpc)) ret = -EFAULT; @@ -58,26 +58,8 @@ out: return ret; } -#ifndef MODULE -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) -{ - int fput_needed; - struct file *filp; - long ret; - - ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = do_spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); - } - - return ret; -} -#endif - -asmlinkage long sys_spu_create(const char __user *pathname, - unsigned int flags, mode_t mode) +static long do_spu_create(const char __user *pathname, unsigned int flags, + mode_t mode, struct file *neighbor) { char *tmp; int ret; @@ -90,7 +72,7 @@ asmlinkage long sys_spu_create(const char __user *pathname, ret = path_lookup(tmp, LOOKUP_PARENT| LOOKUP_OPEN|LOOKUP_CREATE, &nd); if (!ret) { - ret = spufs_create(&nd, flags, mode); + ret = spufs_create(&nd, flags, mode, neighbor); path_release(&nd); } putname(tmp); @@ -100,7 +82,9 @@ asmlinkage long sys_spu_create(const char __user *pathname, } struct spufs_calls spufs_calls = { - .create_thread = sys_spu_create, + .create_thread = do_spu_create, .spu_run = do_spu_run, + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, .owner = THIS_MODULE, }; |