diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/fault.c | 45 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/maccess.c | 94 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 65 |
4 files changed, 176 insertions, 30 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 46ef3fd0663..72cec9ecd96 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -294,7 +294,7 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { + if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { @@ -549,19 +549,15 @@ static void pfault_interrupt(struct ext_code ext_code, if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; - if (subcode & 0x0080) { - /* Get the token (= pid of the affected task). */ - pid = sizeof(void *) == 4 ? param32 : param64; - rcu_read_lock(); - tsk = find_task_by_pid_ns(pid, &init_pid_ns); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return; - } else { - tsk = current; - } + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ @@ -574,6 +570,7 @@ static void pfault_interrupt(struct ext_code ext_code, tsk->thread.pfault_wait = 0; list_del(&tsk->thread.list); wake_up_process(tsk); + put_task_struct(tsk); } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial @@ -585,24 +582,35 @@ static void pfault_interrupt(struct ext_code ext_code, if (tsk->state == TASK_RUNNING) tsk->thread.pfault_wait = -1; } - put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - if (tsk->thread.pfault_wait == -1) { + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait * back to zero and exit. */ tsk->thread.pfault_wait = 0; } else { /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. */ + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); } } +out: spin_unlock(&pfault_lock); + put_task_struct(tsk); } static int __cpuinit pfault_cpu_notify(struct notifier_block *self, @@ -620,6 +628,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, list_del(&thread->list); tsk = container_of(thread, struct task_struct, thread); wake_up_process(tsk); + put_task_struct(tsk); } spin_unlock_irq(&pfault_lock); break; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 597bb2d27c3..900de2b3cf2 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -58,6 +58,8 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; + clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, + PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 7bb15fcca75..795a0a9bb2e 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/gfp.h> +#include <linux/cpu.h> #include <asm/ctl_reg.h> /* @@ -61,21 +62,14 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } -/* - * Copy memory in real mode (kernel to kernel) - */ -int memcpy_real(void *dest, void *src, size_t count) +static int __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; register unsigned long _src asm("4") = (unsigned long) src; register unsigned long _len2 asm("5") = (unsigned long) count; - unsigned long flags; int rc = -EFAULT; - if (!count) - return 0; - flags = __arch_local_irq_stnsm(0xf8UL); asm volatile ( "0: mvcle %1,%2,0x0\n" "1: jo 0b\n" @@ -86,7 +80,23 @@ int memcpy_real(void *dest, void *src, size_t count) "+d" (_len2), "=m" (*((long *) dest)) : "m" (*((long *) src)) : "cc", "memory"); - arch_local_irq_restore(flags); + return rc; +} + +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + unsigned long flags; + int rc; + + if (!count) + return 0; + local_irq_save(flags); + __arch_local_irq_stnsm(0xfbUL); + rc = __memcpy_real(dest, src, count); + local_irq_restore(flags); return rc; } @@ -157,3 +167,69 @@ out: free_page((unsigned long) buf); return rc; } + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(unsigned long addr) +{ + unsigned long lc; + int cpu; + + if (addr < sizeof(struct _lowcore)) + return 1; + for_each_online_cpu(cpu) { + lc = (unsigned long) lowcore_ptr[cpu]; + if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + continue; + return 1; + } + return 0; +} + +/* + * Return swapped prefix or zero page address + */ +static unsigned long get_swapped(unsigned long addr) +{ + unsigned long prefix = store_prefix(); + + if (addr < sizeof(struct _lowcore)) + return addr + prefix; + if (addr >= prefix && addr < prefix + sizeof(struct _lowcore)) + return addr - prefix; + return addr; +} + +/* + * Convert a physical pointer for /dev/mem access + * + * For swapped prefix pages a new buffer is returned that contains a copy of + * the absolute memory. The buffer size is maximum one page large. + */ +void *xlate_dev_mem_ptr(unsigned long addr) +{ + void *bounce = (void *) addr; + unsigned long size; + + get_online_cpus(); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_real(bounce, (void *) get_swapped(addr), size); + } + preempt_enable(); + put_online_cpus(); + return bounce; +} + +/* + * Free converted buffer for /dev/mem access (if necessary) + */ +void unxlate_dev_mem_ptr(unsigned long addr, void *buf) +{ + if ((void *) addr != buf) + free_page((unsigned long) buf); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 373adf69b01..a3db5a3ea08 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -678,8 +678,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } } -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; @@ -733,7 +731,66 @@ void __tlb_remove_table(void *_table) free_pages((unsigned long) table, ALLOC_ORDER); } -#endif +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + __tlb_flush_mm(tlb->mm); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} /* * switch on pgstes for its userspace process (for kvm) @@ -765,6 +822,8 @@ int s390_enable_sie(void) /* we copy the mm and let dup_mm create the page tables with_pgstes */ tsk->mm->context.alloc_pgste = 1; + /* make sure that both mms have a correct rss state */ + sync_mm_rss(tsk->mm); mm = dup_mm(tsk); tsk->mm->context.alloc_pgste = 0; if (!mm) |