diff options
Diffstat (limited to 'arch/ia64')
68 files changed, 14339 insertions, 6922 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8fa3faf5ef1..3aa6c821449 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -266,23 +267,23 @@ config IOSAPIC depends on !IA64_HP_SIM default y -config IA64_SGI_SN_XP - tristate "Support communication between SGI SSIs" - depends on IA64_GENERIC || IA64_SGI_SN2 - select IA64_UNCACHED_ALLOCATOR - help - An SGI machine can be divided into multiple Single System - Images which act independently of each other and have - hardware based memory protection from the others. Enabling - this feature will allow for direct communication between SSIs - based on a network adapter and DMA messaging. - config FORCE_MAX_ZONEORDER int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE range 11 17 if !HUGETLB_PAGE default "17" if HUGETLB_PAGE default "11" +config VIRT_CPU_ACCOUNTING + bool "Deterministic task and CPU time accounting" + default n + help + Select this option to enable more accurate task and CPU time + accounting. This is done by reading a CPU counter on each + kernel entry and exit and on transitions within the kernel + between system, softirq and hardirq state, so there is a + small performance impact. + If in doubt, say N here. + config SMP bool "Symmetric multi-processing support" help @@ -589,6 +590,8 @@ config MSPEC source "fs/Kconfig" +source "arch/ia64/kvm/Kconfig" + source "lib/Kconfig" # @@ -611,6 +614,9 @@ config IRQ_PER_CPU bool default y +config IOMMU_HELPER + def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC) + source "arch/ia64/hp/sim/Kconfig" source "arch/ia64/Kconfig.debug" diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index f1645c4f703..ec4cca477f4 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 523eae6d3e4..9409de5c944 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -35,6 +35,7 @@ #include <linux/nodemask.h> #include <linux/bitops.h> /* hweight64() */ #include <linux/crash_dump.h> +#include <linux/iommu-helper.h> #include <asm/delay.h> /* ia64_get_itc() */ #include <asm/io.h> @@ -460,6 +461,13 @@ get_iovp_order (unsigned long size) return order; } +static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr, + unsigned int bitshiftcnt) +{ + return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3) + + bitshiftcnt; +} + /** * sba_search_bitmap - find free space in IO PDIR resource bitmap * @ioc: IO MMU structure which owns the pdir we are interested in. @@ -471,15 +479,25 @@ get_iovp_order (unsigned long size) * Cool perf optimization: search for log2(size) bits at a time. */ static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) +sba_search_bitmap(struct ioc *ioc, struct device *dev, + unsigned long bits_wanted, int use_hint) { unsigned long *res_ptr; unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long flags, pide = ~0UL; + unsigned long flags, pide = ~0UL, tpide; + unsigned long boundary_size; + unsigned long shift; + int ret; ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); + boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1; + boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift; + + BUG_ON(ioc->ibase & ~iovp_mask); + shift = ioc->ibase >> iovp_shift; + spin_lock_irqsave(&ioc->res_lock, flags); /* Allow caller to force a search through the entire resource space */ @@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) if (likely(*res_ptr != ~0UL)) { bitshiftcnt = ffz(*res_ptr); *res_ptr |= (1UL << bitshiftcnt); - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; + pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); ioc->res_bitshift = bitshiftcnt + bits_wanted; goto found_it; } @@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); ASSERT(0 != mask); for (; mask ; mask <<= o, bitshiftcnt += o) { - if(0 == ((*res_ptr) & mask)) { + tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, + boundary_size); + if ((0 == ((*res_ptr) & mask)) && !ret) { *res_ptr |= mask; /* mark resources busy! */ - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; + pide = tpide; ioc->res_bitshift = bitshiftcnt + bits_wanted; goto found_it; } @@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) end = res_end - qwords; for (; res_ptr < end; res_ptr++) { + tpide = ptr_to_pide(ioc, res_ptr, 0); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, boundary_size); + if (ret) + goto next_ptr; for (i = 0 ; i < qwords ; i++) { if (res_ptr[i] != 0) goto next_ptr; @@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) res_ptr[i] = ~0UL; res_ptr[i] |= RESMAP_MASK(bits); - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ + pide = tpide; res_ptr += qwords; ioc->res_bitshift = bits; goto found_it; @@ -605,7 +627,7 @@ found_it: * resource bit map. */ static int -sba_alloc_range(struct ioc *ioc, size_t size) +sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size) { unsigned int pages_needed = size >> iovp_shift; #ifdef PDIR_SEARCH_TIMING @@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size) /* ** "seek and ye shall find"...praying never hurts either... */ - pide = sba_search_bitmap(ioc, pages_needed, 1); + pide = sba_search_bitmap(ioc, dev, pages_needed, 1); if (unlikely(pide >= (ioc->res_size << 3))) { - pide = sba_search_bitmap(ioc, pages_needed, 0); + pide = sba_search_bitmap(ioc, dev, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) { #if DELAYED_RESOURCE_CNT > 0 unsigned long flags; @@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size) } spin_unlock_irqrestore(&ioc->saved_lock, flags); - pide = sba_search_bitmap(ioc, pages_needed, 0); + pide = sba_search_bitmap(ioc, dev, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", ioc->ioc_hpa); @@ -936,7 +958,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) spin_unlock_irqrestore(&ioc->res_lock, flags); #endif - pide = sba_alloc_range(ioc, size); + pide = sba_alloc_range(ioc, dev, size); iovp = (dma_addr_t) pide << iovp_shift; @@ -1373,7 +1395,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask; ASSERT(dma_len <= DMA_CHUNK_SIZE); dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG - | (sba_alloc_range(ioc, dma_len) << iovp_shift) + | (sba_alloc_range(ioc, dev, dma_len) << iovp_shift) | dma_offset); n_mappings++; } diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index 969fe9f443c..3d47839a0c4 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -294,7 +294,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) return NOTIFY_DONE; } - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 7661bb065fa..3a078ad3aa4 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -201,22 +201,6 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) simscsi_sg_readwrite(sc, mode, offset); } -static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len) -{ - - int i; - unsigned thislen; - struct scatterlist *slp; - - scsi_for_each_sg(sc, slp, scsi_sg_count(sc), i) { - if (!len) - break; - thislen = min(len, slp->length); - memcpy(sg_virt(slp), buf, thislen); - len -= thislen; - } -} - static int simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) { @@ -258,7 +242,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) buf[6] = 0; /* reserved */ buf[7] = 0; /* various flags */ memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28); - simscsi_fillresult(sc, buf, 36); + scsi_sg_copy_from_buffer(sc, buf, 36); sc->result = GOOD; break; @@ -306,14 +290,15 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) buf[5] = 0; buf[6] = 2; buf[7] = 0; - simscsi_fillresult(sc, buf, 8); + scsi_sg_copy_from_buffer(sc, buf, 8); sc->result = GOOD; break; case MODE_SENSE: case MODE_SENSE_10: /* sd.c uses this to determine whether disk does write-caching. */ - simscsi_fillresult(sc, (char *)empty_zero_page, scsi_bufflen(sc)); + scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page, + PAGE_SIZE); sc->result = GOOD; break; diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h index 446c9aac924..9a3abf58cea 100644 --- a/arch/ia64/ia32/elfcore32.h +++ b/arch/ia64/ia32/elfcore32.h @@ -30,7 +30,19 @@ struct elf_siginfo int si_errno; /* errno */ }; -#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Hacks are here since types between compat_timeval (= pair of s32) and + * ia64-native timeval (= pair of s64) are not compatible, at least a file + * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on + * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval(). + */ +#define cputime_to_timeval(a,b) \ + do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0) +#else +#define jiffies_to_timeval(a,b) \ + do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0) +#endif struct elf_prstatus { diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index b1bf51fe97b..7e028ceb93b 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -38,6 +38,7 @@ #include <linux/eventpoll.h> #include <linux/personality.h> #include <linux/ptrace.h> +#include <linux/regset.h> #include <linux/stat.h> #include <linux/ipc.h> #include <linux/capability.h> @@ -2387,16 +2388,45 @@ get_free_idx (void) return -ESRCH; } +static void set_tls_desc(struct task_struct *p, int idx, + const struct ia32_user_desc *info, int n) +{ + struct thread_struct *t = &p->thread; + struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN]; + int cpu; + + /* + * We must not get preempted while modifying the TLS. + */ + cpu = get_cpu(); + + while (n-- > 0) { + if (LDT_empty(info)) { + desc->a = 0; + desc->b = 0; + } else { + desc->a = LDT_entry_a(info); + desc->b = LDT_entry_b(info); + } + + ++info; + ++desc; + } + + if (t == ¤t->thread) + load_TLS(t, cpu); + + put_cpu(); +} + /* * Set a given TLS descriptor: */ asmlinkage int sys32_set_thread_area (struct ia32_user_desc __user *u_info) { - struct thread_struct *t = ¤t->thread; struct ia32_user_desc info; - struct desc_struct *desc; - int cpu, idx; + int idx; if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; @@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info) if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; - - cpu = smp_processor_id(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - load_TLS(t, cpu); + set_tls_desc(current, idx, &info, 1); return 0; } @@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info) #define GET_PRESENT(desc) (((desc)->b >> 15) & 1) #define GET_USEABLE(desc) (((desc)->b >> 20) & 1) +static void fill_user_desc(struct ia32_user_desc *info, int idx, + const struct desc_struct *desc) +{ + info->entry_number = idx; + info->base_addr = GET_BASE(desc); + info->limit = GET_LIMIT(desc); + info->seg_32bit = GET_32BIT(desc); + info->contents = GET_CONTENTS(desc); + info->read_exec_only = !GET_WRITABLE(desc); + info->limit_in_pages = GET_LIMIT_PAGES(desc); + info->seg_not_present = !GET_PRESENT(desc); + info->useable = GET_USEABLE(desc); +} + asmlinkage int sys32_get_thread_area (struct ia32_user_desc __user *u_info) { @@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info) return -EINVAL; desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); + fill_user_desc(&info, idx, desc); if (copy_to_user(u_info, &info, sizeof(info))) return -EFAULT; return 0; } +struct regset_get { + void *kbuf; + void __user *ubuf; +}; + +struct regset_set { + const void *kbuf; + const void __user *ubuf; +}; + +struct regset_getset { + struct task_struct *target; + const struct user_regset *regset; + union { + struct regset_get get; + struct regset_set set; + } u; + unsigned int pos; + unsigned int count; + int ret; +}; + +static void getfpreg(struct task_struct *task, int regno, int *val) +{ + switch (regno / sizeof(int)) { + case 0: + *val = task->thread.fcr & 0xffff; + break; + case 1: + *val = task->thread.fsr & 0xffff; + break; + case 2: + *val = (task->thread.fsr>>16) & 0xffff; + break; + case 3: + *val = task->thread.fir; + break; + case 4: + *val = (task->thread.fir>>32) & 0xffff; + break; + case 5: + *val = task->thread.fdr; + break; + case 6: + *val = (task->thread.fdr >> 32) & 0xffff; + break; + } +} + +static void setfpreg(struct task_struct *task, int regno, int val) +{ + switch (regno / sizeof(int)) { + case 0: + task->thread.fcr = (task->thread.fcr & (~0x1f3f)) + | (val & 0x1f3f); + break; + case 1: + task->thread.fsr = (task->thread.fsr & (~0xffff)) | val; + break; + case 2: + task->thread.fsr = (task->thread.fsr & (~0xffff0000)) + | (val << 16); + break; + case 3: + task->thread.fir = (task->thread.fir & (~0xffffffff)) | val; + break; + case 5: + task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val; + break; + } +} + +static void access_fpreg_ia32(int regno, void *reg, + struct pt_regs *pt, struct switch_stack *sw, + int tos, int write) +{ + void *f; + + if ((regno += tos) >= 8) + regno -= 8; + if (regno < 4) + f = &pt->f8 + regno; + else if (regno <= 7) + f = &sw->f12 + (regno - 4); + else { + printk(KERN_ERR "regno must be less than 7 \n"); + return; + } + + if (write) + memcpy(f, reg, sizeof(struct _fpreg_ia32)); + else + memcpy(reg, f, sizeof(struct _fpreg_ia32)); +} + +static void do_fpregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + struct pt_regs *pt; + int start, end, tos; + char buf[80]; + + if (dst->count == 0 || unw_unwind_to_user(info) < 0) + return; + if (dst->pos < 7 * sizeof(int)) { + end = min((dst->pos + dst->count), + (unsigned int)(7 * sizeof(int))); + for (start = dst->pos; start < end; start += sizeof(int)) + getfpreg(task, start, (int *)(buf + start)); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, buf, + 0, 7 * sizeof(int)); + if (dst->ret || dst->count == 0) + return; + } + if (dst->pos < sizeof(struct ia32_user_i387_struct)) { + pt = task_pt_regs(task); + tos = (task->thread.fsr >> 11) & 7; + end = min(dst->pos + dst->count, + (unsigned int)(sizeof(struct ia32_user_i387_struct))); + start = (dst->pos - 7 * sizeof(int)) / + sizeof(struct _fpreg_ia32); + end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32); + for (; start < end; start++) + access_fpreg_ia32(start, + (struct _fpreg_ia32 *)buf + start, + pt, info->sw, tos, 0); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + buf, 7 * sizeof(int), + sizeof(struct ia32_user_i387_struct)); + if (dst->ret || dst->count == 0) + return; + } +} + +static void do_fpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + struct pt_regs *pt; + char buf[80]; + int end, start, tos; + + if (dst->count == 0 || unw_unwind_to_user(info) < 0) + return; + + if (dst->pos < 7 * sizeof(int)) { + start = dst->pos; + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, buf, + 0, 7 * sizeof(int)); + if (dst->ret) + return; + for (; start < dst->pos; start += sizeof(int)) + setfpreg(task, start, *((int *)(buf + start))); + if (dst->count == 0) + return; + } + if (dst->pos < sizeof(struct ia32_user_i387_struct)) { + start = (dst->pos - 7 * sizeof(int)) / + sizeof(struct _fpreg_ia32); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + buf, 7 * sizeof(int), + sizeof(struct ia32_user_i387_struct)); + if (dst->ret) + return; + pt = task_pt_regs(task); + tos = (task->thread.fsr >> 11) & 7; + end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32); + for (; start < end; start++) + access_fpreg_ia32(start, + (struct _fpreg_ia32 *)buf + start, + pt, info->sw, tos, 1); + if (dst->count == 0) + return; + } +} + +#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member))) +static void getfpxreg(struct task_struct *task, int start, int end, char *buf) +{ + int min_val; + + min_val = min(end, OFFSET(fop)); + while (start < min_val) { + if (start == OFFSET(cwd)) + *((short *)buf) = task->thread.fcr & 0xffff; + else if (start == OFFSET(swd)) + *((short *)buf) = task->thread.fsr & 0xffff; + else if (start == OFFSET(twd)) + *((short *)buf) = (task->thread.fsr>>16) & 0xffff; + buf += 2; + start += 2; + } + /* skip fop element */ + if (start == OFFSET(fop)) { + start += 2; + buf += 2; + } + while (start < end) { + if (start == OFFSET(fip)) + *((int *)buf) = task->thread.fir; + else if (start == OFFSET(fcs)) + *((int *)buf) = (task->thread.fir>>32) & 0xffff; + else if (start == OFFSET(foo)) + *((int *)buf) = task->thread.fdr; + else if (start == OFFSET(fos)) + *((int *)buf) = (task->thread.fdr>>32) & 0xffff; + else if (start == OFFSET(mxcsr)) + *((int *)buf) = ((task->thread.fcr>>32) & 0xff80) + | ((task->thread.fsr>>32) & 0x3f); + buf += 4; + start += 4; + } +} + +static void setfpxreg(struct task_struct *task, int start, int end, char *buf) +{ + int min_val, num32; + short num; + unsigned long num64; + + min_val = min(end, OFFSET(fop)); + while (start < min_val) { + num = *((short *)buf); + if (start == OFFSET(cwd)) { + task->thread.fcr = (task->thread.fcr & (~0x1f3f)) + | (num & 0x1f3f); + } else if (start == OFFSET(swd)) { + task->thread.fsr = (task->thread.fsr & (~0xffff)) | num; + } else if (start == OFFSET(twd)) { + task->thread.fsr = (task->thread.fsr & (~0xffff0000)) + | (((int)num) << 16); + } + buf += 2; + start += 2; + } + /* skip fop element */ + if (start == OFFSET(fop)) { + start += 2; + buf += 2; + } + while (start < end) { + num32 = *((int *)buf); + if (start == OFFSET(fip)) + task->thread.fir = (task->thread.fir & (~0xffffffff)) + | num32; + else if (start == OFFSET(foo)) + task->thread.fdr = (task->thread.fdr & (~0xffffffff)) + | num32; + else if (start == OFFSET(mxcsr)) { + num64 = num32 & 0xff10; + task->thread.fcr = (task->thread.fcr & + (~0xff1000000000UL)) | (num64<<32); + num64 = num32 & 0x3f; + task->thread.fsr = (task->thread.fsr & + (~0x3f00000000UL)) | (num64<<32); + } + buf += 4; + start += 4; + } +} + +static void do_fpxregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + struct pt_regs *pt; + char buf[128]; + int start, end, tos; + + if (dst->count == 0 || unw_unwind_to_user(info) < 0) + return; + if (dst->pos < OFFSET(st_space[0])) { + end = min(dst->pos + dst->count, (unsigned int)32); + getfpxreg(task, dst->pos, end, buf); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, buf, + 0, OFFSET(st_space[0])); + if (dst->ret || dst->count == 0) + return; + } + if (dst->pos < OFFSET(xmm_space[0])) { + pt = task_pt_regs(task); + tos = (task->thread.fsr >> 11) & 7; + end = min(dst->pos + dst->count, + (unsigned int)OFFSET(xmm_space[0])); + start = (dst->pos - OFFSET(st_space[0])) / 16; + end = (end - OFFSET(st_space[0])) / 16; + for (; start < end; start++) + access_fpreg_ia32(start, buf + 16 * start, pt, + info->sw, tos, 0); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + buf, OFFSET(st_space[0]), OFFSET(xmm_space[0])); + if (dst->ret || dst->count == 0) + return; + } + if (dst->pos < OFFSET(padding[0])) + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + &info->sw->f16, OFFSET(xmm_space[0]), + OFFSET(padding[0])); +} + +static void do_fpxregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + char buf[128]; + int start, end; + + if (dst->count == 0 || unw_unwind_to_user(info) < 0) + return; + + if (dst->pos < OFFSET(st_space[0])) { + start = dst->pos; + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + buf, 0, OFFSET(st_space[0])); + if (dst->ret) + return; + setfpxreg(task, start, dst->pos, buf); + if (dst->count == 0) + return; + } + if (dst->pos < OFFSET(xmm_space[0])) { + struct pt_regs *pt; + int tos; + pt = task_pt_regs(task); + tos = (task->thread.fsr >> 11) & 7; + start = (dst->pos - OFFSET(st_space[0])) / 16; + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + buf, OFFSET(st_space[0]), OFFSET(xmm_space[0])); + if (dst->ret) + return; + end = (dst->pos - OFFSET(st_space[0])) / 16; + for (; start < end; start++) + access_fpreg_ia32(start, buf + 16 * start, pt, info->sw, + tos, 1); + if (dst->count == 0) + return; + } + if (dst->pos < OFFSET(padding[0])) + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + &info->sw->f16, OFFSET(xmm_space[0]), + OFFSET(padding[0])); +} +#undef OFFSET + +static int do_regset_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct regset_getset info = { .target = target, .regset = regset, + .pos = pos, .count = count, + .u.set = { .kbuf = kbuf, .ubuf = ubuf }, + .ret = 0 }; + + if (target == current) + unw_init_running(call, &info); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, target); + (*call)(&ufi, &info); + } + + return info.ret; +} + +static int ia32_fpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_fpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int ia32_fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int ia32_fpxregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_fpxregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int ia32_fpxregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpxregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int ia32_genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (kbuf) { + u32 *kp = kbuf; + while (count > 0) { + *kp++ = getreg(target, pos); + pos += 4; + count -= 4; + } + } else { + u32 __user *up = ubuf; + while (count > 0) { + if (__put_user(getreg(target, pos), up++)) + return -EFAULT; + pos += 4; + count -= 4; + } + } + return 0; +} + +static int ia32_genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + if (kbuf) { + const u32 *kp = kbuf; + while (!ret && count > 0) { + putreg(target, pos, *kp++); + pos += 4; + count -= 4; + } + } else { + const u32 __user *up = ubuf; + u32 val; + while (!ret && count > 0) { + ret = __get_user(val, up++); + if (!ret) + putreg(target, pos, val); + pos += 4; + count -= 4; + } + } + return ret; +} + +static int ia32_tls_active(struct task_struct *target, + const struct user_regset *regset) +{ + struct thread_struct *t = &target->thread; + int n = GDT_ENTRY_TLS_ENTRIES; + while (n > 0 && desc_empty(&t->tls_array[n -1])) + --n; + return n; +} + +static int ia32_tls_get(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, void *kbuf, void __user *ubuf) +{ + const struct desc_struct *tls; + + if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) || + (pos % sizeof(struct ia32_user_desc)) != 0 || + (count % sizeof(struct ia32_user_desc)) != 0) + return -EINVAL; + + pos /= sizeof(struct ia32_user_desc); + count /= sizeof(struct ia32_user_desc); + + tls = &target->thread.tls_array[pos]; + + if (kbuf) { + struct ia32_user_desc *info = kbuf; + while (count-- > 0) + fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++, + tls++); + } else { + struct ia32_user_desc __user *u_info = ubuf; + while (count-- > 0) { + struct ia32_user_desc info; + fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++); + if (__copy_to_user(u_info++, &info, sizeof(info))) + return -EFAULT; + } + } + + return 0; +} + +static int ia32_tls_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const void __user *ubuf) +{ + struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; + const struct ia32_user_desc *info; + + if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) || + (pos % sizeof(struct ia32_user_desc)) != 0 || + (count % sizeof(struct ia32_user_desc)) != 0) + return -EINVAL; + + if (kbuf) + info = kbuf; + else if (__copy_from_user(infobuf, ubuf, count)) + return -EFAULT; + else + info = infobuf; + + set_tls_desc(target, + GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)), + info, count / sizeof(struct ia32_user_desc)); + + return 0; +} + +/* + * This should match arch/i386/kernel/ptrace.c:native_regsets. + * XXX ioperm? vm86? + */ +static const struct user_regset ia32_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct32)/4, + .size = 4, .align = 4, + .get = ia32_genregs_get, .set = ia32_genregs_set + }, + { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct ia32_user_i387_struct) / 4, + .size = 4, .align = 4, + .get = ia32_fpregs_get, .set = ia32_fpregs_set + }, + { + .core_note_type = NT_PRXFPREG, + .n = sizeof(struct ia32_user_fxsr_struct) / 4, + .size = 4, .align = 4, + .get = ia32_fpxregs_get, .set = ia32_fpxregs_set + }, + { + .core_note_type = NT_386_TLS, + .n = GDT_ENTRY_TLS_ENTRIES, + .bias = GDT_ENTRY_TLS_MIN, + .size = sizeof(struct ia32_user_desc), + .align = sizeof(struct ia32_user_desc), + .active = ia32_tls_active, + .get = ia32_tls_get, .set = ia32_tls_set, + }, +}; + +const struct user_regset_view user_ia32_view = { + .name = "i386", .e_machine = EM_386, + .regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets) +}; + long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) { diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 33e5a598672..13fd10e8699 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ - salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ + salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ unwind.o mca.o mca_asm.o topology.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 78f28d825f3..c7467f863c7 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) static struct acpi_table_slit __initdata *slit_table; +cpumask_t early_cpu_possible_map = CPU_MASK_NONE; static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) { @@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) (pa->apic_id << 8) | (pa->local_sapic_eid); /* nid should be overridden as logical node id later */ node_cpuid[srat_num_cpus].nid = pxm; + cpu_set(srat_num_cpus, early_cpu_possible_map); srat_num_cpus++; } @@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void) } /* set logical node id in cpu structure */ - for (i = 0; i < srat_num_cpus; i++) + for_each_possible_early_cpu(i) node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); printk(KERN_INFO "Number of logical nodes in system = %d\n", diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 0aebc6f79e9..230a6f92367 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C 1 #include <linux/sched.h> +#include <linux/pid.h> #include <linux/clocksource.h> #include <asm-ia64/processor.h> @@ -34,17 +35,29 @@ void foo(void) DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); + BUILD_BUG_ON(sizeof(struct upid) != 32); + DEFINE(IA64_UPID_SHIFT, 5); + BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); + DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); + DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); + DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime)); +#endif BLANK(); DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid)); + DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); + DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index fbe742ad2fd..f065093f8e9 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -24,6 +24,7 @@ int kdump_status[NR_CPUS]; static atomic_t kdump_cpu_frozen; atomic_t kdump_in_progress; static int kdump_on_init = 1; +static int kdump_on_fatal_mca = 1; static inline Elf64_Word *append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, @@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt) static void machine_kdump_on_init(void) { + crash_save_vmcoreinfo(); local_irq_disable(); kexec_disable_iosapic(); machine_kexec(ia64_kimage); @@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) struct ia64_mca_notify_die *nd; struct die_args *args = data; - if (!kdump_on_init) + if (!kdump_on_init && !kdump_on_fatal_mca) return NOTIFY_DONE; if (!ia64_kimage) { @@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) return NOTIFY_DONE; switch (val) { - case DIE_INIT_MONARCH_PROCESS: + case DIE_INIT_MONARCH_PROCESS: + if (kdump_on_init) { atomic_set(&kdump_in_progress, 1); *(nd->monarch_cpu) = -1; - break; - case DIE_INIT_MONARCH_LEAVE: + } + break; + case DIE_INIT_MONARCH_LEAVE: + if (kdump_on_init) machine_kdump_on_init(); - break; - case DIE_INIT_SLAVE_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); - break; - case DIE_MCA_RENDZVOUS_LEAVE: - if (atomic_read(&kdump_in_progress)) - unw_init_running(kdump_cpu_freeze, NULL); - break; - case DIE_MCA_MONARCH_LEAVE: - /* die_register->signr indicate if MCA is recoverable */ - if (!args->signr) - machine_kdump_on_init(); - break; + break; + case DIE_INIT_SLAVE_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_RENDZVOUS_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_MONARCH_LEAVE: + /* *(nd->data) indicate if MCA is recoverable */ + if (kdump_on_fatal_mca && !(*(nd->data))) { + atomic_set(&kdump_in_progress, 1); + *(nd->monarch_cpu) = -1; + machine_kdump_on_init(); + } + break; } return NOTIFY_DONE; } #ifdef CONFIG_SYSCTL -static ctl_table kdump_on_init_table[] = { +static ctl_table kdump_ctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "kdump_on_init", @@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kdump_on_fatal_mca", + .data = &kdump_on_fatal_mca, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; @@ -215,7 +231,7 @@ static ctl_table sys_table[] = { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, - .child = kdump_on_init_table, + .child = kdump_ctl_table, }, { .ctl_name = 0 } }; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 728d7247a1a..d45f215bc8f 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -37,6 +37,7 @@ #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mca.h> +#include <asm/tlbflush.h> #define EFI_DEBUG 0 @@ -403,6 +404,41 @@ efi_get_pal_addr (void) return NULL; } + +static u8 __init palo_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + + return sum; +} + +/* + * Parse and handle PALO table which is published at: + * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf + */ +static void __init handle_palo(unsigned long palo_phys) +{ + struct palo_table *palo = __va(palo_phys); + u8 checksum; + + if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) { + printk(KERN_INFO "PALO signature incorrect.\n"); + return; + } + + checksum = palo_checksum((u8 *)palo, palo->length); + if (checksum) { + printk(KERN_INFO "PALO checksum incorrect.\n"); + return; + } + + setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO); +} + void efi_map_pal_code (void) { @@ -432,6 +468,7 @@ efi_init (void) u64 efi_desc_size; char *cp, vendor[100] = "unknown"; int i; + unsigned long palo_phys; /* * It's too early to be able to use the standard kernel command line @@ -496,6 +533,8 @@ efi_init (void) efi.hcdp = EFI_INVALID_TABLE_ADDR; efi.uga = EFI_INVALID_TABLE_ADDR; + palo_phys = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < (int) efi.systab->nr_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { efi.mps = config_tables[i].table; @@ -515,10 +554,17 @@ efi_init (void) } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { efi.hcdp = config_tables[i].table; printk(" HCDP=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, + PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) { + palo_phys = config_tables[i].table; + printk(" PALO=0x%lx", config_tables[i].table); } } printk("\n"); + if (palo_phys != EFI_INVALID_TABLE_ADDR) + handle_palo(palo_phys); + runtime = __va(efi.systab->runtime); efi.get_time = phys_get_time; efi.set_time = phys_set_time; diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3c331c464b4..e49ad8c5dc6 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -570,6 +570,7 @@ GLOBAL_ENTRY(ia64_trace_syscall) br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value .ret3: (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i // disable interrupts br.cond.sptk .work_pending_syscall_end strace_error: @@ -710,6 +711,16 @@ ENTRY(ia64_leave_syscall) (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif .work_processed_syscall: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + adds r2=PT(LOADRS)+16,r12 +(pUStk) mov.m r22=ar.itc // fetch time at leave + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + adds r3=PT(AR_BSPSTORE)+16,r12 // deferred + ;; +#else adds r2=PT(LOADRS)+16,r12 adds r3=PT(AR_BSPSTORE)+16,r12 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -718,6 +729,7 @@ ENTRY(ia64_leave_syscall) ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" nop.i 0 ;; +#endif mov r16=ar.bsp // M2 get existing backing store pointer ld8 r18=[r2],PT(R9)-PT(B6) // load b6 (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? @@ -737,12 +749,21 @@ ENTRY(ia64_leave_syscall) ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r28=[r3],16 // M0|1 load cr.iip +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#else mov r22=r0 // A clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; +#endif ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled nop 0 @@ -759,7 +780,11 @@ ENTRY(ia64_leave_syscall) ld8.fill r1=[r3],16 // M0|1 load r1 (pUStk) mov r17=1 // A ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) st1 [r15]=r17 // M2|3 +#else (pUStk) st1 [r14]=r17 // M2|3 +#endif ld8.fill r13=[r3],16 // M0|1 mov f8=f0 // F clear f8 ;; @@ -775,12 +800,22 @@ ENTRY(ia64_leave_syscall) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition cover // B add current frame into dirty partition & set cr.ifs ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov r19=ar.bsp // M2 get new backing store pointer + st8 [r14]=r22 // M save time at leave + mov f10=f0 // F clear f10 + + mov r22=r0 // A clear r22 + movl r14=__kernel_syscall_via_epc // X + ;; +#else mov r19=ar.bsp // M2 get new backing store pointer mov f10=f0 // F clear f10 nop.m 0 movl r14=__kernel_syscall_via_epc // X ;; +#endif mov.m ar.csd=r0 // M2 clear ar.csd mov.m ar.ccv=r0 // M2 clear ar.ccv mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) @@ -913,10 +948,18 @@ GLOBAL_ENTRY(ia64_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + .pred.rel.mutex pUStk,pKStk +(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled +(pUStk) mov.m r22=ar.itc // M fetch time at leave + nop.i 0 + ;; +#else (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled nop.i 0 nop.i 0 ;; +#endif ld8 r29=[r16],16 // load cr.ipsr ld8 r28=[r17],16 // load cr.iip ;; @@ -938,15 +981,37 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 +#else (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 +#endif ;; ld8 r20=[r16],16 // ar.fpsr ld8.fill r15=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred +#endif ;; ld8.fill r14=[r16],16 ld8.fill r2=[r17] (pUStk) mov r17=1 ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; + // mib : mov add br -> mib : ld8 add br + // bbb_ : br nop cover;; mbb_ : mov br cover;; + // + // no one require bsp in r16 if (pKStk) branch is selected. +(pUStk) st8 [r3]=r22 // save time at leave +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + ld8.fill r3=[r16] // deferred + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch + mov r16=ar.bsp // get existing backing store pointer +#else ld8.fill r3=[r16] (pUStk) st1 [r18]=r17 // restore current->thread.on_ustack shr.u r18=r19,16 // get byte size of existing "dirty" partition @@ -954,6 +1019,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov r16=ar.bsp // get existing backing store pointer LOAD_PHYS_STACK_REG_SIZE(r17) (pKStk) br.cond.dpnt skip_rbs_switch +#endif /* * Restore user backing store. diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 44841971f07..c1625c7e177 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -61,13 +61,29 @@ ENTRY(fsys_getpid) .prologue .altrp b6 .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; ld4 r9=[r9] - add r8=IA64_TASK_TGID_OFFSET,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r17 ;; and r9=TIF_ALLWORK_MASK,r9 - ld4 r8=[r8] // r8 = current->tgid + ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid + ;; + add r8=IA64_PID_LEVEL_OFFSET,r17 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + mov r17=0 ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall @@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address) .altrp b6 .body add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r16 ;; ld4 r9=[r9] tnat.z p6,p7=r32 // check argument register for being NaT + ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid ;; and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_TASK_PID_OFFSET,r16 + add r8=IA64_PID_LEVEL_OFFSET,r17 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 ;; - ld4 r8=[r8] + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; cmp.ne p8,p0=0,r9 mov r17=-1 ;; @@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday) // Note that instructions are optimized for McKinley. McKinley can // process two bundles simultaneously and therefore we continuously // try to feed the CPU two bundles and then a stop. - // - // Additional note that code has changed a lot. Optimization is TBD. - // Comments begin with "?" are maybe outdated. - tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle - mov pr = r30,0xc000 // Set predicates according to function + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r31 // guard against Nat argument +(p6) br.cond.spnt.few .fail_einval movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address ;; + ld4 r2 = [r2] // process work pending flags movl r29 = itc_jitter_data // itc_jitter add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - ld4 r2 = [r2] // process work pending flags - ;; -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 + mov pr = r30,0xc000 // Set predicates according to function + ;; and r2 = TIF_ALLWORK_MASK,r2 -(p6) br.cond.spnt.few .fail_einval // ? deferred branch + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall +(p6) br.cond.spnt.many fsys_fallback_syscall ;; // Begin critical section .time_redo: @@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday) (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value - ;; // ? could be removed by moving the last add upward ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec ;; ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec @@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday) EX(.fail_efault, probe.w.fault r31, 3) xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) ;; - // ? simulate tbit.nz.or p7,p0 = r28,0 getf.sig r2 = f8 mf ;; ld4 r10 = [r20] // gtod_lock.sequence shr.u r2 = r2,r23 // shift by factor - ;; // ? overloaded 3 bundles! + ;; add r8 = r8,r2 // Add xtime.nsecs cmp4.ne p7,p0 = r28,r10 (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo @@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3) EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it ;; - mov r8 = r0 (p14) getf.sig r2 = f8 ;; + mov r8 = r0 (p14) shr.u r21 = r2, 4 ;; EX(.fail_efault, st8 [r31] = r9) @@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov.m r30=ar.itc // M get cycle for accounting +#else nop.m 0 +#endif nop.i 0 ;; mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore @@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mov.m r30=ar.itc is called in advance + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r30,r19 // elapsed time in user mode + ;; + add r20=r20,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r20 // update stime + st8 [r17]=r21 // update utime + ;; +#endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 mov rp=r14 // I0 set the real return addr and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index d3a41d5f8d1..ddeab4e36fd 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock) br.ret.sptk.many rp END(sched_clock) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +GLOBAL_ENTRY(cycle_to_cputime) + alloc r16=ar.pfs,1,0,0,0 + addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r32 + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(cycle_to_cputime) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + GLOBAL_ENTRY(start_kernel_thread) .prologue .save rp, r0 // this is the end of the call-chain diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 8e7193d5552..6da1f20d737 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page); EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ EXPORT_SYMBOL(csum_ipv6_magic); -#include <asm/semaphore.h> -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); -EXPORT_SYMBOL(__down_trylock); -EXPORT_SYMBOL(__up); - #include <asm/page.h> EXPORT_SYMBOL(clear_page); diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index d8be23fbe6b..5538471e8d6 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) static unsigned char count; static long last_time; - if (jiffies - last_time > 5*HZ) + if (time_after(jiffies, last_time + 5 * HZ)) count = 0; if (++count < 5) { last_time = jiffies; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 34f44d8be00..6678c49daba 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -805,8 +805,13 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + ;; + mov b6=r30 // I0 setup syscall handler branch reg early +#else nop.i 0 ;; +#endif mov.m r25=ar.unat // M2 (5 cyc) dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr @@ -817,7 +822,11 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov.m r30=ar.itc // M get cycle for accounting +#else mov b6=r30 // I0 setup syscall handler branch reg early +#endif cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit @@ -829,6 +838,30 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mov.m r30=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A +(pKStk) br.cond.spnt .skip_accounting // B unlikely skip + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime + ld8 r21=[r17] // M cumulated utime + sub r22=r19,r18 // A stime before leave + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp + sub r18=r30,r19 // A elapsed time in user + ;; + add r20=r20,r22 // A sum stime + add r21=r21,r18 // A sum utime + ;; + st8 [r16]=r20 // M update stime + st8 [r17]=r21 // M update utime + ;; +.skip_accounting: +#endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 nop 0 bsw.1 // B (6 cyc) regs are saved, switch to bank 1 @@ -928,6 +961,7 @@ END(interrupt) * - r27: saved ar.rsc * - r28: saved cr.iip * - r29: saved cr.ipsr + * - r30: ar.itc for accounting (don't touch) * - r31: saved pr * - b0: original contents (to be saved) * On exit: @@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault) DBG_FAULT(16) FAULT(16) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + /* + * There is no particular reason for this code to be here, other than + * that there happens to be space here that would go unused otherwise. + * If this fault ever gets "unreserved", simply moved the following + * code to a more suitable spot... + * + * account_sys_enter is called from SAVE_MIN* macros if accounting is + * enabled and if the macro is entered from user mode. + */ +ENTRY(account_sys_enter) + // mov.m r20=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel + ;; + ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r20,r19 // elapsed time in user mode + ;; + add r23=r23,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r23 // update stime + st8 [r17]=r21 // update utime + ;; + br.ret.sptk.many rp +END(account_sys_enter) +#endif + .org ia64_ivt+0x4400 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 8d9a446a0d1..233434f4f88 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = { { u, u, u }, /* 1F */ }; +/* Insert a long branch code */ +static void __kprobes set_brl_inst(void *from, void *to) +{ + s64 rel = ((s64) to - (s64) from) >> 4; + bundle_t *brl; + brl = (bundle_t *) ((u64) from & ~0xf); + brl->quad0.template = 0x05; /* [MLX](stop) */ + brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ + brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; + brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); + /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ + brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); +} + /* * In this function we check to see if the instruction * is IP relative instruction and update the kprobe @@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; } +/* Check the instruction in the slot is break */ +static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot) +{ + unsigned int major_opcode; + unsigned int template = bundle->quad0.template; + unsigned long kprobe_inst; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get Kprobe probe instruction at given slot*/ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + /* For break instruction, + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) { + /* Not a break instruction */ + return 0; + } + + /* Is a break instruction */ + return 1; +} + +/* + * In this function, we check whether the target bundle modifies IP or + * it triggers an exception. If so, it cannot be boostable. + */ +static int __kprobes can_boost(bundle_t *bundle, uint slot, + unsigned long bundle_addr) +{ + unsigned int template = bundle->quad0.template; + + do { + if (search_exception_tables(bundle_addr + slot) || + __is_ia64_break_inst(bundle, slot)) + return 0; /* exception may occur in this bundle*/ + } while ((++slot) < 3); + template &= 0x1e; + if (template >= 0x10 /* including B unit */ || + template == 0x04 /* including X unit */ || + template == 0x06) /* undefined */ + return 0; + + return 1; +} + +/* Prepare long jump bundle and disables other boosters if need */ +static void __kprobes prepare_booster(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr & ~0xFULL; + unsigned int slot = (unsigned long)p->addr & 0xf; + struct kprobe *other_kp; + + if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) { + set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1); + p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; + } + + /* disables boosters in previous slots */ + for (; addr < (unsigned long)p->addr; addr++) { + other_kp = get_kprobe((void *)addr); + if (other_kp) + other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE; + } +} + int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long) p->addr; @@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); + prepare_booster(p); + return 0; } @@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) src = &p->opcode.bundle; flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + (unsigned long)p->ainsn.insn + + sizeof(kprobe_opcode_t) * MAX_INSN_SIZE); + switch (p->ainsn.slot) { case 0: dest->quad0.slot0 = src->quad0.slot0; @@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); + free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); mutex_unlock(&kprobe_mutex); } /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction - * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust * the ip to point back to the original stack address. To set the IP address * to original stack address, handle the case where we need to fixup the * relative IP address and/or fixup branch register. @@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) if (slot == 1 && bundle_encoding[template][1] == L) slot = 2; - if (p->ainsn.inst_flag) { + if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { /* Fix relative IP address */ @@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) static int __kprobes is_ia64_break_inst(struct pt_regs *regs) { unsigned int slot = ia64_psr(regs)->ri; - unsigned int template, major_opcode; - unsigned long kprobe_inst; unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; bundle_t bundle; memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); - template = bundle.quad0.template; - - /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ - if (slot == 1 && bundle_encoding[template][1] == L) - slot++; - /* Get Kprobe probe instruction at given slot*/ - get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode); - - /* For break instruction, - * Bits 37:40 Major opcode to be zero - * Bits 27:32 X6 to be zero - * Bits 32:35 X3 to be zero - */ - if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) { - /* Not a break instruction */ - return 0; - } - - /* Is a break instruction */ - return 1; + return __is_ia64_break_inst(&bundle, slot); } static int __kprobes pre_kprobes_handler(struct die_args *args) @@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; ss_probe: +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) + if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + ia64_psr(regs)->ri = p->ainsn.slot; + regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL; + /* turn single stepping off */ + ia64_psr(regs)->ss = 0; + + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } +#endif prepare_ss(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; return 1; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6c18221dba3..705176b434b 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -69,6 +69,7 @@ * 2007-04-27 Russ Anderson <rja@sgi.com> * Support multiple cpus going through OS_MCA in the same event. */ +#include <linux/jiffies.h> #include <linux/types.h> #include <linux/init.h> #include <linux/sched.h> @@ -97,6 +98,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> +#include <asm/tlb.h> #include "mca_drv.h" #include "entry.h" @@ -107,11 +109,26 @@ # define IA64_MCA_DEBUG(fmt...) #endif +#define NOTIFY_INIT(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +#define NOTIFY_MCA(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + /* Used by mca_asm.S */ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ +DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ unsigned long __per_cpu_mca[NR_CPUS]; @@ -293,7 +310,8 @@ static void ia64_mlogbuf_dump_from_init(void) if (mlogbuf_finished) return; - if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { + if (mlogbuf_timestamp && + time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) { printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " " and the system seems to be messed up.\n"); ia64_mlogbuf_finish(0); @@ -762,9 +780,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) /* Mask all interrupts */ local_irq_save(flags); - if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; /* Register with the SAL monarch that the slave has @@ -772,17 +789,13 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) */ ia64_sal_mc_rendez(); - if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1); /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; /* Enable all interrupts */ @@ -1182,6 +1195,49 @@ all_in: return; } +/* mca_insert_tr + * + * Switch rid when TR reload and needed! + * iord: 1: itr, 2: itr; + * +*/ +static void mca_insert_tr(u64 iord) +{ + + int i; + u64 old_rr; + struct ia64_tr_entry *p; + unsigned long psr; + int cpu = smp_processor_id(); + + psr = ia64_clear_ic(); + for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { + p = &__per_cpu_idtrs[cpu][iord-1][i]; + if (p->pte & 0x1) { + old_rr = ia64_get_rr(p->ifa); + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, p->rr); + ia64_srlz_d(); + } + ia64_ptr(iord, p->ifa, p->itir >> 2); + ia64_srlz_i(); + if (iord & 0x1) { + ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (iord & 0x2) { + ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, old_rr); + ia64_srlz_d(); + } + } + } + ia64_set_psr(psr); +} + /* * ia64_mca_handler * @@ -1209,7 +1265,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, int recover, cpu = smp_processor_id(); struct task_struct *previous_current; struct ia64_mca_notify_die nd = - { .sos = sos, .monarch_cpu = &monarch_cpu }; + { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover }; static atomic_t mca_count; static cpumask_t mca_cpu; @@ -1225,9 +1281,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); - if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; if (sos->monarch) { @@ -1241,13 +1295,12 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, * does not work. */ ia64_mca_wakeup_all(); - if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); } else { while (cpu_isset(cpu, mca_cpu)) cpu_relax(); /* spin until monarch wakes us */ - } + } + + NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1); /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); @@ -1266,15 +1319,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, } else { /* Dump buffered message to console */ ia64_mlogbuf_finish(1); -#ifdef CONFIG_KEXEC - atomic_set(&kdump_in_progress, 1); - monarch_cpu = -1; -#endif } - if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + if (__get_cpu_var(ia64_mca_tr_reload)) { + mca_insert_tr(0x1); /*Reload dynamic itrs*/ + mca_insert_tr(0x2); /*Reload dynamic itrs*/ + } + + NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1); if (atomic_dec_return(&mca_count) > 0) { int i; @@ -1595,7 +1647,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); + NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0); mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); @@ -1632,17 +1684,15 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; while (monarch_cpu == -1) cpu_relax(); /* spin until monarch enters */ - if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); + while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -1651,9 +1701,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, } monarch_cpu = cpu; - if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1); /* * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be @@ -1668,12 +1716,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * to default_monarch_init_process() above and just print all the * tasks. */ - if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); @@ -1905,7 +1950,7 @@ ia64_mca_init(void) printk(KERN_INFO "Increasing MCA rendezvous timeout from " "%ld to %ld milliseconds\n", timeout, isrv.v0); timeout = isrv.v0; - (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0); + NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0); continue; } printk(KERN_ERR "Failed to register rendezvous interrupt " diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 8bc7d259e0c..a06d46548ff 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -219,8 +219,13 @@ ia64_reload_tr: mov r20=IA64_TR_CURRENT_STACK ;; itr.d dtr[r20]=r16 + GET_THIS_PADDR(r2, ia64_mca_tr_reload) + mov r18 = 1 ;; srlz.d + ;; + st8 [r2] =r18 + ;; done_tlb_purge_and_reload: diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index c9ac8bada78..7c548ac52bb 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -3,6 +3,18 @@ #include "entry.h" +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* read ar.itc in advance, and use it before leaving bank 0 */ +#define ACCOUNT_GET_STAMP \ +(pUStk) mov.m r20=ar.itc; +#define ACCOUNT_SYS_ENTER \ +(pUStk) br.call.spnt rp=account_sys_enter \ + ;; +#else +#define ACCOUNT_GET_STAMP +#define ACCOUNT_SYS_ENTER +#endif + /* * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves * the minimum state necessary that allows us to turn psr.ic back @@ -122,11 +134,13 @@ ;; \ .mem.offset 0,0; st8.spill [r16]=r2,16; \ .mem.offset 8,0; st8.spill [r17]=r3,16; \ + ACCOUNT_GET_STAMP \ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ ;; \ EXTRA; \ movl r1=__gp; /* establish kernel global pointer */ \ ;; \ + ACCOUNT_SYS_ENTER \ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ ;; diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a78b45f5fe2..c93420c9740 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void) for(node=0; node < MAX_NUMNODES; node++) cpus_clear(node_to_cpu_mask[node]); - for(cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_early_cpu(cpu) { node = -1; for (i = 0; i < NR_CPUS; ++i) if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index 2cb9425e042..e0dca8743db 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) while (offp < (s32 *) end) { wp = (u64 *) ia64_imva((char *) offp + *offp); - wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ - wp[1] = 0x0004000000000200UL; - wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ - wp[3] = 0x0084006880000200UL; + wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ + wp[1] = 0x0084006880000200UL; + wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ + wp[3] = 0x0004000000000200UL; ia64_fc(wp); ia64_fc(wp + 2); ++offp; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index a2aabfdc80d..c8e403752a0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx) do_each_thread (g, t) { if (t->thread.pfm_context == ctx) { ret = 0; - break; + goto out; } } while_each_thread (g, t); - +out: read_unlock(&tasklist_lock); DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); @@ -5511,7 +5511,7 @@ stop_monitoring: } static int -pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) { struct task_struct *task; pfm_context_t *ctx; @@ -5591,7 +5591,7 @@ pfm_interrupt_handler(int irq, void *arg) start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(arg, regs); total_cycles = ia64_get_itc(); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 49937a383b2..a5ea817cbcb 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -625,21 +625,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg) do_dump_task_fpu(current, info, arg); } -int -dump_task_regs(struct task_struct *task, elf_gregset_t *regs) -{ - struct unw_frame_info tcore_info; - - if (current == task) { - unw_init_running(do_copy_regs, regs); - } else { - memset(&tcore_info, 0, sizeof(tcore_info)); - unw_init_from_blocked_task(&tcore_info, task); - do_copy_task_regs(task, &tcore_info, regs); - } - return 1; -} - void ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) { @@ -647,21 +632,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) } int -dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst) -{ - struct unw_frame_info tcore_info; - - if (current == task) { - unw_init_running(do_dump_fpu, dst); - } else { - memset(&tcore_info, 0, sizeof(tcore_info)); - unw_init_from_blocked_task(&tcore_info, task); - do_dump_task_fpu(task, &tcore_info, dst); - } - return 1; -} - -int dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) { unw_init_running(do_dump_fpu, dst); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index ab784ec4319..2a9943b5947 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -3,6 +3,9 @@ * * Copyright (C) 1999-2005 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2006 Intel Co + * 2006-08-12 - IA64 Native Utrace implementation support added by + * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> * * Derived from the x86 and Alpha versions. */ @@ -17,6 +20,8 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/signal.h> +#include <linux/regset.h> +#include <linux/elf.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task) psr->dfh = 1; } -static int -access_fr (struct unw_frame_info *info, int regnum, int hi, - unsigned long *data, int write_access) -{ - struct ia64_fpreg fpval; - int ret; - - ret = unw_get_fr(info, regnum, &fpval); - if (ret < 0) - return ret; - - if (write_access) { - fpval.u.bits[hi] = *data; - ret = unw_set_fr(info, regnum, fpval); - } else - *data = fpval.u.bits[hi]; - return ret; -} - /* * Change the machine-state of CHILD such that it will return via the normal * kernel exit-path, rather than the syscall-exit path. @@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt, static int access_uarea (struct task_struct *child, unsigned long addr, - unsigned long *data, int write_access) -{ - unsigned long *ptr, regnum, urbs_end, cfm; - struct switch_stack *sw; - struct pt_regs *pt; -# define pt_reg_addr(pt, reg) ((void *) \ - ((unsigned long) (pt) \ - + offsetof(struct pt_regs, reg))) - - - pt = task_pt_regs(child); - sw = (struct switch_stack *) (child->thread.ksp + 16); - - if ((addr & 0x7) != 0) { - dprintk("ptrace: unaligned register address 0x%lx\n", addr); - return -1; - } - - if (addr < PT_F127 + 16) { - /* accessing fph */ - if (write_access) - ia64_sync_fph(child); - else - ia64_flush_fph(child); - ptr = (unsigned long *) - ((unsigned long) &child->thread.fph + addr); - } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) { - /* scratch registers untouched by kernel (saved in pt_regs) */ - ptr = pt_reg_addr(pt, f10) + (addr - PT_F10); - } else if (addr >= PT_F12 && addr < PT_F15 + 16) { - /* - * Scratch registers untouched by kernel (saved in - * switch_stack). - */ - ptr = (unsigned long *) ((long) sw - + (addr - PT_NAT_BITS - 32)); - } else if (addr < PT_AR_LC + 8) { - /* preserved state: */ - struct unw_frame_info info; - char nat = 0; - int ret; - - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) - return -1; - - switch (addr) { - case PT_NAT_BITS: - return access_nat_bits(child, pt, &info, - data, write_access); - - case PT_R4: case PT_R5: case PT_R6: case PT_R7: - if (write_access) { - /* read NaT bit first: */ - unsigned long dummy; - - ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4, - &dummy, &nat); - if (ret < 0) - return ret; - } - return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data, - &nat, write_access); - - case PT_B1: case PT_B2: case PT_B3: - case PT_B4: case PT_B5: - return unw_access_br(&info, (addr - PT_B1)/8 + 1, data, - write_access); - - case PT_AR_EC: - return unw_access_ar(&info, UNW_AR_EC, data, - write_access); - - case PT_AR_LC: - return unw_access_ar(&info, UNW_AR_LC, data, - write_access); - - default: - if (addr >= PT_F2 && addr < PT_F5 + 16) - return access_fr(&info, (addr - PT_F2)/16 + 2, - (addr & 8) != 0, data, - write_access); - else if (addr >= PT_F16 && addr < PT_F31 + 16) - return access_fr(&info, - (addr - PT_F16)/16 + 16, - (addr & 8) != 0, - data, write_access); - else { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - } - } else if (addr < PT_F9+16) { - /* scratch state */ - switch (addr) { - case PT_AR_BSP: - /* - * By convention, we use PT_AR_BSP to refer to - * the end of the user-level backing store. - * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) - * to get the real value of ar.bsp at the time - * the kernel was entered. - * - * Furthermore, when changing the contents of - * PT_AR_BSP (or PT_CFM) while the task is - * blocked in a system call, convert the state - * so that the non-system-call exit - * path is used. This ensures that the proper - * state will be picked up when resuming - * execution. However, it *also* means that - * once we write PT_AR_BSP/PT_CFM, it won't be - * possible to modify the syscall arguments of - * the pending system call any longer. This - * shouldn't be an issue because modifying - * PT_AR_BSP/PT_CFM generally implies that - * we're either abandoning the pending system - * call or that we defer it's re-execution - * (e.g., due to GDB doing an inferior - * function call). - */ - urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); - if (write_access) { - if (*data != urbs_end) { - if (in_syscall(pt)) - convert_to_non_syscall(child, - pt, - cfm); - /* - * Simulate user-level write - * of ar.bsp: - */ - pt->loadrs = 0; - pt->ar_bspstore = *data; - } - } else - *data = urbs_end; - return 0; - - case PT_CFM: - urbs_end = ia64_get_user_rbs_end(child, pt, &cfm); - if (write_access) { - if (((cfm ^ *data) & PFM_MASK) != 0) { - if (in_syscall(pt)) - convert_to_non_syscall(child, - pt, - cfm); - pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) - | (*data & PFM_MASK)); - } - } else - *data = cfm; - return 0; - - case PT_CR_IPSR: - if (write_access) { - unsigned long tmp = *data; - /* psr.ri==3 is a reserved value: SDM 2:25 */ - if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) - tmp &= ~IA64_PSR_RI; - pt->cr_ipsr = ((tmp & IPSR_MASK) - | (pt->cr_ipsr & ~IPSR_MASK)); - } else - *data = (pt->cr_ipsr & IPSR_MASK); - return 0; - - case PT_AR_RSC: - if (write_access) - pt->ar_rsc = *data | (3 << 2); /* force PL3 */ - else - *data = pt->ar_rsc; - return 0; - - case PT_AR_RNAT: - ptr = pt_reg_addr(pt, ar_rnat); - break; - case PT_R1: - ptr = pt_reg_addr(pt, r1); - break; - case PT_R2: case PT_R3: - ptr = pt_reg_addr(pt, r2) + (addr - PT_R2); - break; - case PT_R8: case PT_R9: case PT_R10: case PT_R11: - ptr = pt_reg_addr(pt, r8) + (addr - PT_R8); - break; - case PT_R12: case PT_R13: - ptr = pt_reg_addr(pt, r12) + (addr - PT_R12); - break; - case PT_R14: - ptr = pt_reg_addr(pt, r14); - break; - case PT_R15: - ptr = pt_reg_addr(pt, r15); - break; - case PT_R16: case PT_R17: case PT_R18: case PT_R19: - case PT_R20: case PT_R21: case PT_R22: case PT_R23: - case PT_R24: case PT_R25: case PT_R26: case PT_R27: - case PT_R28: case PT_R29: case PT_R30: case PT_R31: - ptr = pt_reg_addr(pt, r16) + (addr - PT_R16); - break; - case PT_B0: - ptr = pt_reg_addr(pt, b0); - break; - case PT_B6: - ptr = pt_reg_addr(pt, b6); - break; - case PT_B7: - ptr = pt_reg_addr(pt, b7); - break; - case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8: - case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8: - ptr = pt_reg_addr(pt, f6) + (addr - PT_F6); - break; - case PT_AR_BSPSTORE: - ptr = pt_reg_addr(pt, ar_bspstore); - break; - case PT_AR_UNAT: - ptr = pt_reg_addr(pt, ar_unat); - break; - case PT_AR_PFS: - ptr = pt_reg_addr(pt, ar_pfs); - break; - case PT_AR_CCV: - ptr = pt_reg_addr(pt, ar_ccv); - break; - case PT_AR_FPSR: - ptr = pt_reg_addr(pt, ar_fpsr); - break; - case PT_CR_IIP: - ptr = pt_reg_addr(pt, cr_iip); - break; - case PT_PR: - ptr = pt_reg_addr(pt, pr); - break; - /* scratch register */ - - default: - /* disallow accessing anything else... */ - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - } else if (addr <= PT_AR_SSD) { - ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD); - } else { - /* access debug registers */ - - if (addr >= PT_IBR) { - regnum = (addr - PT_IBR) >> 3; - ptr = &child->thread.ibr[0]; - } else { - regnum = (addr - PT_DBR) >> 3; - ptr = &child->thread.dbr[0]; - } - - if (regnum >= 8) { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } -#ifdef CONFIG_PERFMON - /* - * Check if debug registers are used by perfmon. This - * test must be done once we know that we can do the - * operation, i.e. the arguments are all valid, but - * before we start modifying the state. - * - * Perfmon needs to keep a count of how many processes - * are trying to modify the debug registers for system - * wide monitoring sessions. - * - * We also include read access here, because they may - * cause the PMU-installed debug register state - * (dbr[], ibr[]) to be reset. The two arrays are also - * used by perfmon, but we do not use - * IA64_THREAD_DBG_VALID. The registers are restored - * by the PMU context switch code. - */ - if (pfm_use_debug_registers(child)) return -1; -#endif - - if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { - child->thread.flags |= IA64_THREAD_DBG_VALID; - memset(child->thread.dbr, 0, - sizeof(child->thread.dbr)); - memset(child->thread.ibr, 0, - sizeof(child->thread.ibr)); - } - - ptr += regnum; - - if ((regnum & 1) && write_access) { - /* don't let the user set kernel-level breakpoints: */ - *ptr = *data & ~(7UL << 56); - return 0; - } - } - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; -} + unsigned long *data, int write_access); static long ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) @@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, if (test_thread_flag(TIF_RESTORE_RSE)) ia64_sync_krbs(); } + +/* Utrace implementation starts here */ +struct regset_get { + void *kbuf; + void __user *ubuf; +}; + +struct regset_set { + const void *kbuf; + const void __user *ubuf; +}; + +struct regset_getset { + struct task_struct *target; + const struct user_regset *regset; + union { + struct regset_get get; + struct regset_set set; + } u; + unsigned int pos; + unsigned int count; + int ret; +}; + +static int +access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + int ret; + char nat = 0; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_GR_OFFSET(1): + ptr = &pt->r1; + break; + case ELF_GR_OFFSET(2): + case ELF_GR_OFFSET(3): + ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2)); + break; + case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7): + if (write_access) { + /* read NaT bit first: */ + unsigned long dummy; + + ret = unw_get_gr(info, addr/8, &dummy, &nat); + if (ret < 0) + return ret; + } + return unw_access_gr(info, addr/8, data, &nat, write_access); + case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11): + ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8); + break; + case ELF_GR_OFFSET(12): + case ELF_GR_OFFSET(13): + ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12); + break; + case ELF_GR_OFFSET(14): + ptr = &pt->r14; + break; + case ELF_GR_OFFSET(15): + ptr = &pt->r15; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_breg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_BR_OFFSET(0): + ptr = &pt->b0; + break; + case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5): + return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8, + data, write_access); + case ELF_BR_OFFSET(6): + ptr = &pt->b6; + break; + case ELF_BR_OFFSET(7): + ptr = &pt->b7; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_areg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long cfm, urbs_end; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) { + switch (addr) { + case ELF_AR_RSC_OFFSET: + /* force PL3 */ + if (write_access) + pt->ar_rsc = *data | (3 << 2); + else + *data = pt->ar_rsc; + return 0; + case ELF_AR_BSP_OFFSET: + /* + * By convention, we use PT_AR_BSP to refer to + * the end of the user-level backing store. + * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) + * to get the real value of ar.bsp at the time + * the kernel was entered. + * + * Furthermore, when changing the contents of + * PT_AR_BSP (or PT_CFM) while the task is + * blocked in a system call, convert the state + * so that the non-system-call exit + * path is used. This ensures that the proper + * state will be picked up when resuming + * execution. However, it *also* means that + * once we write PT_AR_BSP/PT_CFM, it won't be + * possible to modify the syscall arguments of + * the pending system call any longer. This + * shouldn't be an issue because modifying + * PT_AR_BSP/PT_CFM generally implies that + * we're either abandoning the pending system + * call or that we defer it's re-execution + * (e.g., due to GDB doing an inferior + * function call). + */ + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (*data != urbs_end) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + /* + * Simulate user-level write + * of ar.bsp: + */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; + case ELF_AR_BSPSTORE_OFFSET: + ptr = &pt->ar_bspstore; + break; + case ELF_AR_RNAT_OFFSET: + ptr = &pt->ar_rnat; + break; + case ELF_AR_CCV_OFFSET: + ptr = &pt->ar_ccv; + break; + case ELF_AR_UNAT_OFFSET: + ptr = &pt->ar_unat; + break; + case ELF_AR_FPSR_OFFSET: + ptr = &pt->ar_fpsr; + break; + case ELF_AR_PFS_OFFSET: + ptr = &pt->ar_pfs; + break; + case ELF_AR_LC_OFFSET: + return unw_access_ar(info, UNW_AR_LC, data, + write_access); + case ELF_AR_EC_OFFSET: + return unw_access_ar(info, UNW_AR_EC, data, + write_access); + case ELF_AR_CSD_OFFSET: + ptr = &pt->ar_csd; + break; + case ELF_AR_SSD_OFFSET: + ptr = &pt->ar_ssd; + } + } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) { + switch (addr) { + case ELF_CR_IIP_OFFSET: + ptr = &pt->cr_iip; + break; + case ELF_CFM_OFFSET: + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (((cfm ^ *data) & PFM_MASK) != 0) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) + | (*data & PFM_MASK)); + } + } else + *data = cfm; + return 0; + case ELF_CR_IPSR_OFFSET: + if (write_access) { + unsigned long tmp = *data; + /* psr.ri==3 is a reserved value: SDM 2:25 */ + if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) + tmp &= ~IA64_PSR_RI; + pt->cr_ipsr = ((tmp & IPSR_MASK) + | (pt->cr_ipsr & ~IPSR_MASK)); + } else + *data = (pt->cr_ipsr & IPSR_MASK); + return 0; + } + } else if (addr == ELF_NAT_OFFSET) + return access_nat_bits(target, pt, info, + data, write_access); + else if (addr == ELF_PR_OFFSET) + ptr = &pt->pr; + else + return -1; + + if (write_access) + *ptr = *data; + else + *data = *ptr; + + return 0; +} + +static int +access_elf_reg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15)) + return access_elf_gpreg(target, info, addr, data, write_access); + else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) + return access_elf_breg(target, info, addr, data, write_access); + else + return access_elf_areg(target, info, addr, data, write_access); +} + +void do_gpregs_get(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index, min_copy; + + if (unw_unwind_to_user(info) < 0) + return; + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1 - gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_GR_OFFSET(16); + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret || dst->count == 0) + return; + } + + /* r16-r31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_CR_IIP_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_AR_END_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + } +} + +void do_gpregs_set(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1-gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + i = dst->pos; + index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* gr16-gr31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + i = dst->pos; + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret) + return; + for (; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + i = dst->pos; + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + } +} + +#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t)) + +void do_fpregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + elf_fpreg_t tmp[30]; + int index, min_copy, i; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t); + + min_copy = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t), + index++) + if (unw_get_fr(info, i / sizeof(elf_fpreg_t), + &tmp[index])) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fph */ + if (dst->count > 0) { + ia64_flush_fph(dst->target); + if (task->thread.flags & IA64_THREAD_FPH_VALID) + dst->ret = user_regset_copyout( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + else + /* Zero fill instead. */ + dst->ret = user_regset_copyout_zero( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + ELF_FP_OFFSET(32), -1); + } +} + +void do_fpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + elf_fpreg_t fpreg, tmp[30]; + int index, start, end; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + start = dst->pos; + end = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->ret) + return; + + if (start & 0xF) { /* only write high part */ + if (unw_get_fr(info, start / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0] + = fpreg.u.bits[0]; + start &= ~0xFUL; + } + if (end & 0xF) { /* only write low part */ + if (unw_get_fr(info, end / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1] + = fpreg.u.bits[1]; + end = (end + 0xF) & ~0xFUL; + } + + for ( ; start < end ; start += sizeof(elf_fpreg_t)) { + index = start / sizeof(elf_fpreg_t); + if (unw_set_fr(info, index, tmp[index - 2])) { + dst->ret = -EIO; + return; + } + } + if (dst->ret || dst->count == 0) + return; + } + + /* fph */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) { + ia64_sync_fph(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + } +} + +static int +do_regset_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct regset_getset info = { .target = target, .regset = regset, + .pos = pos, .count = count, + .u.set = { .kbuf = kbuf, .ubuf = ubuf }, + .ret = 0 }; + + if (target == current) + unw_init_running(call, &info); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, target); + (*call)(&ufi, &info); + } + + return info.ret; +} + +static int +gpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_gpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int gpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_gpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static void do_gpregs_writeback(struct unw_frame_info *info, void *arg) +{ + do_sync_rbs(info, ia64_sync_user_rbs); +} + +/* + * This is called to write back the register backing store. + * ptrace does this before it stops, so that a tracer reading the user + * memory after the thread stops will get the current register data. + */ +static int +gpregs_writeback(struct task_struct *target, + const struct user_regset *regset, + int now) +{ + if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) + return 0; + tsk_set_notify_resume(target); + return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, + NULL, NULL); +} + +static int +fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32; +} + +static int fpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_fpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int +access_uarea(struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access) +{ + unsigned int pos = -1; /* an invalid value */ + int ret; + unsigned long *ptr, regnum; + + if ((addr & 0x7) != 0) { + dprintk("ptrace: unaligned register address 0x%lx\n", addr); + return -1; + } + if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) || + (addr >= PT_R7 + 8 && addr < PT_B1) || + (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) || + (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + + switch (addr) { + case PT_F32 ... (PT_F127 + 15): + pos = addr - PT_F32 + ELF_FP_OFFSET(32); + break; + case PT_F2 ... (PT_F5 + 15): + pos = addr - PT_F2 + ELF_FP_OFFSET(2); + break; + case PT_F10 ... (PT_F31 + 15): + pos = addr - PT_F10 + ELF_FP_OFFSET(10); + break; + case PT_F6 ... (PT_F9 + 15): + pos = addr - PT_F6 + ELF_FP_OFFSET(6); + break; + } + + if (pos != -1) { + if (write_access) + ret = fpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = fpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + switch (addr) { + case PT_NAT_BITS: + pos = ELF_NAT_OFFSET; + break; + case PT_R4 ... PT_R7: + pos = addr - PT_R4 + ELF_GR_OFFSET(4); + break; + case PT_B1 ... PT_B5: + pos = addr - PT_B1 + ELF_BR_OFFSET(1); + break; + case PT_AR_EC: + pos = ELF_AR_EC_OFFSET; + break; + case PT_AR_LC: + pos = ELF_AR_LC_OFFSET; + break; + case PT_CR_IPSR: + pos = ELF_CR_IPSR_OFFSET; + break; + case PT_CR_IIP: + pos = ELF_CR_IIP_OFFSET; + break; + case PT_CFM: + pos = ELF_CFM_OFFSET; + break; + case PT_AR_UNAT: + pos = ELF_AR_UNAT_OFFSET; + break; + case PT_AR_PFS: + pos = ELF_AR_PFS_OFFSET; + break; + case PT_AR_RSC: + pos = ELF_AR_RSC_OFFSET; + break; + case PT_AR_RNAT: + pos = ELF_AR_RNAT_OFFSET; + break; + case PT_AR_BSPSTORE: + pos = ELF_AR_BSPSTORE_OFFSET; + break; + case PT_PR: + pos = ELF_PR_OFFSET; + break; + case PT_B6: + pos = ELF_BR_OFFSET(6); + break; + case PT_AR_BSP: + pos = ELF_AR_BSP_OFFSET; + break; + case PT_R1 ... PT_R3: + pos = addr - PT_R1 + ELF_GR_OFFSET(1); + break; + case PT_R12 ... PT_R15: + pos = addr - PT_R12 + ELF_GR_OFFSET(12); + break; + case PT_R8 ... PT_R11: + pos = addr - PT_R8 + ELF_GR_OFFSET(8); + break; + case PT_R16 ... PT_R31: + pos = addr - PT_R16 + ELF_GR_OFFSET(16); + break; + case PT_AR_CCV: + pos = ELF_AR_CCV_OFFSET; + break; + case PT_AR_FPSR: + pos = ELF_AR_FPSR_OFFSET; + break; + case PT_B0: + pos = ELF_BR_OFFSET(0); + break; + case PT_B7: + pos = ELF_BR_OFFSET(7); + break; + case PT_AR_CSD: + pos = ELF_AR_CSD_OFFSET; + break; + case PT_AR_SSD: + pos = ELF_AR_SSD_OFFSET; + break; + } + + if (pos != -1) { + if (write_access) + ret = gpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = gpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + /* access debug registers */ + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + ptr = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + ptr = &child->thread.dbr[0]; + } + + if (regnum >= 8) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } +#ifdef CONFIG_PERFMON + /* + * Check if debug registers are used by perfmon. This + * test must be done once we know that we can do the + * operation, i.e. the arguments are all valid, but + * before we start modifying the state. + * + * Perfmon needs to keep a count of how many processes + * are trying to modify the debug registers for system + * wide monitoring sessions. + * + * We also include read access here, because they may + * cause the PMU-installed debug register state + * (dbr[], ibr[]) to be reset. The two arrays are also + * used by perfmon, but we do not use + * IA64_THREAD_DBG_VALID. The registers are restored + * by the PMU context switch code. + */ + if (pfm_use_debug_registers(child)) + return -1; +#endif + + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(child->thread.dbr, 0, + sizeof(child->thread.dbr)); + memset(child->thread.ibr, 0, + sizeof(child->thread.ibr)); + } + + ptr += regnum; + + if ((regnum & 1) && write_access) { + /* don't let the user set kernel-level breakpoints: */ + *ptr = *data & ~(7UL << 56); + return 0; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static const struct user_regset native_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), + .get = gpregs_get, .set = gpregs_set, + .writeback = gpregs_writeback + }, + { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), + .get = fpregs_get, .set = fpregs_set, .active = fpregs_active + }, +}; + +static const struct user_regset_view user_ia64_view = { + .name = "ia64", + .e_machine = EM_IA_64, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) +{ +#ifdef CONFIG_IA32_SUPPORT + extern const struct user_regset_view user_ia32_view; + if (IS_IA32_PROCESS(task_pt_regs(tsk))) + return &user_ia32_view; +#endif + return &user_ia64_view; +} diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 779c3cca206..b11bb50a197 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -44,8 +44,8 @@ #include <linux/smp.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/semaphore.h> -#include <asm/semaphore.h> #include <asm/sal.h> #include <asm/uaccess.h> diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c deleted file mode 100644 index 2724ef3fbae..00000000000 --- a/arch/ia64/kernel/semaphore.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * IA-64 semaphore implementation (derived from x86 version). - * - * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - */ - -/* - * Semaphores are implemented using a two-way counter: The "count" - * variable is decremented for each process that tries to acquire the - * semaphore, while the "sleepers" variable is a count of such - * acquires. - * - * Notably, the inline "up()" and "down()" functions can efficiently - * test if they need to do any extra work (up needs to do something - * only if count was negative before the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is contention - * on the lock, and as such all this is the "non-critical" part of the - * whole semaphore business. The critical part is the inline stuff in - * <asm/semaphore.h> where we want to avoid any extra jumps and calls. - */ -#include <linux/sched.h> -#include <linux/init.h> - -#include <asm/errno.h> -#include <asm/semaphore.h> - -/* - * Logic: - * - Only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - When we go from a non-negative count to a negative do we - * (a) synchronize with the "sleepers" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -void -__up (struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -void __sched __down (struct semaphore *sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -int __sched __down_interruptible (struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers ++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for having decremented the - * count. - */ -int -__down_trylock (struct semaphore *sem) -{ - unsigned long flags; - int sleepers; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4aa9eaea76c..5015ca1275c 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -59,6 +59,7 @@ #include <asm/setup.h> #include <asm/smp.h> #include <asm/system.h> +#include <asm/tlbflush.h> #include <asm/unistd.h> #include <asm/hpsim.h> @@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) return 0; } +/* + * Similar to "filter_rsvd_memory()", but the reserved memory ranges + * are not filtered out. + */ +int __init +filter_memory(unsigned long start, unsigned long end, void *arg) +{ + void (*func)(unsigned long, unsigned long, int); + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + func = arg; + if (start < end) + call_pernode_memory(__pa(start), end - start, func); + return 0; +} + static void __init sort_regions (struct rsvd_region *rsvd_region, int max) { @@ -493,6 +517,8 @@ setup_arch (char **cmdline_p) acpi_table_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); + per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? + 32 : cpus_weight(early_cpu_possible_map)), additional_cpus); # endif #else # ifdef CONFIG_SMP @@ -946,9 +972,10 @@ cpu_init (void) #endif /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ - if (ia64_pal_vm_summary(NULL, &vmi) == 0) + if (ia64_pal_vm_summary(NULL, &vmi) == 0) { max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; - else { + setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL); + } else { printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); max_ctx = (1U << 15) - 1; /* use architected minimum */ } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 4e446aa5f4a..9a9d4c48933 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -213,6 +213,19 @@ send_IPI_allbutself (int op) * Called with preemption disabled. */ static inline void +send_IPI_mask(cpumask_t mask, int op) +{ + unsigned int cpu; + + for_each_cpu_mask(cpu, mask) { + send_IPI_single(cpu, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void send_IPI_all (int op) { int i; @@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int } EXPORT_SYMBOL(smp_call_function_single); +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * <mask> The set of cpus to run on. Must not include the current cpu. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <wait> If true, wait (atomically) until function + * has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + struct call_data_struct data; + cpumask_t allbutself; + int cpus; + + spin_lock(&call_lock); + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/ + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(IPI_CALL_FUNC); + else + send_IPI_mask(mask, IPI_CALL_FUNC); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + call_data = NULL; + + spin_unlock(&call_lock); + return 0; + +} +EXPORT_SYMBOL(smp_call_function_mask); + /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 32ee5979a04..16483be18c0 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -400,9 +400,9 @@ smp_callin (void) /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); cpu_set(cpuid, cpu_online_map); - unlock_ipi_calllock(); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); + unlock_ipi_calllock(); smp_setup_percpu_timer(); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 17fda5293c6..48e15a51782 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + +#include <linux/kernel_stat.h> + +extern cputime_t cycle_to_cputime(u64 cyc); + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +{ + struct thread_info *pi = task_thread_info(prev); + struct thread_info *ni = task_thread_info(next); + cputime_t delta_stime, delta_utime; + __u64 now; + + now = ia64_get_itc(); + + delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); + account_system_time(prev, 0, delta_stime); + account_system_time_scaled(prev, delta_stime); + + if (pi->ac_utime) { + delta_utime = cycle_to_cputime(pi->ac_utime); + account_user_time(prev, delta_utime); + account_user_time_scaled(prev, delta_utime); + } + + pi->ac_stamp = ni->ac_stamp = now; + ni->ac_stime = ni->ac_utime = 0; +} + +/* + * Account time for a transition between system, hard irq or soft irq state. + * Note that this function is called with interrupts enabled. + */ +void account_system_vtime(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + unsigned long flags; + cputime_t delta_stime; + __u64 now; + + local_irq_save(flags); + + now = ia64_get_itc(); + + delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); + account_system_time(tsk, 0, delta_stime); + account_system_time_scaled(tsk, delta_stime); + ti->ac_stime = 0; + + ti->ac_stamp = now; + + local_irq_restore(flags); +} + +/* + * Called from the timer interrupt handler to charge accumulated user time + * to the current process. Must be called with interrupts disabled. + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + struct thread_info *ti = task_thread_info(p); + cputime_t delta_utime; + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(p, delta_utime); + account_user_time_scaled(p, delta_utime); + ti->ac_utime = 0; + } +} + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static irqreturn_t timer_interrupt (int irq, void *dev_id) { diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 6903361d11a..ff0e7c10faa 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -13,6 +13,7 @@ * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. * 2001/01/17 Add support emulation of unaligned kernel accesses. */ +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/tty.h> @@ -1290,7 +1291,7 @@ within_logging_rate_limit (void) { static unsigned long count, last_time; - if (jiffies - last_time > 5*HZ) + if (time_after(jiffies, last_time + 5 * HZ)) count = 0; if (count < 5) { last_time = jiffies; diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig new file mode 100644 index 00000000000..7914e482850 --- /dev/null +++ b/arch/ia64/kvm/Kconfig @@ -0,0 +1,49 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || IA64 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel Itanium 2 processors support" + depends on KVM && m + ---help--- + Provides support for KVM on Itanium 2 processors equipped with the VT + extensions. + +config KVM_TRACE + bool + +endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile new file mode 100644 index 00000000000..52353397a1a --- /dev/null +++ b/arch/ia64/kvm/Makefile @@ -0,0 +1,58 @@ +#This Make file is to generate asm-offsets.h and build source. +# + +#Generate asm-offsets.h for vmm module build +offsets-file := asm-offsets.h + +always := $(offsets-file) +targets := $(offsets-file) +targets += arch/ia64/kvm/asm-offsets.s +clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S) + +# Default sed regexp - multiline due to syntax constraints +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef + +quiet_cmd_offsets = GEN $@ +define cmd_offsets + (set -e; \ + echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ + echo "#define __ASM_KVM_OFFSETS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Makefile"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef +# We use internal rules to avoid the "is up to date" message from make +arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c + $(call if_changed_dep,cc_s_c) + +$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s + $(call cmd,offsets) + +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ +EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) + +kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o +obj-$(CONFIG_KVM) += kvm.o + +FORCE : $(obj)/$(offsets-file) +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o +#Add link memcpy and memset to avoid possible structure assignment error +kvm-intel-objs += ../lib/memset.o ../lib/memcpy.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c new file mode 100644 index 00000000000..4e3dc13a619 --- /dev/null +++ b/arch/ia64/kvm/asm-offsets.c @@ -0,0 +1,251 @@ +/* + * asm-offsets.c Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + * + * Anthony Xu <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * Copyright (c) 2007 Intel Corporation KVM support. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/autoconf.h> +#include <linux/kvm_host.h> + +#include "vcpu.h" + +#define task_struct kvm_vcpu + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : :) + +#define OFFSET(_sym, _str, _mem) \ + DEFINE(_sym, offsetof(_str, _mem)); + +void foo(void) +{ + DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); + DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); + + BLANK(); + + DEFINE(VMM_VCPU_META_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, + arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_VRR0_OFFSET, + offsetof(struct kvm_vcpu, arch.vrr[0])); + DEFINE(VMM_VPD_IRR0_OFFSET, + offsetof(struct vpd, irr[0])); + DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_check)); + DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VPD_VHPI_OFFSET, + offsetof(struct vpd, vhpi)); + DEFINE(VMM_VCPU_VSA_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.vsa_base)); + DEFINE(VMM_VCPU_VPD_OFFSET, + offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VCPU_IRQ_CHECK, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VCPU_TIMER_PENDING, + offsetof(struct kvm_vcpu, arch.timer_pending)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_ITC_OFS_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_offset)); + DEFINE(VMM_VCPU_LAST_ITC_OFFSET, + offsetof(struct kvm_vcpu, arch.last_itc)); + DEFINE(VMM_VCPU_SAVED_GP_OFFSET, + offsetof(struct kvm_vcpu, arch.saved_gp)); + + BLANK(); + + DEFINE(VMM_PT_REGS_B6_OFFSET, + offsetof(struct kvm_pt_regs, b6)); + DEFINE(VMM_PT_REGS_B7_OFFSET, + offsetof(struct kvm_pt_regs, b7)); + DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_csd)); + DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_ssd)); + DEFINE(VMM_PT_REGS_R8_OFFSET, + offsetof(struct kvm_pt_regs, r8)); + DEFINE(VMM_PT_REGS_R9_OFFSET, + offsetof(struct kvm_pt_regs, r9)); + DEFINE(VMM_PT_REGS_R10_OFFSET, + offsetof(struct kvm_pt_regs, r10)); + DEFINE(VMM_PT_REGS_R11_OFFSET, + offsetof(struct kvm_pt_regs, r11)); + DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, + offsetof(struct kvm_pt_regs, cr_ipsr)); + DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, + offsetof(struct kvm_pt_regs, cr_iip)); + DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, + offsetof(struct kvm_pt_regs, cr_ifs)); + DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_unat)); + DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, + offsetof(struct kvm_pt_regs, ar_pfs)); + DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, + offsetof(struct kvm_pt_regs, ar_rsc)); + DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_rnat)); + + DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, + offsetof(struct kvm_pt_regs, ar_bspstore)); + DEFINE(VMM_PT_REGS_PR_OFFSET, + offsetof(struct kvm_pt_regs, pr)); + DEFINE(VMM_PT_REGS_B0_OFFSET, + offsetof(struct kvm_pt_regs, b0)); + DEFINE(VMM_PT_REGS_LOADRS_OFFSET, + offsetof(struct kvm_pt_regs, loadrs)); + DEFINE(VMM_PT_REGS_R1_OFFSET, + offsetof(struct kvm_pt_regs, r1)); + DEFINE(VMM_PT_REGS_R12_OFFSET, + offsetof(struct kvm_pt_regs, r12)); + DEFINE(VMM_PT_REGS_R13_OFFSET, + offsetof(struct kvm_pt_regs, r13)); + DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, + offsetof(struct kvm_pt_regs, ar_fpsr)); + DEFINE(VMM_PT_REGS_R15_OFFSET, + offsetof(struct kvm_pt_regs, r15)); + DEFINE(VMM_PT_REGS_R14_OFFSET, + offsetof(struct kvm_pt_regs, r14)); + DEFINE(VMM_PT_REGS_R2_OFFSET, + offsetof(struct kvm_pt_regs, r2)); + DEFINE(VMM_PT_REGS_R3_OFFSET, + offsetof(struct kvm_pt_regs, r3)); + DEFINE(VMM_PT_REGS_R16_OFFSET, + offsetof(struct kvm_pt_regs, r16)); + DEFINE(VMM_PT_REGS_R17_OFFSET, + offsetof(struct kvm_pt_regs, r17)); + DEFINE(VMM_PT_REGS_R18_OFFSET, + offsetof(struct kvm_pt_regs, r18)); + DEFINE(VMM_PT_REGS_R19_OFFSET, + offsetof(struct kvm_pt_regs, r19)); + DEFINE(VMM_PT_REGS_R20_OFFSET, + offsetof(struct kvm_pt_regs, r20)); + DEFINE(VMM_PT_REGS_R21_OFFSET, + offsetof(struct kvm_pt_regs, r21)); + DEFINE(VMM_PT_REGS_R22_OFFSET, + offsetof(struct kvm_pt_regs, r22)); + DEFINE(VMM_PT_REGS_R23_OFFSET, + offsetof(struct kvm_pt_regs, r23)); + DEFINE(VMM_PT_REGS_R24_OFFSET, + offsetof(struct kvm_pt_regs, r24)); + DEFINE(VMM_PT_REGS_R25_OFFSET, + offsetof(struct kvm_pt_regs, r25)); + DEFINE(VMM_PT_REGS_R26_OFFSET, + offsetof(struct kvm_pt_regs, r26)); + DEFINE(VMM_PT_REGS_R27_OFFSET, + offsetof(struct kvm_pt_regs, r27)); + DEFINE(VMM_PT_REGS_R28_OFFSET, + offsetof(struct kvm_pt_regs, r28)); + DEFINE(VMM_PT_REGS_R29_OFFSET, + offsetof(struct kvm_pt_regs, r29)); + DEFINE(VMM_PT_REGS_R30_OFFSET, + offsetof(struct kvm_pt_regs, r30)); + DEFINE(VMM_PT_REGS_R31_OFFSET, + offsetof(struct kvm_pt_regs, r31)); + DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, + offsetof(struct kvm_pt_regs, ar_ccv)); + DEFINE(VMM_PT_REGS_F6_OFFSET, + offsetof(struct kvm_pt_regs, f6)); + DEFINE(VMM_PT_REGS_F7_OFFSET, + offsetof(struct kvm_pt_regs, f7)); + DEFINE(VMM_PT_REGS_F8_OFFSET, + offsetof(struct kvm_pt_regs, f8)); + DEFINE(VMM_PT_REGS_F9_OFFSET, + offsetof(struct kvm_pt_regs, f9)); + DEFINE(VMM_PT_REGS_F10_OFFSET, + offsetof(struct kvm_pt_regs, f10)); + DEFINE(VMM_PT_REGS_F11_OFFSET, + offsetof(struct kvm_pt_regs, f11)); + DEFINE(VMM_PT_REGS_R4_OFFSET, + offsetof(struct kvm_pt_regs, r4)); + DEFINE(VMM_PT_REGS_R5_OFFSET, + offsetof(struct kvm_pt_regs, r5)); + DEFINE(VMM_PT_REGS_R6_OFFSET, + offsetof(struct kvm_pt_regs, r6)); + DEFINE(VMM_PT_REGS_R7_OFFSET, + offsetof(struct kvm_pt_regs, r7)); + DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, eml_unat)); + DEFINE(VMM_VCPU_IIPA_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_iipa)); + DEFINE(VMM_VCPU_OPCODE_OFFSET, + offsetof(struct kvm_vcpu, arch.opcode)); + DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); + DEFINE(VMM_VCPU_ISR_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_isr)); + DEFINE(VMM_PT_REGS_R16_SLOT, + (((offsetof(struct kvm_pt_regs, r16) + - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); + BLANK(); + + DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); + DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.insvc[0])); + DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); + DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); + + DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); + DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); + DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); + DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); + DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); + DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); + DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); + DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); + DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); + DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); + DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); + DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); + DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); + DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); + DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); + DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); + DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); + DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); + DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); + DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); + DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); + DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); + DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); + DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); + DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); + DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); + DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); + DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); + DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); + DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); + DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); + DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); + DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); + DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); + BLANK(); +} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 00000000000..6df07324013 --- /dev/null +++ b/arch/ia64/kvm/kvm-ia64.c @@ -0,0 +1,1806 @@ + +/* + * kvm_ia64.c: Basic KVM suppport On Itanium series processors + * + * + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/bitops.h> +#include <linux/hrtimer.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/gcc_intrin.h> +#include <asm/pal.h> +#include <asm/cacheflush.h> +#include <asm/div64.h> +#include <asm/tlb.h> + +#include "misc.h" +#include "vti.h" +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +static unsigned long kvm_vmm_base; +static unsigned long kvm_vsa_base; +static unsigned long kvm_vm_buffer; +static unsigned long kvm_vm_buffer_size; +unsigned long kvm_vmm_gp; + +static long vp_env_info; + +static struct kvm_vmm_info *kvm_vmm_info; + +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + + +struct fdesc{ + unsigned long ip; + unsigned long gp; +}; + +static void kvm_flush_icache(unsigned long start, unsigned long len) +{ + int l; + + for (l = 0; l < (len + 32); l += 32) + ia64_fc(start + l); + + ia64_sync_i(); + ia64_srlz_i(); +} + +static void kvm_flush_tlb_all(void) +{ + unsigned long i, j, count0, count1, stride0, stride1, addr; + long flags; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, + (u64)opt_handler); + + return iprv.status; +} + +static DEFINE_SPINLOCK(vp_lock); + +void kvm_arch_hardware_enable(void *garbage) +{ + long status; + long tmp_base; + unsigned long pte; + unsigned long saved_psr; + int slot; + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + spin_lock(&vp_lock); + status = ia64_pal_vp_init_env(kvm_vsa_base ? + VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); + if (status != 0) { + printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); + return ; + } + + if (!kvm_vsa_base) { + kvm_vsa_base = tmp_base; + printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); + } + spin_unlock(&vp_lock); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_hardware_disable(void *garbage) +{ + + long status; + int slot; + unsigned long pte; + unsigned long saved_psr; + unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + status = ia64_pal_vp_exit_env(host_iva); + if (status) + printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", + status); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_USER_MEMORY: + + r = 1; + break; + default: + r = 0; + } + return r; + +} + +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + + return dev; +} + +static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 1; + return 0; +} + +static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct kvm_mmio_req *p; + struct kvm_io_device *mmio_dev; + + p = kvm_get_vcpu_ioreq(vcpu); + + if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) + goto mmio; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; + + if (vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(kvm_run->mmio.data, &p->data, p->size); + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; +mmio: + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + if (mmio_dev) { + if (!p->dir) + kvm_iodevice_write(mmio_dev, p->addr, p->size, + &p->data); + else + kvm_iodevice_read(mmio_dev, p->addr, p->size, + &p->data); + + } else + printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); + p->state = STATE_IORESP_READY; + + return 1; +} + +static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_PAL_CALL) + return kvm_pal_emul(vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 2; + return 0; + } +} + +static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + kvm_sal_emul(vcpu); + return 1; + } else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 3; + return 0; + } + +} + +/* + * offset: address offset to IPI space. + * value: deliver value. + */ +static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, + uint64_t vector) +{ + switch (dm) { + case SAPIC_FIXED: + kvm_apic_set_irq(vcpu, vector, 0); + break; + case SAPIC_NMI: + kvm_apic_set_irq(vcpu, 2, 0); + break; + case SAPIC_EXTINT: + kvm_apic_set_irq(vcpu, 0, 0); + break; + case SAPIC_INIT: + case SAPIC_PMI: + default: + printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); + break; + } +} + +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, + unsigned long eid) +{ + union ia64_lid lid; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (kvm->vcpus[i]) { + lid.val = VCPU_LID(kvm->vcpus[i]); + if (lid.id == id && lid.eid == eid) + return kvm->vcpus[i]; + } + } + + return NULL; +} + +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm_vcpu *target_vcpu; + struct kvm_pt_regs *regs; + union ia64_ipi_a addr = p->u.ipi_data.addr; + union ia64_ipi_d data = p->u.ipi_data.data; + + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); + + if (!target_vcpu->arch.launched) { + regs = vcpu_regs(target_vcpu); + + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + + target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&target_vcpu->wq)) + wake_up_interruptible(&target_vcpu->wq); + } else { + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); + } + + return 1; +} + +struct call_data { + struct kvm_ptc_g ptc_g_data; + struct kvm_vcpu *vcpu; +}; + +static void vcpu_global_purge(void *info) +{ + struct call_data *p = (struct call_data *)info; + struct kvm_vcpu *vcpu = p->vcpu; + + if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + return; + + set_bit(KVM_REQ_PTC_G, &vcpu->requests); + if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { + vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = + p->ptc_g_data; + } else { + clear_bit(KVM_REQ_PTC_G, &vcpu->requests); + vcpu->arch.ptc_g_count = 0; + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); + } +} + +static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm *kvm = vcpu->kvm; + struct call_data call_data; + int i; + call_data.ptc_g_data = p->u.ptc_g_data; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == + KVM_MP_STATE_UNINITIALIZED || + vcpu == kvm->vcpus[i]) + continue; + + if (waitqueue_active(&kvm->vcpus[i]->wq)) + wake_up_interruptible(&kvm->vcpus[i]->wq); + + if (kvm->vcpus[i]->cpu != -1) { + call_data.vcpu = kvm->vcpus[i]; + smp_call_function_single(kvm->vcpus[i]->cpu, + vcpu_global_purge, &call_data, 0, 1); + } else + printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); + + } + return 1; +} + +static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + + ktime_t kt; + long itc_diff; + unsigned long vcpu_now_itc; + + unsigned long expires; + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + + if (time_after(vcpu_now_itc, vpd->itm)) { + vcpu->arch.timer_check = 1; + return 1; + } + itc_diff = vpd->itm - vcpu_now_itc; + if (itc_diff < 0) + itc_diff = -itc_diff; + + expires = div64_64(itc_diff, cyc_per_usec); + kt = ktime_set(0, 1000 * expires); + vcpu->arch.ht_active = 1; + hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + hrtimer_cancel(p_ht); + vcpu->arch.ht_active = 0; + + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) + return -EINTR; + return 1; + } else { + printk(KERN_ERR"kvm: Unsupported userspace halt!"); + return 0; + } +} + +static int handle_vm_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + +static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_VM_PANIC] = handle_vm_error, + [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, + [EXIT_REASON_PAL_CALL] = handle_pal_call, + [EXIT_REASON_SAL_CALL] = handle_sal_call, + [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, + [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IPI] = handle_ipi, + [EXIT_REASON_PTC_G] = handle_global_purge, + +}; + +static const int kvm_vti_max_exit_handlers = + sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); + +static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_exit_data; + + p_exit_data = kvm_get_exit_data(vcpu); + return p_exit_data->exit_reason; +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 exit_reason = kvm_get_exit_reason(vcpu); + vcpu->arch.last_exit = exit_reason; + + if (exit_reason < kvm_vti_max_exit_handlers + && kvm_vti_exit_handlers[exit_reason]) + return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static inline void vti_set_rr6(unsigned long rr6) +{ + ia64_set_rr(RR6, rr6); + ia64_srlz_i(); +} + +static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) +{ + unsigned long pte; + struct kvm *kvm = vcpu->kvm; + int r; + + /*Insert a pair of tr to map vmm*/ + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vmm_tr_slot = r; + /*Insert a pairt of tr to map data of vm*/ + pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, + pte, KVM_VM_DATA_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vm_tr_slot = r; + r = 0; +out: + return r; + +} + +static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) +{ + + ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); + ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); + +} + +static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + if (vcpu->arch.last_run_cpu != cpu || + per_cpu(last_vcpu, cpu) != vcpu) { + per_cpu(last_vcpu, cpu) = vcpu; + vcpu->arch.last_run_cpu = cpu; + kvm_flush_tlb_all(); + } + + vcpu->arch.host_rr6 = ia64_get_rr(RR6); + vti_set_rr6(vcpu->arch.vmm_rr); + return kvm_insert_vmm_mapping(vcpu); +} +static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) +{ + kvm_purge_vmm_mapping(vcpu); + vti_set_rr6(vcpu->arch.host_rr6); +} + +static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + union context *host_ctx, *guest_ctx; + int r; + + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto out; + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + kvm_vcpu_post_transition(vcpu); + r = 0; +out: + return r; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + +again: + preempt_disable(); + + kvm_prepare_guest_switch(vcpu); + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + goto out; + } + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + r = vti_vcpu_run(vcpu, kvm_run); + if (r < 0) { + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; + } + + vcpu->arch.launched = 1; + vcpu->guest_mode = 0; + local_irq_enable(); + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + r = kvm_handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (!need_resched()) + goto again; + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto again; + } + + return r; +} + +static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) +{ + struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); + + if (!vcpu->mmio_is_write) + memcpy(&p->data, vcpu->mmio_data, 8); + p->state = STATE_IORESP_READY; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + kvm_set_mmio_data(vcpu); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + r = __vcpu_run(vcpu, kvm_run); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +/* + * Allocate 16M memory for every vm to hold its specific data. + * Its memory map is defined in kvm_host.h. + */ +static struct kvm *kvm_alloc_kvm(void) +{ + + struct kvm *kvm; + uint64_t vm_base; + + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); + + if (!vm_base) + return ERR_PTR(-ENOMEM); + printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); + + /* Zero all pages before use! */ + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + + kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm->arch.vm_base = vm_base; + + return kvm; +} + +struct kvm_io_range { + unsigned long start; + unsigned long size; + unsigned long type; +}; + +static const struct kvm_io_range io_ranges[] = { + {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, + {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, + {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, + {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, + {PIB_START, PIB_SIZE, GPFN_PIB}, +}; + +static void kvm_build_io_pmt(struct kvm *kvm) +{ + unsigned long i, j; + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); + i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, + io_ranges[i].type, 0); + } + +} + +/*Use unused rids to virtualize guest rid.*/ +#define GUEST_PHYSICAL_RR0 0x1739 +#define GUEST_PHYSICAL_RR4 0x2739 +#define VMM_INIT_RR 0x1660 + +static void kvm_init_vm(struct kvm *kvm) +{ + long vm_base; + + BUG_ON(!kvm); + + kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; + kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; + kvm->arch.vmm_init_rr = VMM_INIT_RR; + + vm_base = kvm->arch.vm_base; + if (vm_base) { + kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; + kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; + kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; + } + + /* + *Fill P2M entries for MMIO/IO ranges + */ + kvm_build_io_pmt(kvm); + +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kvm_alloc_kvm(); + + if (IS_ERR(kvm)) + return ERR_PTR(-ENOMEM); + kvm_init_vm(kvm); + + return kvm; + +} + +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, + struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int r; + + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + vpd->vgr[i] = regs->vpd.vgr[i]; + vpd->vbgr[i] = regs->vpd.vbgr[i]; + } + for (i = 0; i < 128; i++) + vpd->vcr[i] = regs->vpd.vcr[i]; + vpd->vhpi = regs->vpd.vhpi; + vpd->vnat = regs->vpd.vnat; + vpd->vbnat = regs->vpd.vbnat; + vpd->vpsr = regs->vpd.vpsr; + + vpd->vpr = regs->vpd.vpr; + + r = -EFAULT; + r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, + sizeof(union context)); + if (r) + goto out; + r = copy_from_user(vcpu + 1, regs->saved_stack + + sizeof(struct kvm_vcpu), + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); + if (r) + goto out; + vcpu->arch.exit_data = + ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + + RESTORE_REGS(mp_state); + RESTORE_REGS(vmm_rr); + memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); + memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); + RESTORE_REGS(itr_regions); + RESTORE_REGS(dtr_regions); + RESTORE_REGS(tc_regions); + RESTORE_REGS(irq_check); + RESTORE_REGS(itc_check); + RESTORE_REGS(timer_check); + RESTORE_REGS(timer_pending); + RESTORE_REGS(last_itc); + for (i = 0; i < 8; i++) { + vcpu->arch.vrr[i] = regs->vrr[i]; + vcpu->arch.ibr[i] = regs->ibr[i]; + vcpu->arch.dbr[i] = regs->dbr[i]; + } + for (i = 0; i < 4; i++) + vcpu->arch.insvc[i] = regs->insvc[i]; + RESTORE_REGS(xtp); + RESTORE_REGS(metaphysical_rr0); + RESTORE_REGS(metaphysical_rr4); + RESTORE_REGS(metaphysical_saved_rr0); + RESTORE_REGS(metaphysical_saved_rr4); + RESTORE_REGS(fp_psr); + RESTORE_REGS(saved_gp); + + vcpu->arch.irq_new_pending = 1; + vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + set_bit(KVM_REQ_RESUME, &vcpu->requests); + + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + struct kvm_userspace_memory_region kvm_userspace_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + goto out; + kvm_userspace_mem.slot = kvm_mem.slot; + kvm_userspace_mem.flags = kvm_mem.flags; + kvm_userspace_mem.guest_phys_addr = + kvm_mem.guest_phys_addr; + kvm_userspace_mem.memory_size = kvm_mem.memory_size; + r = kvm_vm_ioctl_set_memory_region(kvm, + &kvm_userspace_mem, 0); + if (r) + goto out; + break; + } + case KVM_CREATE_IRQCHIP: + r = -EFAULT; + r = kvm_ioapic_init(kvm); + if (r) + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, + irq_event.irq, + irq_event.level); + mutex_unlock(&kvm->lock); + r = 0; + } + break; + } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } + default: + ; + } +out: + return r; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; + +} +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + + return -EINVAL; +} + +static int kvm_alloc_vmm_area(void) +{ + if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { + kvm_vmm_base = __get_free_pages(GFP_KERNEL, + get_order(KVM_VMM_SIZE)); + if (!kvm_vmm_base) + return -ENOMEM; + + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; + + printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", + kvm_vmm_base, kvm_vm_buffer); + } + + return 0; +} + +static void kvm_free_vmm_area(void) +{ + if (kvm_vmm_base) { + /*Zero this area before free to avoid bits leak!!*/ + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); + kvm_vmm_base = 0; + kvm_vm_buffer = 0; + kvm_vsa_base = 0; + } +} + +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. Leave it as blank for IA64. + */ +void decache_vcpus_on_cpu(int cpu) +{ +} + +static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +static int vti_init_vpd(struct kvm_vcpu *vcpu) +{ + int i; + union cpuid3_t cpuid3; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (IS_ERR(vpd)) + return PTR_ERR(vpd); + + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + /*Set vac and vdc fields*/ + vpd->vac.a_from_int_cr = 1; + vpd->vac.a_to_int_cr = 1; + vpd->vac.a_from_psr = 1; + vpd->vac.a_from_cpuid = 1; + vpd->vac.a_cover = 1; + vpd->vac.a_bsw = 1; + vpd->vac.a_int = 1; + vpd->vdc.d_vmsw = 1; + + /*Set virtual buffer*/ + vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; + + return 0; +} + +static int vti_create_vp(struct kvm_vcpu *vcpu) +{ + long ret; + struct vpd *vpd = vcpu->arch.vpd; + unsigned long vmm_ivt; + + vmm_ivt = kvm_vmm_info->vmm_ivt; + + printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); + + ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); + + if (ret) { + printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); + return -EINVAL; + } + return 0; +} + +static void init_ptce_info(struct kvm_vcpu *vcpu) +{ + ia64_ptce_info_t ptce = {0}; + + ia64_get_ptce(&ptce); + vcpu->arch.ptce_base = ptce.base; + vcpu->arch.ptce_count[0] = ptce.count[0]; + vcpu->arch.ptce_count[1] = ptce.count[1]; + vcpu->arch.ptce_stride[0] = ptce.stride[0]; + vcpu->arch.ptce_stride[1] = ptce.stride[1]; +} + +static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) +{ + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + + if (hrtimer_cancel(p_ht)) + hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) +{ + struct kvm_vcpu *vcpu; + wait_queue_head_t *q; + + vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); + if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + goto out; + + q = &vcpu->wq; + if (waitqueue_active(q)) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(q); + } +out: + vcpu->arch.timer_check = 1; + return HRTIMER_NORESTART; +} + +#define PALE_RESET_ENTRY 0x80000000ffffffb0UL + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + int r; + int i; + long itc_offset; + struct kvm *kvm = vcpu->kvm; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + union context *p_ctx = &vcpu->arch.guest; + struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); + + /*Init vcpu context for first run.*/ + if (IS_ERR(vmm_vcpu)) + return PTR_ERR(vmm_vcpu); + + if (vcpu->vcpu_id == 0) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + /*Set entry address for first run.*/ + regs->cr_iip = PALE_RESET_ENTRY; + + /*Initilize itc offset for vcpus*/ + itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + v->arch.itc_offset = itc_offset; + v->arch.last_itc = 0; + } + } else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; + + r = -ENOMEM; + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); + if (!vcpu->arch.apic) + goto out; + vcpu->arch.apic->vcpu = vcpu; + + p_ctx->gr[1] = 0; + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[13] = (unsigned long)vmm_vcpu; + p_ctx->psr = 0x1008522000UL; + p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ + p_ctx->caller_unat = 0; + p_ctx->pr = 0x0; + p_ctx->ar[36] = 0x0; /*unat*/ + p_ctx->ar[19] = 0x0; /*rnat*/ + p_ctx->ar[18] = (unsigned long)vmm_vcpu + + ((sizeof(struct kvm_vcpu)+15) & ~15); + p_ctx->ar[64] = 0x0; /*pfs*/ + p_ctx->cr[0] = 0x7e04UL; + p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; + p_ctx->cr[8] = 0x3c; + + /*Initilize region register*/ + p_ctx->rr[0] = 0x30; + p_ctx->rr[1] = 0x30; + p_ctx->rr[2] = 0x30; + p_ctx->rr[3] = 0x30; + p_ctx->rr[4] = 0x30; + p_ctx->rr[5] = 0x30; + p_ctx->rr[7] = 0x30; + + /*Initilize branch register 0*/ + p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; + + vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; + vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; + vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; + + hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + vcpu->arch.hlt_timer.function = hlt_timer_fn; + + vcpu->arch.last_run_cpu = -1; + vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vsa_base = kvm_vsa_base; + vcpu->arch.__gp = kvm_vmm_gp; + vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + init_ptce_info(vcpu); + + r = 0; +out: + return r; +} + +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ + unsigned long psr; + int r; + + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + if (r) + goto fail; + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); + if (r) + goto fail; + + r = vti_init_vpd(vcpu); + if (r) { + printk(KERN_DEBUG"kvm: vpd init error!!\n"); + goto uninit; + } + + r = vti_create_vp(vcpu); + if (r) + goto uninit; + + kvm_purge_vmm_mapping(vcpu); + local_irq_restore(psr); + + return 0; +uninit: + kvm_vcpu_uninit(vcpu); +fail: + return r; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + unsigned long vm_base = kvm->arch.vm_base; + int r; + int cpu; + + r = -ENOMEM; + if (!vm_base) { + printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); + goto fail; + } + vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu->kvm = kvm; + + cpu = get_cpu(); + vti_vcpu_load(vcpu, cpu); + r = vti_vcpu_setup(vcpu, id); + put_cpu(); + + if (r) { + printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); + goto fail; + } + + return vcpu; +fail: + return ERR_PTR(r); +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; +} + +static void free_kvm(struct kvm *kvm) +{ + unsigned long vm_base = kvm->arch.vm_base; + + if (vm_base) { + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); + } + +} + +static void kvm_release_vm_pages(struct kvm *kvm) +{ + struct kvm_memory_slot *memslot; + int i, j; + unsigned long base_gfn; + + for (i = 0; i < kvm->nmemslots; i++) { + memslot = &kvm->memslots[i]; + base_gfn = memslot->base_gfn; + + for (j = 0; j < memslot->npages; j++) { + if (memslot->rmap[j]) + put_page((struct page *)memslot->rmap[j]); + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->arch.vioapic); + kvm_release_vm_pages(kvm); + kvm_free_physmem(kvm); + free_kvm(kvm); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + if (cpu != vcpu->cpu) { + vcpu->cpu = cpu; + if (vcpu->arch.ht_active) + kvm_migrate_hlt_timer(vcpu); + } +} + +#define SAVE_REGS(_x) regs->_x = vcpu->arch._x + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + int r; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + regs->vpd.vgr[i] = vpd->vgr[i]; + regs->vpd.vbgr[i] = vpd->vbgr[i]; + } + for (i = 0; i < 128; i++) + regs->vpd.vcr[i] = vpd->vcr[i]; + regs->vpd.vhpi = vpd->vhpi; + regs->vpd.vnat = vpd->vnat; + regs->vpd.vbnat = vpd->vbnat; + regs->vpd.vpsr = vpd->vpsr; + regs->vpd.vpr = vpd->vpr; + + r = -EFAULT; + r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, + sizeof(union context)); + if (r) + goto out; + r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); + if (r) + goto out; + SAVE_REGS(mp_state); + SAVE_REGS(vmm_rr); + memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); + memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); + SAVE_REGS(itr_regions); + SAVE_REGS(dtr_regions); + SAVE_REGS(tc_regions); + SAVE_REGS(irq_check); + SAVE_REGS(itc_check); + SAVE_REGS(timer_check); + SAVE_REGS(timer_pending); + SAVE_REGS(last_itc); + for (i = 0; i < 8; i++) { + regs->vrr[i] = vcpu->arch.vrr[i]; + regs->ibr[i] = vcpu->arch.ibr[i]; + regs->dbr[i] = vcpu->arch.dbr[i]; + } + for (i = 0; i < 4; i++) + regs->insvc[i] = vcpu->arch.insvc[i]; + regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + SAVE_REGS(xtp); + SAVE_REGS(metaphysical_rr0); + SAVE_REGS(metaphysical_rr4); + SAVE_REGS(metaphysical_saved_rr0); + SAVE_REGS(metaphysical_saved_rr4); + SAVE_REGS(fp_psr); + SAVE_REGS(saved_gp); + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + + hrtimer_cancel(&vcpu->arch.hlt_timer); + kfree(vcpu->arch.apic); +} + + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + unsigned long i; + struct page *page; + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + unsigned long base_gfn = memslot->base_gfn; + + for (i = 0; i < npages; i++) { + page = gfn_to_page(kvm, base_gfn + i); + kvm_set_pmt_entry(kvm, base_gfn + i, + page_to_pfn(page) << PAGE_SHIFT, + _PAGE_AR_RWX|_PAGE_MA_WB); + memslot->rmap[i] = (unsigned long)page; + } + + return 0; +} + + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); +} + +static int vti_cpu_has_kvm_support(void) +{ + long avail = 1, status = 1, control = 1; + long ret; + + ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); + if (ret) + goto out; + + if (!(avail & PAL_PROC_VM_BIT)) + goto out; + + printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); + + ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); + if (ret) + goto out; + printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); + + if (!(vp_env_info & VP_OPCODE)) { + printk(KERN_WARNING"kvm: No opcode ability on hardware, " + "vm_env_info:0x%lx\n", vp_env_info); + } + + return 1; +out: + return 0; +} + +static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, + struct module *module) +{ + unsigned long module_base; + unsigned long vmm_size; + + unsigned long vmm_offset, func_offset, fdesc_offset; + struct fdesc *p_fdesc; + + BUG_ON(!module); + + if (!kvm_vmm_base) { + printk("kvm: kvm area hasn't been initilized yet!!\n"); + return -EFAULT; + } + + /*Calculate new position of relocated vmm module.*/ + module_base = (unsigned long)module->module_core; + vmm_size = module->core_size; + if (unlikely(vmm_size > KVM_VMM_SIZE)) + return -EFAULT; + + memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_flush_icache(kvm_vmm_base, vmm_size); + + /*Recalculate kvm_vmm_info based on new VMM*/ + vmm_offset = vmm_info->vmm_ivt - module_base; + kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; + printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", + kvm_vmm_info->vmm_ivt); + + fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; + kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); + + printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", + KVM_VMM_BASE+func_offset); + + fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; + kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); + + kvm_vmm_gp = p_fdesc->gp; + + printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", + kvm_vmm_info->vmm_entry); + printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", + KVM_VMM_BASE + func_offset); + + return 0; +} + +int kvm_arch_init(void *opaque) +{ + int r; + struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; + + if (!vti_cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); + r = -EOPNOTSUPP; + goto out; + } + + if (kvm_vmm_info) { + printk(KERN_ERR "kvm: Already loaded VMM module!\n"); + r = -EEXIST; + goto out; + } + + r = -ENOMEM; + kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); + if (!kvm_vmm_info) + goto out; + + if (kvm_alloc_vmm_area()) + goto out_free0; + + r = kvm_relocate_vmm(vmm_info, vmm_info->module); + if (r) + goto out_free1; + + return 0; + +out_free1: + kvm_free_vmm_area(); +out_free0: + kfree(kvm_vmm_info); +out: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_free_vmm_area(); + kfree(kvm_vmm_info); + kvm_vmm_info = NULL; +} + +static int kvm_ia64_sync_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + long n, base; + unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS + + KVM_MEM_DIRTY_LOG_OFS); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + base = memslot->base_gfn / BITS_PER_LONG; + + for (i = 0; i < n/sizeof(long); ++i) { + memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; + dirty_bitmap[base + i] = 0; + } + r = 0; +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + spin_lock(&kvm->arch.dirty_log_lock); + + r = kvm_ia64_sync_dirty_log(kvm, log); + if (r) + goto out; + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + spin_unlock(&kvm->arch.dirty_log_lock); + return r; +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vec, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } + return 1; + } + return 0; +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return apic->vcpu->vcpu_id == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + return 0; +} + +struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + struct kvm_vcpu *lvcpu = kvm->vcpus[0]; + int i; + + for (i = 1; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i]) + continue; + if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) + lvcpu = kvm->vcpus[i]; + } + + return lvcpu; +} + +static int find_highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + + return -1; +} + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (vpd->irr[0] & (1UL << NMI_VECTOR)) + return NMI_VECTOR; + if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return find_highest_bits((int *)&vpd->irr[0]); +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + if (kvm_highest_pending_irq(vcpu) != -1) + return 1; + return 0; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c new file mode 100644 index 00000000000..091f936c448 --- /dev/null +++ b/arch/ia64/kvm/kvm_fw.c @@ -0,0 +1,500 @@ +/* + * PAL/SAL call delegation + * + * Copyright (c) 2004 Li Susie <susie.li@intel.com> + * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> + * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/kvm_host.h> +#include <linux/smp.h> + +#include "vti.h" +#include "misc.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/tlb.h> + +/* + * Handy macros to make sure that the PAL return values start out + * as something meaningful. + */ +#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ + { \ + x.status = PAL_STATUS_UNIMPLEMENTED; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +#define INIT_PAL_STATUS_SUCCESS(x) \ + { \ + x.status = PAL_STATUS_SUCCESS; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, + u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { + struct exit_ctl_data *p; + + if (vcpu) { + p = &vcpu->arch.exit_data; + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + *gr28 = p->u.pal_data.gr28; + *gr29 = p->u.pal_data.gr29; + *gr30 = p->u.pal_data.gr30; + *gr31 = p->u.pal_data.gr31; + return ; + } + } + printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); +} + +static void set_pal_result(struct kvm_vcpu *vcpu, + struct ia64_pal_retval result) { + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { + p->u.pal_data.ret = result; + return ; + } + INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); +} + +static void set_sal_result(struct kvm_vcpu *vcpu, + struct sal_ret_values result) { + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { + p->u.sal_data.ret = result; + return ; + } + printk(KERN_WARNING"Failed to set sal result!!\n"); +} + +struct cache_flush_args { + u64 cache_type; + u64 operation; + u64 progress; + long status; +}; + +cpumask_t cpu_cache_coherent_map; + +static void remote_pal_cache_flush(void *data) +{ + struct cache_flush_args *args = data; + long status; + u64 progress = args->progress; + + status = ia64_pal_cache_flush(args->cache_type, args->operation, + &progress, NULL); + if (status != 0) + args->status = status; +} + +static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) +{ + u64 gr28, gr29, gr30, gr31; + struct ia64_pal_retval result = {0, 0, 0, 0}; + struct cache_flush_args args = {0, 0, 0, 0}; + long psr; + + gr28 = gr29 = gr30 = gr31 = 0; + kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); + + if (gr31 != 0) + printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); + + /* Always call Host Pal in int=1 */ + gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; + args.cache_type = gr29; + args.operation = gr30; + smp_call_function(remote_pal_cache_flush, + (void *)&args, 1, 1); + if (args.status != 0) + printk(KERN_ERR"pal_cache_flush error!," + "status:0x%lx\n", args.status); + /* + * Call Host PAL cache flush + * Clear psr.ic when call PAL_CACHE_FLUSH + */ + local_irq_save(psr); + result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, + &result.v0); + local_irq_restore(psr); + if (result.status != 0) + printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" + "in1:%lx,in2:%lx\n", + vcpu, result.status, gr29, gr30); + +#if 0 + if (gr29 == PAL_CACHE_TYPE_COHERENT) { + cpus_setall(vcpu->arch.cache_coherent_map); + cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); + cpus_setall(cpu_cache_coherent_map); + cpu_clear(vcpu->cpu, cpu_cache_coherent_map); + } +#endif + return result; +} + +struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); + + /* + * PAL_FREQ_BASE may not be implemented in some platforms, + * call SAL instead. + */ + if (result.v0 == 0) { + result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &result.v0, + &result.v1); + result.v2 = 0; + } + + return result; +} + +static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + return result; +} + +static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + INIT_PAL_STATUS_SUCCESS(result); + return result; +} + +static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result = {0, 0, 0, 0}; + long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, + &result.v2, in2); + + return result; +} + +static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) +{ + + pal_cache_config_info_t ci; + long status; + unsigned long in0, in1, in2, in3, r9, r10; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + status = ia64_pal_cache_config_info(in1, in2, &ci); + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + return ((struct ia64_pal_retval){status, r9, r10, 0}); +} + +#define GUEST_IMPL_VA_MSB 59 +#define GUEST_RID_BITS 18 + +static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) +{ + + pal_vm_info_1_u_t vminfo1; + pal_vm_info_2_u_t vminfo2; + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); + if (!result.status) { + vminfo1.pvi1_val = result.v0; + vminfo1.pal_vm_info_1_s.max_itr_entry = 8; + vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; + result.v0 = vminfo1.pvi1_val; + vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; + vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; + result.v1 = vminfo2.pvi2_val; + } + + return result; +} + +static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + + return result; +} + +static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) +{ + u64 index = 0; + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) + index = p->u.pal_data.gr28; + + return index; +} + +int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + + u64 gr28; + struct ia64_pal_retval result; + int ret = 1; + + gr28 = kvm_get_pal_call_index(vcpu); + /*printk("pal_call index:%lx\n",gr28);*/ + switch (gr28) { + case PAL_CACHE_FLUSH: + result = pal_cache_flush(vcpu); + break; + case PAL_CACHE_SUMMARY: + result = pal_cache_summary(vcpu); + break; + case PAL_HALT_LIGHT: + { + vcpu->arch.timer_pending = 1; + INIT_PAL_STATUS_SUCCESS(result); + if (kvm_highest_pending_irq(vcpu) == -1) + ret = kvm_emulate_halt(vcpu); + + } + break; + + case PAL_FREQ_RATIOS: + result = pal_freq_ratios(vcpu); + break; + + case PAL_FREQ_BASE: + result = pal_freq_base(vcpu); + break; + + case PAL_LOGICAL_TO_PHYSICAL : + result = pal_logical_to_physica(vcpu); + break; + + case PAL_VM_SUMMARY : + result = pal_vm_summary(vcpu); + break; + + case PAL_VM_INFO : + result = pal_vm_info(vcpu); + break; + case PAL_PLATFORM_ADDR : + result = pal_platform_addr(vcpu); + break; + case PAL_CACHE_INFO: + result = pal_cache_info(vcpu); + break; + case PAL_PTCE_INFO: + INIT_PAL_STATUS_SUCCESS(result); + result.v1 = (1L << 32) | 1L; + break; + case PAL_VM_PAGE_SIZE: + result.status = ia64_pal_vm_page_size(&result.v0, + &result.v1); + break; + case PAL_RSE_INFO: + result.status = ia64_pal_rse_info(&result.v0, + (pal_hints_u_t *)&result.v1); + break; + case PAL_PROC_GET_FEATURES: + result = pal_proc_get_features(vcpu); + break; + case PAL_DEBUG_INFO: + result.status = ia64_pal_debug_info(&result.v0, + &result.v1); + break; + case PAL_VERSION: + result.status = ia64_pal_version( + (pal_version_u_t *)&result.v0, + (pal_version_u_t *)&result.v1); + + break; + case PAL_FIXED_ADDR: + result.status = PAL_STATUS_SUCCESS; + result.v0 = vcpu->vcpu_id; + break; + default: + INIT_PAL_STATUS_UNIMPLEMENTED(result); + printk(KERN_WARNING"kvm: Unsupported pal call," + " index:0x%lx\n", gr28); + } + set_pal_result(vcpu, result); + return ret; +} + +static struct sal_ret_values sal_emulator(struct kvm *kvm, + long index, unsigned long in1, + unsigned long in2, unsigned long in3, + unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + long r11 = 0; + long status; + + status = 0; + switch (index) { + case SAL_FREQ_BASE: + status = ia64_sal_freq_base(in1, &r9, &r10); + break; + case SAL_PCI_CONFIG_READ: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_READ\n"); + break; + case SAL_PCI_CONFIG_WRITE: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_WRITE\n"); + break; + case SAL_SET_VECTORS: + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { + status = -2; + } else { + kvm->arch.rdv_sal_data.boot_ip = in2; + kvm->arch.rdv_sal_data.boot_gp = in3; + } + printk("Rendvous called! iip:%lx\n\n", in2); + } else + printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." + "ignored...\n", in1); + break; + case SAL_GET_STATE_INFO: + /* No more info. */ + status = -5; + r9 = 0; + break; + case SAL_GET_STATE_INFO_SIZE: + /* Return a dummy size. */ + status = 0; + r9 = 128; + break; + case SAL_CLEAR_STATE_INFO: + /* Noop. */ + break; + case SAL_MC_RENDEZ: + printk(KERN_WARNING + "kvm: called SAL_MC_RENDEZ. ignored...\n"); + break; + case SAL_MC_SET_PARAMS: + printk(KERN_WARNING + "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); + break; + case SAL_CACHE_FLUSH: + if (1) { + /*Flush using SAL. + This method is faster but has a side + effect on other vcpu running on + this cpu. */ + status = ia64_sal_cache_flush(in1); + } else { + /*Maybe need to implement the method + without side effect!*/ + status = 0; + } + break; + case SAL_CACHE_INIT: + printk(KERN_WARNING + "kvm: called SAL_CACHE_INIT. ignored...\n"); + break; + case SAL_UPDATE_PAL: + printk(KERN_WARNING + "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); + break; + default: + printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." + " index:%ld\n", index); + status = -1; + break; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, + u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p) { + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + *in0 = p->u.sal_data.in0; + *in1 = p->u.sal_data.in1; + *in2 = p->u.sal_data.in2; + *in3 = p->u.sal_data.in3; + *in4 = p->u.sal_data.in4; + *in5 = p->u.sal_data.in5; + *in6 = p->u.sal_data.in6; + *in7 = p->u.sal_data.in7; + return ; + } + } + *in0 = 0; +} + +void kvm_sal_emul(struct kvm_vcpu *vcpu) +{ + + struct sal_ret_values result; + u64 index, in1, in2, in3, in4, in5, in6, in7; + + kvm_get_sal_call_data(vcpu, &index, &in1, &in2, + &in3, &in4, &in5, &in6, &in7); + result = sal_emulator(vcpu->kvm, index, in1, in2, in3, + in4, in5, in6, in7); + set_sal_result(vcpu, result); +} diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h new file mode 100644 index 00000000000..13980d9b8bc --- /dev/null +++ b/arch/ia64/kvm/kvm_minstate.h @@ -0,0 +1,273 @@ +/* + * kvm_minstate.h: min save macros + * Copyright (c) 2007, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#include <asm/asmmacro.h> +#include <asm/types.h> +#include <asm/kregs.h> +#include "asm-offsets.h" + +#define KVM_MINSTATE_START_SAVE_MIN \ + mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ + ;; \ + mov.m r28 = ar.rnat; \ + addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23 = ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore = r22; /* switch to kernel RBS */\ + ;; \ + mov r18 = ar.bsp; \ + mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ + + + +#define KVM_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */\ + ;; + + +#define PAL_VSA_SYNC_READ \ + /* begin to call pal vps sync_read */ \ + add r25 = VMM_VPD_BASE_OFFSET, r21; \ + adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ + ;; \ + ld8 r25 = [r25]; /* read vpd base */ \ + ld8 r20 = [r20]; \ + ;; \ + add r20 = PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + nop 0x0; \ + mov r24 = ip; \ + mov b0 = r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + nop 0x0; \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; + + + +#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ + + +#define PT(f) (VMM_PT_REGS_##f##_OFFSET) + +#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ + KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27 = ar.rsc; /* M */ \ + mov r20 = r1; /* A */ \ + mov r25 = ar.unat; /* M */ \ + mov r29 = cr.ipsr; /* M */ \ + mov r26 = ar.pfs; /* I */ \ + mov r18 = cr.isr; \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ + mov r1 = r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + KVM_MINSTATE_START_SAVE_MIN \ + adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ + adds r16 = PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16] = r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ + mov r29 = b0 \ + ;; \ + adds r16 = PT(R8),r1; /* initialize first base pointer */\ + adds r17 = PT(R9),r1; /* initialize second base pointer */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r8,16; \ +.mem.offset 8,0; st8.spill [r17] = r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r10,24; \ +.mem.offset 8,0; st8.spill [r17] = r11,24; \ + ;; \ + mov r9 = cr.iip; /* M */ \ + mov r10 = ar.fpsr; /* M */ \ + ;; \ + st8 [r16] = r9,16; /* save cr.iip */ \ + st8 [r17] = r30,16; /* save cr.ifs */ \ + sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16] = r25,16; /* save ar.unat */ \ + st8 [r17] = r26,16; /* save ar.pfs */ \ + shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ + ;; \ + st8 [r16] = r27,16; /* save ar.rsc */ \ + st8 [r17] = r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16] = r23,16; /* save ar.bspstore */ \ + st8 [r17] = r31,16; /* save predicates */ \ + ;; \ + st8 [r16] = r29,16; /* save b0 */ \ + st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17] = r12,16; \ + adds r12 = -16,r1; /* switch to kernel memory stack */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r13,16; \ +.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ + mov r13 = r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r15,16; \ +.mem.offset 8,0; st8.spill [r17] = r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r2,16; \ +.mem.offset 8,0; st8.spill [r17] = r3,16; \ + adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ + ;; \ + adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ + adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ + mov r26 = cr.iipa; \ + mov r27 = cr.isr; \ + ;; \ + st8 [r16] = r26; \ + st8 [r17] = r27; \ + ;; \ + EXTRA; \ + mov r8 = ar.ccv; \ + mov r9 = ar.csd; \ + mov r10 = ar.ssd; \ + movl r11 = FPSR_DEFAULT; /* L-unit */ \ + adds r17 = VMM_VCPU_GP_OFFSET,r13; \ + ;; \ + ld8 r1 = [r17];/* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ \ + KVM_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define KVM_SAVE_REST \ +.mem.offset 0,0; st8.spill [r2] = r16,16; \ +.mem.offset 8,0; st8.spill [r3] = r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r18,16; \ +.mem.offset 8,0; st8.spill [r3] = r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r20,16; \ +.mem.offset 8,0; st8.spill [r3] = r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r22,16; \ +.mem.offset 8,0; st8.spill [r3] = r23,16; \ + mov r19 = b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r24,16; \ +.mem.offset 8,0; st8.spill [r3] = r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r26,16; \ +.mem.offset 8,0; st8.spill [r3] = r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r28,16; \ +.mem.offset 8,0; st8.spill [r3] = r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r30,16; \ +.mem.offset 8,0; st8.spill [r3] = r31,32; \ + ;; \ + mov ar.fpsr = r11; \ + st8 [r2] = r8,8; \ + adds r24 = PT(B6)-PT(F7),r3; \ + adds r25 = PT(B7)-PT(F7),r3; \ + ;; \ + st8 [r24] = r18,16; /* b6 */ \ + st8 [r25] = r19,16; /* b7 */ \ + adds r2 = PT(R4)-PT(F6),r2; \ + adds r3 = PT(R5)-PT(F7),r3; \ + ;; \ + st8 [r24] = r9; /* ar.csd */ \ + st8 [r25] = r10; /* ar.ssd */ \ + ;; \ + mov r18 = ar.unat; \ + adds r19 = PT(EML_UNAT)-PT(R4),r2; \ + ;; \ + st8 [r19] = r18; /* eml_unat */ \ + + +#define KVM_SAVE_EXTRA \ +.mem.offset 0,0; st8.spill [r2] = r4,16; \ +.mem.offset 8,0; st8.spill [r3] = r5,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r6,16; \ +.mem.offset 8,0; st8.spill [r3] = r7; \ + ;; \ + mov r26 = ar.unat; \ + ;; \ + st8 [r2] = r26;/* eml_unat */ \ + +#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) +#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) +#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h new file mode 100644 index 00000000000..6d6cbcb1489 --- /dev/null +++ b/arch/ia64/kvm/lapic.h @@ -0,0 +1,25 @@ +#ifndef __KVM_IA64_LAPIC_H +#define __KVM_IA64_LAPIC_H + +#include <linux/kvm_host.h> + +/* + * vlsapic + */ +struct kvm_lapic{ + struct kvm_vcpu *vcpu; + uint64_t insvc[4]; + uint64_t vhpi; + uint8_t xtp; + uint8_t pal_init_pending; + uint8_t pad[2]; +}; + +int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_free_lapic(struct kvm_vcpu *vcpu); + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); + +#endif diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h new file mode 100644 index 00000000000..e585c460734 --- /dev/null +++ b/arch/ia64/kvm/misc.h @@ -0,0 +1,93 @@ +#ifndef __KVM_IA64_MISC_H +#define __KVM_IA64_MISC_H + +#include <linux/kvm_host.h> +/* + * misc.h + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +/* + *Return p2m base address at host side! + */ +static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) +{ + return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); +} + +static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, + u64 paddr, u64 mem_flags) +{ + uint64_t *pmt_base = kvm_host_get_pmt(kvm); + unsigned long pte; + + pte = PAGE_ALIGN(paddr) | mem_flags; + pmt_base[gfn] = pte; +} + +/*Function for translating host address to guest address*/ + +static inline void *to_guest(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)(addr) - kvm->arch.vm_base + + KVM_VM_DATA_BASE); +} + +/*Function for translating guest address to host address*/ + +static inline void *to_host(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)addr - KVM_VM_DATA_BASE + + kvm->arch.vm_base); +} + +/* Get host context of the vcpu */ +static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.host; + return to_guest(vcpu->kvm, ctx); +} + +/* Get guest context of the vcpu */ +static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.guest; + return to_guest(vcpu->kvm, ctx); +} + +/* kvm get exit data from gvmm! */ +static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.exit_data; +} + +/*kvm get vcpu ioreq for kvm module!*/ +static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_ctl_data; + + if (vcpu) { + p_ctl_data = kvm_get_exit_data(vcpu); + if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) + return &p_ctl_data->u.ioreq; + } + + return NULL; +} + +#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c new file mode 100644 index 00000000000..351bf70da46 --- /dev/null +++ b/arch/ia64/kvm/mmio.c @@ -0,0 +1,341 @@ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) + * + * Copyright (c) 2007 Intel Corporation KVM support. + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/kvm_host.h> + +#include "vcpu.h" + +static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) +{ + VLSAPIC_XTP(v) = val; +} + +/* + * LSAPIC OFFSET + */ +#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + +/* + * execute write IPI op. + */ +static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, + uint64_t addr, uint64_t data) +{ + struct exit_ctl_data *p = ¤t_vcpu->arch.exit_data; + unsigned long psr; + + local_irq_save(psr); + + p->exit_reason = EXIT_REASON_IPI; + p->u.ipi_data.addr.val = addr; + p->u.ipi_data.data.val = data; + vmm_transition(current_vcpu); + + local_irq_restore(psr); + +} + +void lsapic_write(struct kvm_vcpu *v, unsigned long addr, + unsigned long length, unsigned long val) +{ + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ + panic_vm(v); + break; + case PIB_OFST_XTP: + if (length == 1) { + vlsapic_write_xtp(v, val); + } else { + /*panic_domain(NULL, + "Undefined write on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + if (PIB_LOW_HALF(addr)) { + /*lower half */ + if (length != 8) + /*panic_domain(NULL, + "Can't LHF write with size %ld!\n", + length);*/ + panic_vm(v); + else + vlsapic_write_ipi(v, addr, val); + } else { /* upper half + printk("IPI-UHF write %lx\n",addr);*/ + panic_vm(v); + } + break; + } +} + +unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, + unsigned long length) +{ + uint64_t result = 0; + + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + if (length == 1) /* 1 byte load */ + ; /* There is no i8259, there is no INTA access*/ + else + /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ + panic_vm(v); + + break; + case PIB_OFST_XTP: + if (length == 1) { + result = VLSAPIC_XTP(v); + /* printk("read xtp %lx\n", result); */ + } else { + /*panic_domain(NULL, + "Undefined read on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + panic_vm(v); + break; + } + return result; +} + +static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, + u16 s, int ma, int dir) +{ + unsigned long iot; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); + + local_irq_save(psr); + + /*Intercept the acces for PIB range*/ + if (iot == GPFN_PIB) { + if (!dir) + lsapic_write(vcpu, src_pa, s, *dest); + else + *dest = lsapic_read(vcpu, src_pa, s); + goto out; + } + p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; + p->u.ioreq.addr = src_pa; + p->u.ioreq.size = s; + p->u.ioreq.dir = dir; + if (dir == IOREQ_WRITE) + p->u.ioreq.data = *dest; + p->u.ioreq.state = STATE_IOREQ_READY; + vmm_transition(vcpu); + + if (p->u.ioreq.state == STATE_IORESP_READY) { + if (dir == IOREQ_READ) + *dest = p->u.ioreq.data; + } else + panic_vm(vcpu); +out: + local_irq_restore(psr); + return ; +} + +/* + dir 1: read 0:write + inst_type 0:integer 1:floating point + */ +#define SL_INTEGER 0 /* store/load interger*/ +#define SL_FLOATING 1 /* store/load floating*/ + +void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) +{ + struct kvm_pt_regs *regs; + IA64_BUNDLE bundle; + int slot, dir = 0; + int inst_type = -1; + u16 size = 0; + u64 data, slot1a, slot1b, temp, update_reg; + s32 imm; + INST64 inst; + + regs = vcpu_regs(vcpu); + + if (fetch_code(vcpu, regs->cr_iip, &bundle)) { + /* if fetch code fail, return and try again */ + return; + } + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) + inst.inst = bundle.slot0; + else if (slot == 1) { + slot1a = bundle.slot1a; + slot1b = bundle.slot1b; + inst.inst = slot1a + (slot1b << 18); + } else if (slot == 2) + inst.inst = bundle.slot2; + + /* Integer Load/Store */ + if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { + inst_type = SL_INTEGER; + size = (inst.M1.x6 & 0x3); + if ((inst.M1.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M4.r2); + } else if ((inst.M1.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + } + } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { + /* Integer Load + Reg update */ + inst_type = SL_INTEGER; + dir = IOREQ_READ; + size = (inst.M2.x6 & 0x3); + temp = vcpu_get_gr(vcpu, inst.M2.r3); + update_reg = vcpu_get_gr(vcpu, inst.M2.r2); + temp += update_reg; + vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); + } else if (inst.M3.major == 5) { + /*Integer Load/Store + Imm update*/ + inst_type = SL_INTEGER; + size = (inst.M3.x6&0x3); + if ((inst.M5.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M5.r2); + temp = vcpu_get_gr(vcpu, inst.M5.r3); + imm = (inst.M5.s << 31) | (inst.M5.i << 30) | + (inst.M5.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); + + } else if ((inst.M3.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + temp = vcpu_get_gr(vcpu, inst.M3.r3); + imm = (inst.M3.s << 31) | (inst.M3.i << 30) | + (inst.M3.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); + + } + } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B + && inst.M9.m == 0 && inst.M9.x == 0) { + /* Floating-point spill*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M9.f2, &v); + /* Write high word. FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { + /* Floating-point spill + Imm update */ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + + /* Write high word.FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { + /* Floating-point stf8 + Imm update */ + struct ia64_fpreg v; + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + size = 3; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + data = v.u.bits[0]; /* Significand. */ + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c + && inst.M15.x6 <= 0x2f) { + temp = vcpu_get_gr(vcpu, inst.M15.r3); + imm = (inst.M15.s << 31) | (inst.M15.i << 30) | + (inst.M15.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); + + vcpu_increment_iip(vcpu); + return; + } else if (inst.M12.major == 6 && inst.M12.m == 1 + && inst.M12.x == 1 && inst.M12.x6 == 1) { + /* Floating-point Load Pair + Imm ldfp8 M12*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_READ; + size = 8; /*ldfd*/ + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f1, &v); + padr += 8; + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f2, &v); + padr += 8; + vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); + vcpu_increment_iip(vcpu); + return; + } else { + inst_type = -1; + panic_vm(vcpu); + } + + size = 1 << size; + if (dir == IOREQ_WRITE) { + mmio_access(vcpu, padr, &data, size, ma, dir); + } else { + mmio_access(vcpu, padr, &data, size, ma, dir); + if (inst_type == SL_INTEGER) + vcpu_set_gr(vcpu, inst.M1.r1, data, 0); + else + panic_vm(vcpu); + + } + vcpu_increment_iip(vcpu); +} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S new file mode 100644 index 00000000000..e4f15d641b2 --- /dev/null +++ b/arch/ia64/kvm/optvfault.S @@ -0,0 +1,918 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include <asm/processor.h> + +#include "vti.h" +#include "asm-offsets.h" + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR +#define ACCE_MOV_TO_RR +#define ACCE_RSM +#define ACCE_SSM +#define ACCE_MOV_TO_PSR +#define ACCE_THASH + +//mov r1=ar3 +GLOBAL_ENTRY(kvm_asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many kvm_virtualization_fault_back +#endif + add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 + add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + mov r19=ar.itc + mov r24=b0 + ;; + add r19=r19,r18 + addl r20=@gprel(asm_mov_to_reg),gp + ;; + st8 [r16] = r19 + adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + ;; + mov b0=r17 + br.sptk.few b0 + ;; +END(kvm_asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(kvm_asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_from_rr_back_1: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(kvm_asm_mov_from_rr) + + +// mov rr[r3]=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_rr) +#ifndef ACCE_MOV_TO_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,13,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r22=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_to_rr_back_1: + adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 + shr.u r23=r19,61 + shladd r17=r17,4,r20 + ;; + //if rr6, go back + cmp.eq p6,p0=6,r23 + mov b0=r22 + (p6) br.cond.dpnt.many kvm_virtualization_fault_back + ;; + mov r28=r19 + mov b0=r17 + br.many b0 +kvm_asm_mov_to_rr_back_2: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + shladd r27=r23,3,r27 + ;; // vrr.rid<<4 |0xe + st8 [r27]=r19 + mov b0=r30 + ;; + extr.u r16=r19,8,26 + extr.u r18 =r19,2,6 + mov r17 =0xe + ;; + shladd r16 = r16, 4, r17 + extr.u r19 =r19,0,8 + ;; + shl r16 = r16,8 + ;; + add r19 = r19, r16 + ;; //set ve 1 + dep r19=-1,r19,0,1 + cmp.lt p6,p0=14,r18 + ;; + (p6) mov r18=14 + ;; + (p6) dep r19=r18,r19,2,6 + ;; + cmp.eq p6,p0=0,r23 + ;; + cmp.eq.or p6,p0=4,r23 + ;; + adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + ;; + ld4 r16=[r16] + cmp.eq p7,p0=r0,r0 + (p6) shladd r17=r23,1,r17 + ;; + (p6) st8 [r17]=r19 + (p6) tbit.nz p6,p7=r16,0 + ;; + (p7) mov rr[r28]=r19 + mov r24=r22 + br.many b0 +END(kvm_asm_mov_to_rr) + + +//rsm +GLOBAL_ENTRY(kvm_asm_rsm) +#ifndef ACCE_RSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; + add r17=VPD_VPSR_START_OFFSET,r16 + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + //r26 is imm24 + dep r26=r28,r26,23,1 + ;; + ld8 r18=[r17] + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI + ld4 r23=[r22] + sub r27=-1,r26 + mov r24=b0 + ;; + mov r20=cr.ipsr + or r28=r27,r28 + and r19=r18,r27 + ;; + st8 [r17]=r19 + and r20=r20,r28 + /* Comment it out due to short of fp lazy alorgithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + */ + ;; + mov cr.ipsr=r20 + tbit.nz p6,p0=r23,0 + ;; + tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r26=VMM_VCPU_META_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + dep r23=-1,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + br.many kvm_resume_to_guest +END(kvm_asm_rsm) + + +//ssm +GLOBAL_ENTRY(kvm_asm_ssm) +#ifndef ACCE_SSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; //r26 is imm24 + add r27=VPD_VPSR_START_OFFSET,r16 + dep r26=r28,r26,23,1 + ;; //r19 vpsr + ld8 r29=[r27] + mov r24=b0 + ;; + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + mov r20=cr.ipsr + or r19=r29,r26 + ;; + ld4 r23=[r22] + st8 [r27]=r19 + or r20=r20,r26 + ;; + mov cr.ipsr=r20 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + and r19=r28,r19 + tbit.z p6,p0=r23,0 + ;; + cmp.ne.or p6,p0=r28,r19 + (p6) br.dptk kvm_asm_ssm_1 + ;; + add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_ssm_1: + tbit.nz p6,p0=r29,IA64_PSR_I_BIT + ;; + tbit.z.or p6,p0=r19,IA64_PSR_I_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_ssm) + + +//mov psr.l=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_psr) +#ifndef ACCE_MOV_TO_PSR + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,13,7 //r2 + ;; + ld8 r16=[r16] + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 + shladd r26=r26,4,r20 + mov r24=b0 + ;; + add r27=VPD_VPSR_START_OFFSET,r16 + mov b0=r26 + br.many b0 + ;; +kvm_asm_mov_to_psr_back: + ld8 r17=[r27] + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + dep r19=0,r19,32,32 + ;; + ld4 r23=[r22] + dep r18=0,r17,0,32 + ;; + add r30=r18,r19 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + st8 [r27]=r30 + and r27=r28,r30 + and r29=r28,r17 + ;; + cmp.eq p5,p0=r29,r27 + cmp.eq p6,p7=r28,r27 + (p5) br.many kvm_asm_mov_to_psr_1 + ;; + //virtual to physical + (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 + (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + (p7) dep r23=-1,r23,0,1 + ;; + //physical to virtual + (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + (p6) dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_mov_to_psr_1: + mov r20=cr.ipsr + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT + ;; + or r19=r19,r28 + dep r20=0,r20,0,32 + ;; + add r20=r19,r20 + mov b0=r24 + ;; + /* Comment it out due to short of fp lazy algorithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + ;; + */ + mov cr.ipsr=r20 + cmp.ne p6,p0=r0,r0 + ;; + tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT + tbit.z.or p6,p0=r30,IA64_PSR_I_BIT + (p6) br.dpnt.few kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_mov_to_psr) + + +ENTRY(kvm_asm_dispatch_vexirq) +//increment iip + mov r16=cr.ipsr + ;; + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + (p7) add r17=1,r17 + ;; + (p6) add r18=0x10,r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + (p6) mov cr.iip=r18 + mov cr.ipsr=r16 + mov r30 =1 + br.many kvm_dispatch_vexirq +END(kvm_asm_dispatch_vexirq) + +// thash +// TODO: add support when pta.vf = 1 +GLOBAL_ENTRY(kvm_asm_thash) +#ifndef ACCE_THASH + br.many kvm_virtualization_fault_back +#endif + extr.u r17=r25,20,7 // get r3 from opcode in r25 + extr.u r18=r25,6,7 // get r1 from opcode in r25 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 + shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) + adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs + ;; + mov r24=b0 + ;; + ld8 r16=[r16] // get VPD addr + mov b0=r17 + br.many b0 // r19 return value + ;; +kvm_asm_thash_back1: + shr.u r23=r19,61 // get RR number + adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr + adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta + ;; + shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr + ld8 r17=[r16] // get PTA + mov r26=1 + ;; + extr.u r29=r17,2,6 // get pta.size + ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value + ;; + extr.u r25=r25,2,6 // get rr.ps + shl r22=r26,r29 // 1UL << pta.size + ;; + shr.u r23=r19,r25 // vaddr >> rr.ps + adds r26=3,r29 // pta.size + 3 + shl r27=r17,3 // pta << 3 + ;; + shl r23=r23,3 // (vaddr >> rr.ps) << 3 + shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) + movl r16=7<<61 + ;; + adds r22=-1,r22 // (1UL << pta.size) - 1 + shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size + and r19=r19,r16 // vaddr & VRN_MASK + ;; + and r22=r22,r23 // vhpt_offset + or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) + adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 + ;; + or r19=r19,r22 // calc pval + shladd r17=r18,4,r26 + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + ;; + mov b0=r17 + br.many b0 +END(kvm_asm_thash) + +#define MOV_TO_REG0 \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + nop.b 0x0; \ + ;; \ +}; + + +#define MOV_TO_REG(n) \ +{; \ + mov r##n##=r19; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_FROM_REG(n) \ +{; \ + mov r19=r##n##; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_TO_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + mov r2=r19; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r##n##=r2; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r2=r26; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_to_bank0_reg##n##) + + +#define MOV_FROM_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + nop.b 0x0; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r2=r##n##; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r19=r2; \ + mov r2=r26; \ + mov b0=r30; \ +}; \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_from_bank0_reg##n##) + + +#define JMP_TO_MOV_TO_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_to_bank0_reg##n##; \ + ;; \ +} + + +#define JMP_TO_MOV_FROM_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_from_bank0_reg##n##; \ + ;; \ +} + + +MOV_FROM_BANK0_REG(16) +MOV_FROM_BANK0_REG(17) +MOV_FROM_BANK0_REG(18) +MOV_FROM_BANK0_REG(19) +MOV_FROM_BANK0_REG(20) +MOV_FROM_BANK0_REG(21) +MOV_FROM_BANK0_REG(22) +MOV_FROM_BANK0_REG(23) +MOV_FROM_BANK0_REG(24) +MOV_FROM_BANK0_REG(25) +MOV_FROM_BANK0_REG(26) +MOV_FROM_BANK0_REG(27) +MOV_FROM_BANK0_REG(28) +MOV_FROM_BANK0_REG(29) +MOV_FROM_BANK0_REG(30) +MOV_FROM_BANK0_REG(31) + + +// mov from reg table +ENTRY(asm_mov_from_reg) + MOV_FROM_REG(0) + MOV_FROM_REG(1) + MOV_FROM_REG(2) + MOV_FROM_REG(3) + MOV_FROM_REG(4) + MOV_FROM_REG(5) + MOV_FROM_REG(6) + MOV_FROM_REG(7) + MOV_FROM_REG(8) + MOV_FROM_REG(9) + MOV_FROM_REG(10) + MOV_FROM_REG(11) + MOV_FROM_REG(12) + MOV_FROM_REG(13) + MOV_FROM_REG(14) + MOV_FROM_REG(15) + JMP_TO_MOV_FROM_BANK0_REG(16) + JMP_TO_MOV_FROM_BANK0_REG(17) + JMP_TO_MOV_FROM_BANK0_REG(18) + JMP_TO_MOV_FROM_BANK0_REG(19) + JMP_TO_MOV_FROM_BANK0_REG(20) + JMP_TO_MOV_FROM_BANK0_REG(21) + JMP_TO_MOV_FROM_BANK0_REG(22) + JMP_TO_MOV_FROM_BANK0_REG(23) + JMP_TO_MOV_FROM_BANK0_REG(24) + JMP_TO_MOV_FROM_BANK0_REG(25) + JMP_TO_MOV_FROM_BANK0_REG(26) + JMP_TO_MOV_FROM_BANK0_REG(27) + JMP_TO_MOV_FROM_BANK0_REG(28) + JMP_TO_MOV_FROM_BANK0_REG(29) + JMP_TO_MOV_FROM_BANK0_REG(30) + JMP_TO_MOV_FROM_BANK0_REG(31) + MOV_FROM_REG(32) + MOV_FROM_REG(33) + MOV_FROM_REG(34) + MOV_FROM_REG(35) + MOV_FROM_REG(36) + MOV_FROM_REG(37) + MOV_FROM_REG(38) + MOV_FROM_REG(39) + MOV_FROM_REG(40) + MOV_FROM_REG(41) + MOV_FROM_REG(42) + MOV_FROM_REG(43) + MOV_FROM_REG(44) + MOV_FROM_REG(45) + MOV_FROM_REG(46) + MOV_FROM_REG(47) + MOV_FROM_REG(48) + MOV_FROM_REG(49) + MOV_FROM_REG(50) + MOV_FROM_REG(51) + MOV_FROM_REG(52) + MOV_FROM_REG(53) + MOV_FROM_REG(54) + MOV_FROM_REG(55) + MOV_FROM_REG(56) + MOV_FROM_REG(57) + MOV_FROM_REG(58) + MOV_FROM_REG(59) + MOV_FROM_REG(60) + MOV_FROM_REG(61) + MOV_FROM_REG(62) + MOV_FROM_REG(63) + MOV_FROM_REG(64) + MOV_FROM_REG(65) + MOV_FROM_REG(66) + MOV_FROM_REG(67) + MOV_FROM_REG(68) + MOV_FROM_REG(69) + MOV_FROM_REG(70) + MOV_FROM_REG(71) + MOV_FROM_REG(72) + MOV_FROM_REG(73) + MOV_FROM_REG(74) + MOV_FROM_REG(75) + MOV_FROM_REG(76) + MOV_FROM_REG(77) + MOV_FROM_REG(78) + MOV_FROM_REG(79) + MOV_FROM_REG(80) + MOV_FROM_REG(81) + MOV_FROM_REG(82) + MOV_FROM_REG(83) + MOV_FROM_REG(84) + MOV_FROM_REG(85) + MOV_FROM_REG(86) + MOV_FROM_REG(87) + MOV_FROM_REG(88) + MOV_FROM_REG(89) + MOV_FROM_REG(90) + MOV_FROM_REG(91) + MOV_FROM_REG(92) + MOV_FROM_REG(93) + MOV_FROM_REG(94) + MOV_FROM_REG(95) + MOV_FROM_REG(96) + MOV_FROM_REG(97) + MOV_FROM_REG(98) + MOV_FROM_REG(99) + MOV_FROM_REG(100) + MOV_FROM_REG(101) + MOV_FROM_REG(102) + MOV_FROM_REG(103) + MOV_FROM_REG(104) + MOV_FROM_REG(105) + MOV_FROM_REG(106) + MOV_FROM_REG(107) + MOV_FROM_REG(108) + MOV_FROM_REG(109) + MOV_FROM_REG(110) + MOV_FROM_REG(111) + MOV_FROM_REG(112) + MOV_FROM_REG(113) + MOV_FROM_REG(114) + MOV_FROM_REG(115) + MOV_FROM_REG(116) + MOV_FROM_REG(117) + MOV_FROM_REG(118) + MOV_FROM_REG(119) + MOV_FROM_REG(120) + MOV_FROM_REG(121) + MOV_FROM_REG(122) + MOV_FROM_REG(123) + MOV_FROM_REG(124) + MOV_FROM_REG(125) + MOV_FROM_REG(126) + MOV_FROM_REG(127) +END(asm_mov_from_reg) + + +/* must be in bank 0 + * parameter: + * r31: pr + * r24: b0 + */ +ENTRY(kvm_resume_to_guest) + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 =[r16] + adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + mov r16=cr.ipsr + ;; + ld8 r20 = [r20] + adds r19=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r25=[r19] + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + ;; + (p6) add r18=0x10,r18 + (p7) add r17=1,r17 + ;; + (p6) mov cr.iip=r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + mov cr.ipsr=r16 + adds r19= VPD_VPSR_START_OFFSET,r25 + add r28=PAL_VPS_RESUME_NORMAL,r20 + add r29=PAL_VPS_RESUME_HANDLER,r20 + ;; + ld8 r19=[r19] + mov b0=r29 + cmp.ne p6,p7 = r0,r0 + ;; + tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p6) ld8 r26=[r25] + (p7) mov b0=r28 + mov pr=r31,-2 + br.sptk.many b0 // call pal service + ;; +END(kvm_resume_to_guest) + + +MOV_TO_BANK0_REG(16) +MOV_TO_BANK0_REG(17) +MOV_TO_BANK0_REG(18) +MOV_TO_BANK0_REG(19) +MOV_TO_BANK0_REG(20) +MOV_TO_BANK0_REG(21) +MOV_TO_BANK0_REG(22) +MOV_TO_BANK0_REG(23) +MOV_TO_BANK0_REG(24) +MOV_TO_BANK0_REG(25) +MOV_TO_BANK0_REG(26) +MOV_TO_BANK0_REG(27) +MOV_TO_BANK0_REG(28) +MOV_TO_BANK0_REG(29) +MOV_TO_BANK0_REG(30) +MOV_TO_BANK0_REG(31) + + +// mov to reg table +ENTRY(asm_mov_to_reg) + MOV_TO_REG0 + MOV_TO_REG(1) + MOV_TO_REG(2) + MOV_TO_REG(3) + MOV_TO_REG(4) + MOV_TO_REG(5) + MOV_TO_REG(6) + MOV_TO_REG(7) + MOV_TO_REG(8) + MOV_TO_REG(9) + MOV_TO_REG(10) + MOV_TO_REG(11) + MOV_TO_REG(12) + MOV_TO_REG(13) + MOV_TO_REG(14) + MOV_TO_REG(15) + JMP_TO_MOV_TO_BANK0_REG(16) + JMP_TO_MOV_TO_BANK0_REG(17) + JMP_TO_MOV_TO_BANK0_REG(18) + JMP_TO_MOV_TO_BANK0_REG(19) + JMP_TO_MOV_TO_BANK0_REG(20) + JMP_TO_MOV_TO_BANK0_REG(21) + JMP_TO_MOV_TO_BANK0_REG(22) + JMP_TO_MOV_TO_BANK0_REG(23) + JMP_TO_MOV_TO_BANK0_REG(24) + JMP_TO_MOV_TO_BANK0_REG(25) + JMP_TO_MOV_TO_BANK0_REG(26) + JMP_TO_MOV_TO_BANK0_REG(27) + JMP_TO_MOV_TO_BANK0_REG(28) + JMP_TO_MOV_TO_BANK0_REG(29) + JMP_TO_MOV_TO_BANK0_REG(30) + JMP_TO_MOV_TO_BANK0_REG(31) + MOV_TO_REG(32) + MOV_TO_REG(33) + MOV_TO_REG(34) + MOV_TO_REG(35) + MOV_TO_REG(36) + MOV_TO_REG(37) + MOV_TO_REG(38) + MOV_TO_REG(39) + MOV_TO_REG(40) + MOV_TO_REG(41) + MOV_TO_REG(42) + MOV_TO_REG(43) + MOV_TO_REG(44) + MOV_TO_REG(45) + MOV_TO_REG(46) + MOV_TO_REG(47) + MOV_TO_REG(48) + MOV_TO_REG(49) + MOV_TO_REG(50) + MOV_TO_REG(51) + MOV_TO_REG(52) + MOV_TO_REG(53) + MOV_TO_REG(54) + MOV_TO_REG(55) + MOV_TO_REG(56) + MOV_TO_REG(57) + MOV_TO_REG(58) + MOV_TO_REG(59) + MOV_TO_REG(60) + MOV_TO_REG(61) + MOV_TO_REG(62) + MOV_TO_REG(63) + MOV_TO_REG(64) + MOV_TO_REG(65) + MOV_TO_REG(66) + MOV_TO_REG(67) + MOV_TO_REG(68) + MOV_TO_REG(69) + MOV_TO_REG(70) + MOV_TO_REG(71) + MOV_TO_REG(72) + MOV_TO_REG(73) + MOV_TO_REG(74) + MOV_TO_REG(75) + MOV_TO_REG(76) + MOV_TO_REG(77) + MOV_TO_REG(78) + MOV_TO_REG(79) + MOV_TO_REG(80) + MOV_TO_REG(81) + MOV_TO_REG(82) + MOV_TO_REG(83) + MOV_TO_REG(84) + MOV_TO_REG(85) + MOV_TO_REG(86) + MOV_TO_REG(87) + MOV_TO_REG(88) + MOV_TO_REG(89) + MOV_TO_REG(90) + MOV_TO_REG(91) + MOV_TO_REG(92) + MOV_TO_REG(93) + MOV_TO_REG(94) + MOV_TO_REG(95) + MOV_TO_REG(96) + MOV_TO_REG(97) + MOV_TO_REG(98) + MOV_TO_REG(99) + MOV_TO_REG(100) + MOV_TO_REG(101) + MOV_TO_REG(102) + MOV_TO_REG(103) + MOV_TO_REG(104) + MOV_TO_REG(105) + MOV_TO_REG(106) + MOV_TO_REG(107) + MOV_TO_REG(108) + MOV_TO_REG(109) + MOV_TO_REG(110) + MOV_TO_REG(111) + MOV_TO_REG(112) + MOV_TO_REG(113) + MOV_TO_REG(114) + MOV_TO_REG(115) + MOV_TO_REG(116) + MOV_TO_REG(117) + MOV_TO_REG(118) + MOV_TO_REG(119) + MOV_TO_REG(120) + MOV_TO_REG(121) + MOV_TO_REG(122) + MOV_TO_REG(123) + MOV_TO_REG(124) + MOV_TO_REG(125) + MOV_TO_REG(126) + MOV_TO_REG(127) +END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c new file mode 100644 index 00000000000..5a33f7ed29a --- /dev/null +++ b/arch/ia64/kvm/process.c @@ -0,0 +1,970 @@ +/* + * process.c: handle interruption inject for guests. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + */ +#include "vcpu.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/fpswa.h> +#include <asm/kregs.h> +#include <asm/tlb.h> + +fpswa_interface_t *vmm_fpswa_interface; + +#define IA64_VHPT_TRANS_VECTOR 0x0000 +#define IA64_INST_TLB_VECTOR 0x0400 +#define IA64_DATA_TLB_VECTOR 0x0800 +#define IA64_ALT_INST_TLB_VECTOR 0x0c00 +#define IA64_ALT_DATA_TLB_VECTOR 0x1000 +#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 +#define IA64_INST_KEY_MISS_VECTOR 0x1800 +#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 +#define IA64_DIRTY_BIT_VECTOR 0x2000 +#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 +#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 +#define IA64_BREAK_VECTOR 0x2c00 +#define IA64_EXTINT_VECTOR 0x3000 +#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 +#define IA64_KEY_PERMISSION_VECTOR 0x5100 +#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 +#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 +#define IA64_GENEX_VECTOR 0x5400 +#define IA64_DISABLED_FPREG_VECTOR 0x5500 +#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 +#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ +#define IA64_DEBUG_VECTOR 0x5900 +#define IA64_UNALIGNED_REF_VECTOR 0x5a00 +#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 +#define IA64_FP_FAULT_VECTOR 0x5c00 +#define IA64_FP_TRAP_VECTOR 0x5d00 +#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 +#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 +#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 + +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ + IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ + IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) + +#define DOMN_PAL_REQUEST 0x110000 +#define DOMN_SAL_REQUEST 0x110001 + +static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, + 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, + 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, + 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, + 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, + 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, + 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, + 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 +}; + +static void collect_interruption(struct kvm_vcpu *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = vcpu_get_psr(vcpu); + vcpu_bsw0(vcpu); + if (vpsr & IA64_PSR_IC) { + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD | IA64_PSR_SS + | IA64_PSR_ED)); + vcpu_set_ipsr(vcpu, vpsr); + + /* Currently, for trap, we do not advance IIP to next + * instruction. That's because we assume caller already + * set up IIP correctly + */ + + vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VCPU(vcpu, ifs); + vifs &= ~IA64_IFS_V; + vcpu_set_ifs(vcpu, vifs); + + vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); + } + + vdcr = VCPU(vcpu, dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr |= (vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if (vdcr & IA64_DCR_PP) { + vpsr |= IA64_PSR_PP; + } else { + vpsr &= ~IA64_PSR_PP;; + } + + vcpu_set_psr(vcpu, vpsr); + +} + +void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) +{ + u64 viva; + struct kvm_pt_regs *regs; + union ia64_isr pt_isr; + + regs = vcpu_regs(vcpu); + + /* clear cr.isr.ir (incomplete register frame)*/ + pt_isr.val = VMX(vcpu, cr_isr); + pt_isr.ir = 0; + VMX(vcpu, cr_isr) = pt_isr.val; + + collect_interruption(vcpu); + + viva = vcpu_get_iva(vcpu); + regs->cr_iip = viva + vec; +} + +static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + long vpsr; + u64 value; + + vpsr = VCPU(vcpu, vpsr); + /* Vol2, Table 8-1 */ + if (vpsr & IA64_PSR_IC) { + if (set_ifa) + vcpu_set_ifa(vcpu, vadr); + if (set_itir) { + value = vcpu_get_itir_on_fault(vcpu, vadr); + vcpu_set_itir(vcpu, value); + } + + if (set_iha) { + value = vcpu_thash(vcpu, vadr); + vcpu_set_iha(vcpu, value); + } + } +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void nested_dtlb(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); +} + + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); +} + +/* Deal with: + * VHPT Translation Vector + */ +static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA*/ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); + + +} + +/* + * VHPT Instruction Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + +/* + * VHPT Data Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + + +/* + * Deal with: + * General Exception vector + */ +void _general_exception(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); +} + + +/* + * Illegal Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Illegal Dependency Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_dep(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Reserved Register/Field Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rsv_reg_field(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} +/* + * Privileged Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ + +void privilege_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Unimplement Data Address Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void unimpl_daddr(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Privileged Register Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void privilege_reg(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* Deal with + * Nat consumption vector + * Parameter: + * vaddr: Optional, if t == REGISTER + */ +static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, + enum tlb_miss_type t) +{ + /* If vPSR.ic && t == DATA/INST, IFA */ + if (t == DATA || t == INSTRUCTION) { + /* IFA */ + set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); + } + + inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); +} + +/* + * Instruction Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, INSTRUCTION); +} + +/* + * Register Nat Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rnat_consumption(struct kvm_vcpu *vcpu) +{ + _nat_consumption_fault(vcpu, 0, REGISTER); +} + +/* + * Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* Deal with + * Page not present vector + */ +static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + + +void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +/* Deal with + * Data access rights vector + */ +void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); +} + +fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, + unsigned long *fpsr, unsigned long *isr, unsigned long *pr, + unsigned long *ifs, struct kvm_pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + struct kvm_vcpu *vcpu = current_vcpu; + + uint64_t old_rr7 = ia64_get_rr(7UL<<61); + + if (!vmm_fpswa_interface) + return (fpswa_ret_t) {-1, 0, 0, 0}; + + /* + * Just let fpswa driver to use hardware fp registers. + * No fp register is valid in memory. + */ + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + /*Call host fpswa interface directly to virtualize + *guest fpswa request! + */ + ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); + ia64_srlz_d(); + + ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, + ipsr, fpsr, isr, pr, ifs, &fp_state); + ia64_set_rr(7UL << 61, old_rr7); + ia64_srlz_d(); + return ret; +} + +/* + * Handle floating-point assist faults and traps for domain. + */ +unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, + unsigned long isr) +{ + struct kvm_vcpu *v = current_vcpu; + IA64_BUNDLE bundle; + unsigned long fault_ip; + fpswa_ret_t ret; + + fault_ip = regs->cr_iip; + /* + * When the FP trap occurs, the trapping instruction is completed. + * If ipsr.ri == 0, there is the trapping instruction in previous + * bundle. + */ + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + + if (fetch_code(v, fault_ip, &bundle)) + return -EAGAIN; + + if (!bundle.i64[0] && !bundle.i64[1]) + return -EACCES; + + ret = vmm_fp_emulate(fp_fault, &bundle, ®s->cr_ipsr, ®s->ar_fpsr, + &isr, ®s->pr, ®s->cr_ifs, regs); + return ret.status; +} + +void reflect_interruption(u64 ifa, u64 isr, u64 iim, + u64 vec, struct kvm_pt_regs *regs) +{ + u64 vector; + int status ; + struct kvm_vcpu *vcpu = current_vcpu; + u64 vpsr = VCPU(vcpu, vpsr); + + vector = vec2off[vec]; + + if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { + panic_vm(vcpu); + return; + } + + switch (vec) { + case 32: /*IA64_FP_FAULT_VECTOR*/ + status = vmm_handle_fpu_swa(1, regs, isr); + if (!status) { + vcpu_increment_iip(vcpu); + return; + } else if (-EAGAIN == status) + return; + break; + case 33: /*IA64_FP_TRAP_VECTOR*/ + status = vmm_handle_fpu_swa(0, regs, isr); + if (!status) + return ; + else if (-EAGAIN == status) { + vcpu_decrement_iip(vcpu); + return ; + } + break; + } + + VCPU(vcpu, isr) = isr; + VCPU(vcpu, iipa) = regs->cr_iip; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VCPU(vcpu, iim) = iim; + else + set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); + + inject_guest_interruption(vcpu, vector); +} + +static void set_pal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + /*FIXME:For static and stacked convention, firmware + * has put the parameters in gr28-gr31 before + * break to vmm !!*/ + + p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); + p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); + p->exit_reason = EXIT_REASON_PAL_CALL; +} + +static void set_pal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); + vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); + vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); + vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); + } else + panic_vm(vcpu); +} + +static void set_sal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); + p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); + p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); + p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); + p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); + p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); + p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); + p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); + p->exit_reason = EXIT_REASON_SAL_CALL; +} + +static void set_sal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); + vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); + vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); + vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); + } else + panic_vm(vcpu); +} + +void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, + unsigned long isr, unsigned long iim) +{ + struct kvm_vcpu *v = current_vcpu; + + if (ia64_psr(regs)->cpl == 0) { + /* Allow hypercalls only when cpl = 0. */ + if (iim == DOMN_PAL_REQUEST) { + set_pal_call_data(v); + vmm_transition(v); + set_pal_call_result(v); + vcpu_increment_iip(v); + return; + } else if (iim == DOMN_SAL_REQUEST) { + set_sal_call_data(v); + vmm_transition(v); + set_sal_call_result(v); + vcpu_increment_iip(v); + return; + } + } + reflect_interruption(ifa, isr, iim, 11, regs); +} + +void check_pending_irq(struct kvm_vcpu *vcpu) +{ + int mask, h_pending, h_inservice; + u64 isr; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + h_pending = highest_pending_irq(vcpu); + if (h_pending == NULL_VECTOR) { + update_vhpi(vcpu, NULL_VECTOR); + return; + } + h_inservice = highest_inservice_irq(vcpu); + + vpsr = VCPU(vcpu, vpsr); + mask = irq_masked(vcpu, h_pending, h_inservice); + if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { + isr = vpsr & IA64_PSR_RI; + update_vhpi(vcpu, h_pending); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ + } else if (mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + } else { + /* masked by vpsr.i or vtpr.*/ + update_vhpi(vcpu, h_pending); + } +} + +static void generate_exirq(struct kvm_vcpu *vcpu) +{ + unsigned vpsr; + uint64_t isr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + if (!(vpsr & IA64_PSR_IC)) + panic_vm(vcpu); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ +} + +void vhpi_detection(struct kvm_vcpu *vcpu) +{ + uint64_t threshold, vhpi; + union ia64_tpr vtpr; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vtpr.val = VCPU(vcpu, tpr); + + threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; + vhpi = VCPU(vcpu, vhpi); + if (vhpi > threshold) { + /* interrupt actived*/ + generate_exirq(vcpu); + } +} + + +void leave_hypervisor_tail(void) +{ + struct kvm_vcpu *v = current_vcpu; + + if (VMX(v, timer_check)) { + VMX(v, timer_check) = 0; + if (VMX(v, itc_check)) { + if (vcpu_get_itc(v) > VCPU(v, itm)) { + if (!(VCPU(v, itv) & (1 << 16))) { + vcpu_pend_interrupt(v, VCPU(v, itv) + & 0xff); + VMX(v, itc_check) = 0; + } else { + v->arch.timer_pending = 1; + } + VMX(v, last_itc) = VCPU(v, itm) + 1; + } + } + } + + rmb(); + if (v->arch.irq_new_pending) { + v->arch.irq_new_pending = 0; + VMX(v, irq_check) = 0; + check_pending_irq(v); + return; + } + if (VMX(v, irq_check)) { + VMX(v, irq_check) = 0; + vhpi_detection(v); + } +} + + +static inline void handle_lds(struct kvm_pt_regs *regs) +{ + regs->cr_ipsr |= IA64_PSR_ED; +} + +void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) +{ + unsigned long pte; + union ia64_rr rr; + + rr.val = ia64_get_rr(vadr); + pte = vadr & _PAGE_PPN_MASK; + pte = pte | PHY_PAGE_WB; + thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); + return; +} + +void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) +{ + unsigned long vpsr; + int type; + + u64 vhpt_adr, gppa, pteval, rr, itir; + union ia64_isr misr; + union ia64_pta vpta; + struct thash_data *data; + struct kvm_vcpu *v = current_vcpu; + + vpsr = VCPU(v, vpsr); + misr.val = VMX(v, cr_isr); + + type = vec; + + if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { + if (vec == 2) { + if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { + emulate_io_inst(v, ((vadr << 1) >> 1), 4); + return; + } + } + physical_tlb_miss(v, vadr, type); + return; + } + data = vtlb_lookup(v, vadr, type); + if (data != 0) { + if (type == D_TLB) { + gppa = (vadr & ((1UL << data->ps) - 1)) + + (data->ppn >> (data->ps - 12) << data->ps); + if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { + if (data->pl >= ((regs->cr_ipsr >> + IA64_PSR_CPL0_BIT) & 3)) + emulate_io_inst(v, gppa, data->ma); + else { + vcpu_set_isr(v, misr.val); + data_access_rights(v, vadr); + } + return ; + } + } + thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); + + } else if (type == D_TLB) { + if (misr.sp) { + handle_lds(regs); + return; + } + + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + + if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + alt_dtlb(v, vadr); + } else { + nested_dtlb(v); + } + return ; + } + + vpta.val = vcpu_get_pta(v); + /* avoid recursively walking (short format) VHPT */ + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (!(pteval & _PAGE_P)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { + thash_purge_and_insert(v, pteval, itir, + vadr, D_TLB); + } else if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else { + /* Can't read VHPT. */ + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dvhpt_fault(v, vadr); + } else { + nested_dtlb(v); + } + } + } else if (type == I_TLB) { + if (!(vpsr & IA64_PSR_IC)) + misr.ni = 1; + if (!vhpt_enabled(v, vadr, INST_REF)) { + vcpu_set_isr(v, misr.val); + alt_itlb(v, vadr); + return; + } + + vpta.val = vcpu_get_pta(v); + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (pteval & _PAGE_P) { + if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { + vcpu_set_isr(v, misr.val); + itlb_fault(v, vadr); + return ; + } + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + thash_purge_and_insert(v, pteval, itir, + vadr, I_TLB); + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + } + } else { + vcpu_set_isr(v, misr.val); + ivhpt_fault(v, vadr); + } + } +} + +void kvm_vexirq(struct kvm_vcpu *vcpu) +{ + u64 vpsr, isr; + struct kvm_pt_regs *regs; + + regs = vcpu_regs(vcpu); + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ +} + +void kvm_ia64_handle_irq(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; + vmm_transition(v); + local_irq_restore(psr); + + VMX(v, timer_check) = 1; + +} + +static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) +{ + u64 oldrid, moldrid, oldpsbits, vaddr; + struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; + vaddr = p->vaddr; + + oldrid = VMX(v, vrr[0]); + VMX(v, vrr[0]) = p->rr; + oldpsbits = VMX(v, psbits[0]); + VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); + moldrid = ia64_get_rr(0x0); + ia64_set_rr(0x0, vrrtomrr(p->rr)); + ia64_srlz_d(); + + vaddr = PAGEALIGN(vaddr, p->ps); + thash_purge_entries_remote(v, vaddr, p->ps); + + VMX(v, vrr[0]) = oldrid; + VMX(v, psbits[0]) = oldpsbits; + ia64_set_rr(0x0, moldrid); + ia64_dv_serialize_data(); +} + +static void vcpu_do_resume(struct kvm_vcpu *vcpu) +{ + /*Re-init VHPT and VTLB once from resume*/ + vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); + vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); + + ia64_set_pta(vcpu->arch.vhpt.pta.val); +} + +static void kvm_do_resume_op(struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { + vcpu_do_resume(vcpu); + return; + } + + if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { + thash_purge_all(vcpu); + return; + } + + if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { + while (vcpu->arch.ptc_g_count > 0) + ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); + } +} + +void vmm_transition(struct kvm_vcpu *vcpu) +{ + ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_do_resume_op(vcpu); +} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S new file mode 100644 index 00000000000..30897d44d61 --- /dev/null +++ b/arch/ia64/kvm/trampoline.S @@ -0,0 +1,1038 @@ +/* Save all processor states + * + * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> + * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include "asm-offsets.h" + + +#define CTX(name) VMM_CTX_##name##_OFFSET + + /* + * r32: context_t base address + */ +#define SAVE_BRANCH_REGS \ + add r2 = CTX(B0),r32; \ + add r3 = CTX(B1),r32; \ + mov r16 = b0; \ + mov r17 = b1; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b2; \ + mov r17 = b3; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b4; \ + mov r17 = b5; \ + ;; \ + st8 [r2]=r16; \ + st8 [r3]=r17; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_BRANCH_REGS \ + add r2 = CTX(B0),r33; \ + add r3 = CTX(B1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b0 = r16; \ + mov b1 = r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b2 = r16; \ + mov b3 = r17; \ + ;; \ + ld8 r16=[r2]; \ + ld8 r17=[r3]; \ + ;; \ + mov b4=r16; \ + mov b5=r17; \ + ;; + + + /* + * r32: context_t base address + * bsw == 1 + * Save all bank1 general registers, r4 ~ r7 + */ +#define SAVE_GENERAL_REGS \ + add r2=CTX(R4),r32; \ + add r3=CTX(R5),r32; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r4,16; \ +.mem.offset 8,0; \ + st8.spill [r3]=r5,16; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r6,48; \ +.mem.offset 8,0; \ + st8.spill [r3]=r7,48; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r12; \ +.mem.offset 8,0; \ + st8.spill [r3]=r13; \ + ;; + + /* + * r33: context_t base address + * bsw == 1 + */ +#define RESTORE_GENERAL_REGS \ + add r2=CTX(R4),r33; \ + add r3=CTX(R5),r33; \ + ;; \ + ld8.fill r4=[r2],16; \ + ld8.fill r5=[r3],16; \ + ;; \ + ld8.fill r6=[r2],48; \ + ld8.fill r7=[r3],48; \ + ;; \ + ld8.fill r12=[r2]; \ + ld8.fill r13 =[r3]; \ + ;; + + + + + /* + * r32: context_t base address + */ +#define SAVE_KERNEL_REGS \ + add r2 = CTX(KR0),r32; \ + add r3 = CTX(KR1),r32; \ + mov r16 = ar.k0; \ + mov r17 = ar.k1; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k2; \ + mov r17 = ar.k3; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k4; \ + mov r17 = ar.k5; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k6; \ + mov r17 = ar.k7; \ + ;; \ + st8 [r2] = r16; \ + st8 [r3] = r17; \ + ;; + + + + /* + * r33: context_t base address + */ +#define RESTORE_KERNEL_REGS \ + add r2 = CTX(KR0),r33; \ + add r3 = CTX(KR1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k0=r16; \ + mov ar.k1=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k2=r16; \ + mov ar.k3=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k4=r16; \ + mov ar.k5=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k6=r16; \ + mov ar.k7=r17; \ + ;; + + + + /* + * r32: context_t base address + */ +#define SAVE_APP_REGS \ + add r2 = CTX(BSPSTORE),r32; \ + mov r16 = ar.bspstore; \ + ;; \ + st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ + mov r16 = ar.rnat; \ + ;; \ + st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ + mov r16 = ar.fcr; \ + ;; \ + st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ + mov r16 = ar.eflag; \ + ;; \ + st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ + mov r16 = ar.cflg; \ + ;; \ + st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ + mov r16 = ar.fsr; \ + ;; \ + st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ + mov r16 = ar.fir; \ + ;; \ + st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ + mov r16 = ar.fdr; \ + ;; \ + st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ + mov r16 = ar.unat; \ + ;; \ + st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ + mov r16 = ar.fpsr; \ + ;; \ + st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ + mov r16 = ar.pfs; \ + ;; \ + st8 [r2] = r16,CTX(LC)-CTX(PFS); \ + mov r16 = ar.lc; \ + ;; \ + st8 [r2] = r16; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_APP_REGS \ + add r2=CTX(BSPSTORE),r33; \ + ;; \ + ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ + ;; \ + mov ar.bspstore=r16; \ + ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ + ;; \ + mov ar.rnat=r16; \ + ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ + ;; \ + mov ar.fcr=r16; \ + ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ + ;; \ + mov ar.eflag=r16; \ + ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ + ;; \ + mov ar.cflg=r16; \ + ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ + ;; \ + mov ar.fsr=r16; \ + ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ + ;; \ + mov ar.fir=r16; \ + ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ + ;; \ + mov ar.fdr=r16; \ + ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ + ;; \ + mov ar.unat=r16; \ + ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ + ;; \ + mov ar.fpsr=r16; \ + ld8 r16=[r2],CTX(LC)-CTX(PFS); \ + ;; \ + mov ar.pfs=r16; \ + ld8 r16=[r2]; \ + ;; \ + mov ar.lc=r16; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_CTL_REGS \ + add r2 = CTX(DCR),r32; \ + mov r16 = cr.dcr; \ + ;; \ + st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ + ;; \ + mov r16 = cr.iva; \ + ;; \ + st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ + ;; \ + mov r16 = cr.pta; \ + ;; \ + st8 [r2] = r16 ; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_CTL_REGS \ + add r2 = CTX(DCR),r33; \ + ;; \ + ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ + ;; \ + mov cr.dcr = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ + ;; \ + mov cr.iva = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2]; \ + ;; \ + mov cr.pta = r16; \ + dv_serialize_data; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_REGION_REGS \ + add r2=CTX(RR0),r32; \ + mov r16=rr[r0]; \ + dep.z r18=1,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=2,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=3,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=4,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=5,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=7,61,3; \ + ;; \ + st8 [r2]=r17,16; \ + mov r16=rr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + ;; + + /* + * r33:context_t base address + */ +#define RESTORE_REGION_REGS \ + add r2=CTX(RR0),r33;\ + mov r18=r0; \ + ;; \ + ld8 r20=[r2],8; \ + ;; /* rr0 */ \ + ld8 r21=[r2],8; \ + ;; /* rr1 */ \ + ld8 r22=[r2],8; \ + ;; /* rr2 */ \ + ld8 r23=[r2],8; \ + ;; /* rr3 */ \ + ld8 r24=[r2],8; \ + ;; /* rr4 */ \ + ld8 r25=[r2],16; \ + ;; /* rr5 */ \ + ld8 r27=[r2]; \ + ;; /* rr7 */ \ + mov rr[r18]=r20; \ + dep.z r18=1,61,3; \ + ;; /* rr1 */ \ + mov rr[r18]=r21; \ + dep.z r18=2,61,3; \ + ;; /* rr2 */ \ + mov rr[r18]=r22; \ + dep.z r18=3,61,3; \ + ;; /* rr3 */ \ + mov rr[r18]=r23; \ + dep.z r18=4,61,3; \ + ;; /* rr4 */ \ + mov rr[r18]=r24; \ + dep.z r18=5,61,3; \ + ;; /* rr5 */ \ + mov rr[r18]=r25; \ + dep.z r18=7,61,3; \ + ;; /* rr7 */ \ + mov rr[r18]=r27; \ + ;; \ + srlz.i; \ + ;; + + + + /* + * r32: context_t base address + * r36~r39:scratch registers + */ +#define SAVE_DEBUG_REGS \ + add r2=CTX(IBR0),r32; \ + add r3=CTX(DBR0),r32; \ + mov r16=ibr[r0]; \ + mov r17=dbr[r0]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=1,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=3,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=4,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=5,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=6,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=7,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + ;; + + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_DEBUG_REGS \ + add r2=CTX(IBR0),r33; \ + add r3=CTX(DBR0),r33; \ + mov r16=7; \ + mov r17=r0; \ + ;; \ + mov ar.lc = r16; \ + ;; \ +1: \ + ld8 r18=[r2],8; \ + ld8 r19=[r3],8; \ + ;; \ + mov ibr[r17]=r18; \ + mov dbr[r17]=r19; \ + ;; \ + srlz.i; \ + ;; \ + add r17=1,r17; \ + br.cloop.sptk 1b; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_FPU_LOW \ + add r2=CTX(F2),r32; \ + add r3=CTX(F3),r32; \ + ;; \ + stf.spill.nta [r2]=f2,32; \ + stf.spill.nta [r3]=f3,32; \ + ;; \ + stf.spill.nta [r2]=f4,32; \ + stf.spill.nta [r3]=f5,32; \ + ;; \ + stf.spill.nta [r2]=f6,32; \ + stf.spill.nta [r3]=f7,32; \ + ;; \ + stf.spill.nta [r2]=f8,32; \ + stf.spill.nta [r3]=f9,32; \ + ;; \ + stf.spill.nta [r2]=f10,32; \ + stf.spill.nta [r3]=f11,32; \ + ;; \ + stf.spill.nta [r2]=f12,32; \ + stf.spill.nta [r3]=f13,32; \ + ;; \ + stf.spill.nta [r2]=f14,32; \ + stf.spill.nta [r3]=f15,32; \ + ;; \ + stf.spill.nta [r2]=f16,32; \ + stf.spill.nta [r3]=f17,32; \ + ;; \ + stf.spill.nta [r2]=f18,32; \ + stf.spill.nta [r3]=f19,32; \ + ;; \ + stf.spill.nta [r2]=f20,32; \ + stf.spill.nta [r3]=f21,32; \ + ;; \ + stf.spill.nta [r2]=f22,32; \ + stf.spill.nta [r3]=f23,32; \ + ;; \ + stf.spill.nta [r2]=f24,32; \ + stf.spill.nta [r3]=f25,32; \ + ;; \ + stf.spill.nta [r2]=f26,32; \ + stf.spill.nta [r3]=f27,32; \ + ;; \ + stf.spill.nta [r2]=f28,32; \ + stf.spill.nta [r3]=f29,32; \ + ;; \ + stf.spill.nta [r2]=f30; \ + stf.spill.nta [r3]=f31; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_FPU_HIGH \ + add r2=CTX(F32),r32; \ + add r3=CTX(F33),r32; \ + ;; \ + stf.spill.nta [r2]=f32,32; \ + stf.spill.nta [r3]=f33,32; \ + ;; \ + stf.spill.nta [r2]=f34,32; \ + stf.spill.nta [r3]=f35,32; \ + ;; \ + stf.spill.nta [r2]=f36,32; \ + stf.spill.nta [r3]=f37,32; \ + ;; \ + stf.spill.nta [r2]=f38,32; \ + stf.spill.nta [r3]=f39,32; \ + ;; \ + stf.spill.nta [r2]=f40,32; \ + stf.spill.nta [r3]=f41,32; \ + ;; \ + stf.spill.nta [r2]=f42,32; \ + stf.spill.nta [r3]=f43,32; \ + ;; \ + stf.spill.nta [r2]=f44,32; \ + stf.spill.nta [r3]=f45,32; \ + ;; \ + stf.spill.nta [r2]=f46,32; \ + stf.spill.nta [r3]=f47,32; \ + ;; \ + stf.spill.nta [r2]=f48,32; \ + stf.spill.nta [r3]=f49,32; \ + ;; \ + stf.spill.nta [r2]=f50,32; \ + stf.spill.nta [r3]=f51,32; \ + ;; \ + stf.spill.nta [r2]=f52,32; \ + stf.spill.nta [r3]=f53,32; \ + ;; \ + stf.spill.nta [r2]=f54,32; \ + stf.spill.nta [r3]=f55,32; \ + ;; \ + stf.spill.nta [r2]=f56,32; \ + stf.spill.nta [r3]=f57,32; \ + ;; \ + stf.spill.nta [r2]=f58,32; \ + stf.spill.nta [r3]=f59,32; \ + ;; \ + stf.spill.nta [r2]=f60,32; \ + stf.spill.nta [r3]=f61,32; \ + ;; \ + stf.spill.nta [r2]=f62,32; \ + stf.spill.nta [r3]=f63,32; \ + ;; \ + stf.spill.nta [r2]=f64,32; \ + stf.spill.nta [r3]=f65,32; \ + ;; \ + stf.spill.nta [r2]=f66,32; \ + stf.spill.nta [r3]=f67,32; \ + ;; \ + stf.spill.nta [r2]=f68,32; \ + stf.spill.nta [r3]=f69,32; \ + ;; \ + stf.spill.nta [r2]=f70,32; \ + stf.spill.nta [r3]=f71,32; \ + ;; \ + stf.spill.nta [r2]=f72,32; \ + stf.spill.nta [r3]=f73,32; \ + ;; \ + stf.spill.nta [r2]=f74,32; \ + stf.spill.nta [r3]=f75,32; \ + ;; \ + stf.spill.nta [r2]=f76,32; \ + stf.spill.nta [r3]=f77,32; \ + ;; \ + stf.spill.nta [r2]=f78,32; \ + stf.spill.nta [r3]=f79,32; \ + ;; \ + stf.spill.nta [r2]=f80,32; \ + stf.spill.nta [r3]=f81,32; \ + ;; \ + stf.spill.nta [r2]=f82,32; \ + stf.spill.nta [r3]=f83,32; \ + ;; \ + stf.spill.nta [r2]=f84,32; \ + stf.spill.nta [r3]=f85,32; \ + ;; \ + stf.spill.nta [r2]=f86,32; \ + stf.spill.nta [r3]=f87,32; \ + ;; \ + stf.spill.nta [r2]=f88,32; \ + stf.spill.nta [r3]=f89,32; \ + ;; \ + stf.spill.nta [r2]=f90,32; \ + stf.spill.nta [r3]=f91,32; \ + ;; \ + stf.spill.nta [r2]=f92,32; \ + stf.spill.nta [r3]=f93,32; \ + ;; \ + stf.spill.nta [r2]=f94,32; \ + stf.spill.nta [r3]=f95,32; \ + ;; \ + stf.spill.nta [r2]=f96,32; \ + stf.spill.nta [r3]=f97,32; \ + ;; \ + stf.spill.nta [r2]=f98,32; \ + stf.spill.nta [r3]=f99,32; \ + ;; \ + stf.spill.nta [r2]=f100,32; \ + stf.spill.nta [r3]=f101,32; \ + ;; \ + stf.spill.nta [r2]=f102,32; \ + stf.spill.nta [r3]=f103,32; \ + ;; \ + stf.spill.nta [r2]=f104,32; \ + stf.spill.nta [r3]=f105,32; \ + ;; \ + stf.spill.nta [r2]=f106,32; \ + stf.spill.nta [r3]=f107,32; \ + ;; \ + stf.spill.nta [r2]=f108,32; \ + stf.spill.nta [r3]=f109,32; \ + ;; \ + stf.spill.nta [r2]=f110,32; \ + stf.spill.nta [r3]=f111,32; \ + ;; \ + stf.spill.nta [r2]=f112,32; \ + stf.spill.nta [r3]=f113,32; \ + ;; \ + stf.spill.nta [r2]=f114,32; \ + stf.spill.nta [r3]=f115,32; \ + ;; \ + stf.spill.nta [r2]=f116,32; \ + stf.spill.nta [r3]=f117,32; \ + ;; \ + stf.spill.nta [r2]=f118,32; \ + stf.spill.nta [r3]=f119,32; \ + ;; \ + stf.spill.nta [r2]=f120,32; \ + stf.spill.nta [r3]=f121,32; \ + ;; \ + stf.spill.nta [r2]=f122,32; \ + stf.spill.nta [r3]=f123,32; \ + ;; \ + stf.spill.nta [r2]=f124,32; \ + stf.spill.nta [r3]=f125,32; \ + ;; \ + stf.spill.nta [r2]=f126; \ + stf.spill.nta [r3]=f127; \ + ;; + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_LOW \ + add r2 = CTX(F2), r33; \ + add r3 = CTX(F3), r33; \ + ;; \ + ldf.fill.nta f2 = [r2], 32; \ + ldf.fill.nta f3 = [r3], 32; \ + ;; \ + ldf.fill.nta f4 = [r2], 32; \ + ldf.fill.nta f5 = [r3], 32; \ + ;; \ + ldf.fill.nta f6 = [r2], 32; \ + ldf.fill.nta f7 = [r3], 32; \ + ;; \ + ldf.fill.nta f8 = [r2], 32; \ + ldf.fill.nta f9 = [r3], 32; \ + ;; \ + ldf.fill.nta f10 = [r2], 32; \ + ldf.fill.nta f11 = [r3], 32; \ + ;; \ + ldf.fill.nta f12 = [r2], 32; \ + ldf.fill.nta f13 = [r3], 32; \ + ;; \ + ldf.fill.nta f14 = [r2], 32; \ + ldf.fill.nta f15 = [r3], 32; \ + ;; \ + ldf.fill.nta f16 = [r2], 32; \ + ldf.fill.nta f17 = [r3], 32; \ + ;; \ + ldf.fill.nta f18 = [r2], 32; \ + ldf.fill.nta f19 = [r3], 32; \ + ;; \ + ldf.fill.nta f20 = [r2], 32; \ + ldf.fill.nta f21 = [r3], 32; \ + ;; \ + ldf.fill.nta f22 = [r2], 32; \ + ldf.fill.nta f23 = [r3], 32; \ + ;; \ + ldf.fill.nta f24 = [r2], 32; \ + ldf.fill.nta f25 = [r3], 32; \ + ;; \ + ldf.fill.nta f26 = [r2], 32; \ + ldf.fill.nta f27 = [r3], 32; \ + ;; \ + ldf.fill.nta f28 = [r2], 32; \ + ldf.fill.nta f29 = [r3], 32; \ + ;; \ + ldf.fill.nta f30 = [r2], 32; \ + ldf.fill.nta f31 = [r3], 32; \ + ;; + + + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_HIGH \ + add r2 = CTX(F32), r33; \ + add r3 = CTX(F33), r33; \ + ;; \ + ldf.fill.nta f32 = [r2], 32; \ + ldf.fill.nta f33 = [r3], 32; \ + ;; \ + ldf.fill.nta f34 = [r2], 32; \ + ldf.fill.nta f35 = [r3], 32; \ + ;; \ + ldf.fill.nta f36 = [r2], 32; \ + ldf.fill.nta f37 = [r3], 32; \ + ;; \ + ldf.fill.nta f38 = [r2], 32; \ + ldf.fill.nta f39 = [r3], 32; \ + ;; \ + ldf.fill.nta f40 = [r2], 32; \ + ldf.fill.nta f41 = [r3], 32; \ + ;; \ + ldf.fill.nta f42 = [r2], 32; \ + ldf.fill.nta f43 = [r3], 32; \ + ;; \ + ldf.fill.nta f44 = [r2], 32; \ + ldf.fill.nta f45 = [r3], 32; \ + ;; \ + ldf.fill.nta f46 = [r2], 32; \ + ldf.fill.nta f47 = [r3], 32; \ + ;; \ + ldf.fill.nta f48 = [r2], 32; \ + ldf.fill.nta f49 = [r3], 32; \ + ;; \ + ldf.fill.nta f50 = [r2], 32; \ + ldf.fill.nta f51 = [r3], 32; \ + ;; \ + ldf.fill.nta f52 = [r2], 32; \ + ldf.fill.nta f53 = [r3], 32; \ + ;; \ + ldf.fill.nta f54 = [r2], 32; \ + ldf.fill.nta f55 = [r3], 32; \ + ;; \ + ldf.fill.nta f56 = [r2], 32; \ + ldf.fill.nta f57 = [r3], 32; \ + ;; \ + ldf.fill.nta f58 = [r2], 32; \ + ldf.fill.nta f59 = [r3], 32; \ + ;; \ + ldf.fill.nta f60 = [r2], 32; \ + ldf.fill.nta f61 = [r3], 32; \ + ;; \ + ldf.fill.nta f62 = [r2], 32; \ + ldf.fill.nta f63 = [r3], 32; \ + ;; \ + ldf.fill.nta f64 = [r2], 32; \ + ldf.fill.nta f65 = [r3], 32; \ + ;; \ + ldf.fill.nta f66 = [r2], 32; \ + ldf.fill.nta f67 = [r3], 32; \ + ;; \ + ldf.fill.nta f68 = [r2], 32; \ + ldf.fill.nta f69 = [r3], 32; \ + ;; \ + ldf.fill.nta f70 = [r2], 32; \ + ldf.fill.nta f71 = [r3], 32; \ + ;; \ + ldf.fill.nta f72 = [r2], 32; \ + ldf.fill.nta f73 = [r3], 32; \ + ;; \ + ldf.fill.nta f74 = [r2], 32; \ + ldf.fill.nta f75 = [r3], 32; \ + ;; \ + ldf.fill.nta f76 = [r2], 32; \ + ldf.fill.nta f77 = [r3], 32; \ + ;; \ + ldf.fill.nta f78 = [r2], 32; \ + ldf.fill.nta f79 = [r3], 32; \ + ;; \ + ldf.fill.nta f80 = [r2], 32; \ + ldf.fill.nta f81 = [r3], 32; \ + ;; \ + ldf.fill.nta f82 = [r2], 32; \ + ldf.fill.nta f83 = [r3], 32; \ + ;; \ + ldf.fill.nta f84 = [r2], 32; \ + ldf.fill.nta f85 = [r3], 32; \ + ;; \ + ldf.fill.nta f86 = [r2], 32; \ + ldf.fill.nta f87 = [r3], 32; \ + ;; \ + ldf.fill.nta f88 = [r2], 32; \ + ldf.fill.nta f89 = [r3], 32; \ + ;; \ + ldf.fill.nta f90 = [r2], 32; \ + ldf.fill.nta f91 = [r3], 32; \ + ;; \ + ldf.fill.nta f92 = [r2], 32; \ + ldf.fill.nta f93 = [r3], 32; \ + ;; \ + ldf.fill.nta f94 = [r2], 32; \ + ldf.fill.nta f95 = [r3], 32; \ + ;; \ + ldf.fill.nta f96 = [r2], 32; \ + ldf.fill.nta f97 = [r3], 32; \ + ;; \ + ldf.fill.nta f98 = [r2], 32; \ + ldf.fill.nta f99 = [r3], 32; \ + ;; \ + ldf.fill.nta f100 = [r2], 32; \ + ldf.fill.nta f101 = [r3], 32; \ + ;; \ + ldf.fill.nta f102 = [r2], 32; \ + ldf.fill.nta f103 = [r3], 32; \ + ;; \ + ldf.fill.nta f104 = [r2], 32; \ + ldf.fill.nta f105 = [r3], 32; \ + ;; \ + ldf.fill.nta f106 = [r2], 32; \ + ldf.fill.nta f107 = [r3], 32; \ + ;; \ + ldf.fill.nta f108 = [r2], 32; \ + ldf.fill.nta f109 = [r3], 32; \ + ;; \ + ldf.fill.nta f110 = [r2], 32; \ + ldf.fill.nta f111 = [r3], 32; \ + ;; \ + ldf.fill.nta f112 = [r2], 32; \ + ldf.fill.nta f113 = [r3], 32; \ + ;; \ + ldf.fill.nta f114 = [r2], 32; \ + ldf.fill.nta f115 = [r3], 32; \ + ;; \ + ldf.fill.nta f116 = [r2], 32; \ + ldf.fill.nta f117 = [r3], 32; \ + ;; \ + ldf.fill.nta f118 = [r2], 32; \ + ldf.fill.nta f119 = [r3], 32; \ + ;; \ + ldf.fill.nta f120 = [r2], 32; \ + ldf.fill.nta f121 = [r3], 32; \ + ;; \ + ldf.fill.nta f122 = [r2], 32; \ + ldf.fill.nta f123 = [r3], 32; \ + ;; \ + ldf.fill.nta f124 = [r2], 32; \ + ldf.fill.nta f125 = [r3], 32; \ + ;; \ + ldf.fill.nta f126 = [r2], 32; \ + ldf.fill.nta f127 = [r3], 32; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_PTK_REGS \ + add r2=CTX(PKR0), r32; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + mov r18=pkr[r17]; \ + ;; \ + srlz.i; \ + ;; \ + st8 [r2]=r18, 8; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_PTK_REGS \ + add r2=CTX(PKR0), r33; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + ld8 r18=[r2], 8; \ + ;; \ + mov pkr[r17]=r18; \ + ;; \ + srlz.i; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + + +/* + * void vmm_trampoline( context_t * from, + * context_t * to) + * + * from: r32 + * to: r33 + * note: interrupt disabled before call this function. + */ +GLOBAL_ENTRY(vmm_trampoline) + mov r16 = psr + adds r2 = CTX(PSR), r32 + ;; + st8 [r2] = r16, 8 // psr + mov r17 = pr + ;; + st8 [r2] = r17, 8 // pr + mov r18 = ar.unat + ;; + st8 [r2] = r18 + mov r17 = ar.rsc + ;; + adds r2 = CTX(RSC),r32 + ;; + st8 [r2]= r17 + mov ar.rsc =0 + flushrs + ;; + SAVE_GENERAL_REGS + ;; + SAVE_KERNEL_REGS + ;; + SAVE_APP_REGS + ;; + SAVE_BRANCH_REGS + ;; + SAVE_CTL_REGS + ;; + SAVE_REGION_REGS + ;; + //SAVE_DEBUG_REGS + ;; + rsm psr.dfl + ;; + srlz.d + ;; + SAVE_FPU_LOW + ;; + rsm psr.dfh + ;; + srlz.d + ;; + SAVE_FPU_HIGH + ;; + SAVE_PTK_REGS + ;; + RESTORE_PTK_REGS + ;; + RESTORE_FPU_HIGH + ;; + RESTORE_FPU_LOW + ;; + //RESTORE_DEBUG_REGS + ;; + RESTORE_REGION_REGS + ;; + RESTORE_CTL_REGS + ;; + RESTORE_BRANCH_REGS + ;; + RESTORE_APP_REGS + ;; + RESTORE_KERNEL_REGS + ;; + RESTORE_GENERAL_REGS + ;; + adds r2=CTX(PSR), r33 + ;; + ld8 r16=[r2], 8 // psr + ;; + mov psr.l=r16 + ;; + srlz.d + ;; + ld8 r16=[r2], 8 // pr + ;; + mov pr =r16,-1 + ld8 r16=[r2] // unat + ;; + mov ar.unat=r16 + ;; + adds r2=CTX(RSC),r33 + ;; + ld8 r16 =[r2] + ;; + mov ar.rsc = r16 + ;; + br.ret.sptk.few b0 +END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c new file mode 100644 index 00000000000..e44027ce566 --- /dev/null +++ b/arch/ia64/kvm/vcpu.c @@ -0,0 +1,2163 @@ +/* + * kvm_vcpu.c: handling all virtual cpu related thing. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang <xiantao.zhang@intel.com> + */ + +#include <linux/kvm_host.h> +#include <linux/types.h> + +#include <asm/processor.h> +#include <asm/ia64regs.h> +#include <asm/gcc_intrin.h> +#include <asm/kregs.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> + +#include "asm-offsets.h" +#include "vcpu.h" + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, + /* (1,0,0)->(1,1,1) */ + {0, 0, 0, 0, 0, 0, 0, SW_P2V}, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. + * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, +}; + +void physical_mode_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mode_flags = GUEST_IN_PHY; +} + +void switch_to_physical_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + /* Save original virtual mode rr[0] and rr[4] */ + psr = ia64_clear_ic(); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); + ia64_srlz_d(); + + ia64_set_psr(psr); + return; +} + + +void switch_to_virtual_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); + ia64_srlz_d(); + ia64_set_psr(psr); + return; +} + +static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + int act; + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val);*/ + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse + *save/restor when guest physical mode. + */ + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which is saved when entering + * guest physical mode + */ + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + break; + case SW_NOP: + break; + default: + /* Sanity check */ + break; + } + return; +} + + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + + if ((old_psr.dt != new_psr.dt) + || (old_psr.it != new_psr.it) + || (old_psr.rt != new_psr.rt)) + switch_mm_mode(vcpu, old_psr, new_psr); + + return; +} + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void prepare_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags |= GUEST_PHY_EMUL; + switch_to_virtual_rid(vcpu); + } + return; +} + +/* Recover always follows prepare */ +void recover_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_physical_rid(vcpu); + vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; + return; +} + +#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) + +static u16 gr_info[32] = { + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + RPT(r1), RPT(r2), RPT(r3), + RPT(r4), RPT(r5), RPT(r6), RPT(r7), + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 + +static inline unsigned long +rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +/* + * Return the (rotated) index for floating point register + * be in the REGNUM (REGNUM must range from 32-127, + * result is in the range from 0-95. + */ +static inline unsigned long fph_index(struct kvm_pt_regs *regs, + long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. + */ +static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, + long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + int i = 0; + + if (num_regs < 0) + delta -= 0x3e; + if (delta < 0) { + while (delta <= -0x3f) { + i--; + delta += 0x3f; + } + } else { + while (delta >= 0x3f) { + i++; + delta -= 0x3f; + } + } + + return addr + num_regs + i; +} + +static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long *val, int *nat) +{ + unsigned long *bsp, *addr, *rnat_addr, *bspstore; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + new_rsc = old_rsc&(~(0x3)); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + bsp = kbs + (regs->loadrs >> 19); + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + if (addr >= bspstore) { + ia64_flushrs(); + ia64_mf(); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + } + *val = *addr; + if (nat) { + if (bspstore < rnat_addr) + *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) + & nat_mask); + else + *nat = (int)!!((*rnat_addr) & nat_mask); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); + } +} + +void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long val, unsigned long nat) +{ + unsigned long *bsp, *bspstore, *addr, *rnat_addr; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc, psr; + unsigned long rnat; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + if (addr >= bspstore) { + + ia64_flushrs(); + ia64_mf(); + *addr = val; + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + if (bspstore < rnat_addr) + rnat = rnat & (~nat_mask); + else + *rnat_addr = (*rnat_addr)&(~nat_mask); + + ia64_mf(); + ia64_loadrs(); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } else { + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + *addr = val; + if (bspstore < rnat_addr) + rnat = rnat&(~nat_mask); + else + *rnat_addr = (*rnat_addr) & (~nat_mask); + + ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } + local_irq_restore(psr); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); +} + +void getreg(unsigned long regnum, unsigned long *val, + int *nat, struct kvm_pt_regs *regs) +{ + unsigned long addr, *unat; + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat;; + + addr += gr_info[regnum]; + + *val = *(unsigned long *)addr; + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; +} + +void setreg(unsigned long regnum, unsigned long val, + int nat, struct kvm_pt_regs *regs) +{ + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat; + /* + * add offset from base of struct + * and do it ! + */ + addr += gr_info[regnum]; + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << ((addr >> 3) & 0x3f); + if (nat) + *unat |= bitmask; + else + *unat &= ~bitmask; + +} + +u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + u64 val; + + if (!reg) + return 0; + getreg(reg, &val, 0, regs); + return val; +} + +void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + + if (!reg) + return; + if (reg >= sof + 32) + return; + setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ +} + +void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_stf_spill(fpval, reg); \ + break + + switch (regnum) { + CASE_FIXED_FP(0); + CASE_FIXED_FP(1); + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +#undef CASE_FIXED_FP +} + +void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); + +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_ldf_fill(reg, fpval); \ + break + + switch (regnum) { + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +} + +void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + if (reg > 1) + setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +/************************************************************************ + * lsapic timer + ***********************************************************************/ +u64 vcpu_get_itc(struct kvm_vcpu *vcpu) +{ + unsigned long guest_itc; + guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); + + if (guest_itc >= VMX(vcpu, last_itc)) { + VMX(vcpu, last_itc) = guest_itc; + return guest_itc; + } else + return VMX(vcpu, last_itc); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); +static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_vcpu *v; + int i; + long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); + unsigned long vitv = VCPU(vcpu, itv); + + if (vcpu->vcpu_id == 0) { + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + VMX(v, itc_offset) = itc_offset; + VMX(v, last_itc) = 0; + } + } + VMX(vcpu, last_itc) = 0; + if (VCPU(vcpu, itm) <= val) { + VMX(vcpu, itc_check) = 0; + vcpu_unpend_interrupt(vcpu, vitv); + } else { + VMX(vcpu, itc_check) = 1; + vcpu_set_itm(vcpu, VCPU(vcpu, itm)); + } + +} + +static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itm)); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long vitv = VCPU(vcpu, itv); + VCPU(vcpu, itm) = val; + + if (val > vcpu_get_itc(vcpu)) { + VMX(vcpu, itc_check) = 1; + vcpu_unpend_interrupt(vcpu, vitv); + VMX(vcpu, timer_pending) = 0; + } else + VMX(vcpu, itc_check) = 0; +} + +#define ITV_VECTOR(itv) (itv&0xff) +#define ITV_IRQ_MASK(itv) (itv&(1<<16)) + +static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itv) = val; + if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { + vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); + vcpu->arch.timer_pending = 0; + } +} + +static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) +{ + int vec; + + vec = highest_inservice_irq(vcpu); + if (vec == NULL_VECTOR) + return; + VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); + VCPU(vcpu, eoi) = 0; + vcpu->arch.irq_new_pending = 1; + +} + +/* See Table 5-8 in SDM vol2 for the definition */ +int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) +{ + union ia64_tpr vtpr; + + vtpr.val = VCPU(vcpu, tpr); + + if (h_inservice == NMI_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == NMI_VECTOR) { + /* Non Maskable Interrupt */ + return IRQ_NO_MASKED; + } + + if (h_inservice == ExtINT_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == ExtINT_VECTOR) { + if (vtpr.mmi) { + /* mask all external IRQ */ + return IRQ_MASKED_BY_VTPR; + } else + return IRQ_NO_MASKED; + } + + if (is_higher_irq(h_pending, h_inservice)) { + if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) + return IRQ_NO_MASKED; + else + return IRQ_MASKED_BY_VTPR; + } else { + return IRQ_MASKED_BY_INSVC; + } +} + +void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + + vcpu->arch.irq_new_pending = 1; +} + +void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + if (ret) { + vcpu->arch.irq_new_pending = 1; + wmb(); + } +} + +void update_vhpi(struct kvm_vcpu *vcpu, int vec) +{ + u64 vhpi; + + if (vec == NULL_VECTOR) + vhpi = 0; + else if (vec == NMI_VECTOR) + vhpi = 32; + else if (vec == ExtINT_VECTOR) + vhpi = 16; + else + vhpi = vec >> 4; + + VCPU(vcpu, vhpi) = vhpi; + if (VCPU(vcpu, vac).a_int) + ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, + (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); +} + +u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) +{ + int vec, h_inservice, mask; + + vec = highest_pending_irq(vcpu); + h_inservice = highest_inservice_irq(vcpu); + mask = irq_masked(vcpu, vec, h_inservice); + if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + return IA64_SPURIOUS_INT_VECTOR; + } + if (mask == IRQ_MASKED_BY_VTPR) { + update_vhpi(vcpu, vec); + return IA64_SPURIOUS_INT_VECTOR; + } + VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); + vcpu_unpend_interrupt(vcpu, vec); + return (u64)vec; +} + +/************************************************************************** + Privileged operation emulation routines + **************************************************************************/ +u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_pta vpta; + union ia64_rr vrr; + u64 pval; + u64 vhpt_offset; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, + vpta.val, 0, 0, 0, 0); + } else { + pval = (vadr & VRN_MASK) | vhpt_offset | + (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); + } + return pval; +} + +u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_rr vrr; + union ia64_pta vpta; + u64 pval; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, + 0, 0, 0, 0, 0); + } else + pval = 1; + + return pval; +} + +u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) +{ + struct thash_data *data; + union ia64_pta vpta; + u64 key; + + vpta.val = vcpu_get_pta(vcpu); + if (vpta.vf == 0) { + key = 1; + return key; + } + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (!data || !data->p) + key = 1; + else + key = data->key; + + return key; +} + + + +void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long thash, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + thash = vcpu_thash(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); +} + + +void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tag, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + tag = vcpu_ttag(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); +} + +int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) +{ + struct thash_data *data; + union ia64_isr visr, pt_isr; + struct kvm_pt_regs *regs; + struct ia64_psr vpsr; + + regs = vcpu_regs(vcpu); + pt_isr.val = VMX(vcpu, cr_isr); + visr.val = 0; + visr.ei = pt_isr.ei; + visr.ir = pt_isr.ir; + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + visr.na = 1; + + data = vhpt_lookup(vadr); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else { + *padr = (data->gpaddr >> data->ps << data->ps) | + (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else{ + *padr = ((data->ppn >> (data->ps - 12)) << data->ps) + | (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + if (!vhpt_enabled(vcpu, vadr, NA_REF)) { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else { + nested_dtlb(vcpu); + return IA64_FAULT; + } + } else { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + + return IA64_NO_FAULT; +} + + +int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + + if (vcpu_tpa(vcpu, r3, &r1)) + return IA64_FAULT; + + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + r1 = vcpu_tak(vcpu, r3); + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); +} + + +/************************************ + * Insert/Purge translation register/cache + ************************************/ +void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); +} + +void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); +} + +void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 ps, va, rid; + struct thash_data *p_itr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + rid = vcpu_get_rr(vcpu, ifa); + rid = rid & RR_RID_MASK; + p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; + vcpu_set_tr(p_itr, pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, itr_regions), va); +} + + +void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 gpfn; + u64 ps, va, rid; + struct thash_data *p_dtr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); + gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; + if (__gpfn_is_io(gpfn)) + pte |= VTLB_PTE_IO; + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; + vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], + pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); +} + +void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) + vcpu->arch.dtrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) + vcpu->arch.itrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + va = PAGEALIGN(va, ps); + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) +{ + thash_purge_all(vcpu); +} + +void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + local_irq_save(psr); + p->exit_reason = EXIT_REASON_PTC_G; + + p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); + p->u.ptc_g_data.vaddr = va; + p->u.ptc_g_data.ps = ps; + vmm_transition(vcpu); + /* Do Local Purge Here*/ + vcpu_ptc_l(vcpu, va, ps); + local_irq_restore(psr); +} + + +void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + vcpu_ptc_ga(vcpu, va, ps); +} + +void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + vcpu_ptc_e(vcpu, ifa); +} + +void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); +} + +void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_d(vcpu, slot, pte, itir, ifa); +} + + + +void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_i(vcpu, slot, pte, itir, ifa); +} + +void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_d(vcpu, pte, itir, ifa); +} + +void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_i(vcpu, pte, itir, ifa); +} + +/************************************* + * Moves to semi-privileged registers + *************************************/ + +void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long imm; + + if (inst.M30.s) + imm = -inst.M30.imm; + else + imm = inst.M30.imm; + + vcpu_set_itc(vcpu, imm); +} + +void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M29.r2); + vcpu_set_itc(vcpu, r2); +} + + +void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1; + + r1 = vcpu_get_itc(vcpu); + vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); +} +/************************************************************************** + struct kvm_vcpu*protection key register access routines + **************************************************************************/ + +unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + return ((unsigned long)ia64_get_pkr(reg)); +} + +void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) +{ + ia64_set_pkr(reg, val); +} + + +unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + + +/******************************** + * Moves to privileged registers + ********************************/ +unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, + unsigned long val) +{ + union ia64_rr oldrr, newrr; + unsigned long rrval; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + oldrr.val = vcpu_get_rr(vcpu, reg); + newrr.val = val; + vcpu->arch.vrr[reg >> VRN_SHIFT] = val; + + switch ((unsigned long)(reg >> VRN_SHIFT)) { + case VRN6: + vcpu->arch.vmm_rr = vrrtomrr(val); + local_irq_save(psr); + p->exit_reason = EXIT_REASON_SWITCH_RR6; + vmm_transition(vcpu); + local_irq_restore(psr); + break; + case VRN4: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + case VRN0: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + default: + ia64_set_rr(reg, vrrtomrr(val)); + break; + } + + return (IA64_NO_FAULT); +} + + + +void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_rr(vcpu, r3, r2); +} + +void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmc(vcpu, r3, r2); +} + +void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmd(vcpu, r3, r2); +} + +void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + u64 r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pkr(vcpu, r3, r2); +} + + + +void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_rr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pkr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_dbr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_ibr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pmc(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + + +unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) +{ + /* FIXME: This could get called as a result of a rsvd-reg fault */ + if (reg > (ia64_get_cpuid(3) & 0xff)) + return 0; + else + return ia64_get_cpuid(reg); +} + +void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_cpuid(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) +{ + VCPU(vcpu, tpr) = val; + vcpu->arch.irq_check = 1; +} + +unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M32.r2); + VCPU(vcpu, vcr[inst.M32.cr3]) = r2; + + switch (inst.M32.cr3) { + case 0: + vcpu_set_dcr(vcpu, r2); + break; + case 1: + vcpu_set_itm(vcpu, r2); + break; + case 66: + vcpu_set_tpr(vcpu, r2); + break; + case 67: + vcpu_set_eoi(vcpu, r2); + break; + default: + break; + } + + return 0; +} + + +unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tgt = inst.M33.r1; + unsigned long val; + + switch (inst.M33.cr3) { + case 65: + val = vcpu_get_ivr(vcpu); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + + case 67: + vcpu_set_gr(vcpu, tgt, 0L, 0); + break; + default: + val = VCPU(vcpu, vcr[inst.M33.cr3]); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + } + + return 0; +} + + + +void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) +{ + + unsigned long mask; + struct kvm_pt_regs *regs; + struct ia64_psr old_psr, new_psr; + + old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + regs = vcpu_regs(vcpu); + /* We only support guest as: + * vpsr.pk = 0 + * vpsr.is = 0 + * Otherwise panic + */ + if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) + panic_vm(vcpu); + + /* + * For those IA64_PSR bits: id/da/dd/ss/ed/ia + * Since these bits will become 0, after success execution of each + * instruction, we will change set them to mIA64_PSR + */ + VCPU(vcpu, vpsr) = val + & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); + + if (!old_psr.i && (val & IA64_PSR_I)) { + /* vpsr.i 0->1 */ + vcpu->arch.irq_check = 1; + } + new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + /* + * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) + * , except for the following bits: + * ic/i/dt/si/rt/mc/it/bn/vm + */ + mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + + IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + + IA64_PSR_VM; + + regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); + + check_mm_mode_switch(vcpu, old_psr, new_psr); + + return ; +} + +unsigned long vcpu_cover(struct kvm_vcpu *vcpu) +{ + struct ia64_psr vpsr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + if (!vpsr.ic) + VCPU(vcpu, ifs) = regs->cr_ifs; + regs->cr_ifs = IA64_IFS_V; + return (IA64_NO_FAULT); +} + + + +/************************************************************************** + VCPU banked general register access routines + **************************************************************************/ +#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ ( \ + ";;extr.u %0 = %3,%6,16;;\n" \ + "dep %1 = %0, %1, 0, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 16, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ + "r"(*runat), "r"(b1unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw0(struct kvm_vcpu *vcpu) +{ + unsigned long i; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + + if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { + for (i = 0; i < 16; i++) { + *b1++ = *r; + *r++ = *b0++; + } + vcpu_bsw0_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; + } +} + +#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ + "dep %1 = %0, %1, 16, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 0, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ + "r"(*runat), "r"(b0unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw1(struct kvm_vcpu *vcpu) +{ + unsigned long i; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { + for (i = 0; i < 16; i++) { + *b0++ = *r; + *r++ = *b1++; + } + vcpu_bsw1_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) |= IA64_PSR_BN; + } +} + + + + +void vcpu_rfi(struct kvm_vcpu *vcpu) +{ + unsigned long ifs, psr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + psr = VCPU(vcpu, ipsr); + if (psr & IA64_PSR_BN) + vcpu_bsw1(vcpu); + else + vcpu_bsw0(vcpu); + vcpu_set_psr(vcpu, psr); + ifs = VCPU(vcpu, ifs); + if (ifs >> 63) + regs->cr_ifs = ifs; + regs->cr_iip = VCPU(vcpu, iip); +} + + +/* + VPSR can't keep track of below bits of guest PSR + This function gets guest PSR + */ + +unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) +{ + unsigned long mask; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | + IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; + return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); +} + +void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr &= (~imm24); + vcpu_set_psr(vcpu, vpsr); +} + +void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr |= imm24; + vcpu_set_psr(vcpu, vpsr); +} + +/* Generate Mask + * Parameter: + * bit -- starting bit + * len -- how many bits + */ +#define MASK(bit,len) \ +({ \ + __u64 ret; \ + \ + __asm __volatile("dep %0=-1, r0, %1, %2"\ + : "=r" (ret): \ + "M" (bit), \ + "M" (len)); \ + ret; \ +}) + +void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) +{ + val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); + vcpu_set_psr(vcpu, val); +} + +void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_gr(vcpu, inst.M35.r2); + vcpu_set_psr_l(vcpu, val); +} + +void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + vcpu_set_gr(vcpu, inst.M33.r1, val, 0); +} + +void vcpu_increment_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + if (ipsr->ri == 2) { + ipsr->ri = 0; + regs->cr_iip += 16; + } else + ipsr->ri++; +} + +void vcpu_decrement_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else + ipsr->ri--; +} + +/** Emulate a privileged operation. + * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) +{ + unsigned long status, cause, opcode ; + INST64 inst; + + status = IA64_NO_FAULT; + cause = VMX(vcpu, cause); + opcode = VMX(vcpu, opcode); + inst.inst = opcode; + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch (cause) { + case EVENT_RSM: + kvm_rsm(vcpu, inst); + break; + case EVENT_SSM: + kvm_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + kvm_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + kvm_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + kvm_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + kvm_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + vcpu_bsw0(vcpu); + break; + case EVENT_BSW_1: + vcpu_bsw1(vcpu); + break; + case EVENT_COVER: + vcpu_cover(vcpu); + break; + case EVENT_RFI: + vcpu_rfi(vcpu); + break; + case EVENT_ITR_D: + kvm_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + kvm_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + kvm_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + kvm_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + kvm_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + kvm_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + kvm_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + kvm_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + kvm_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + kvm_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + kvm_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + kvm_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + kvm_thash(vcpu, inst); + break; + case EVENT_TTAG: + kvm_ttag(vcpu, inst); + break; + case EVENT_TPA: + status = kvm_tpa(vcpu, inst); + break; + case EVENT_TAK: + kvm_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + kvm_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + kvm_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + kvm_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + kvm_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + kvm_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + kvm_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + kvm_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + kvm_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + kvm_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + kvm_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + kvm_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + kvm_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + kvm_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + status = IA64_FAULT; + break; + default: + break; + }; + /*Assume all status is NO_FAULT ?*/ + if (status == IA64_NO_FAULT && cause != EVENT_RFI) + vcpu_increment_iip(vcpu); + + recover_if_physical_mode(vcpu); +} + +void init_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + vcpu->arch.mode_flags = GUEST_IN_PHY; + VMX(vcpu, vrr[0]) = 0x38; + VMX(vcpu, vrr[1]) = 0x38; + VMX(vcpu, vrr[2]) = 0x38; + VMX(vcpu, vrr[3]) = 0x38; + VMX(vcpu, vrr[4]) = 0x38; + VMX(vcpu, vrr[5]) = 0x38; + VMX(vcpu, vrr[6]) = 0x38; + VMX(vcpu, vrr[7]) = 0x38; + VCPU(vcpu, vpsr) = IA64_PSR_BN; + VCPU(vcpu, dcr) = 0; + /* pta.size must not be 0. The minimum is 15 (32k) */ + VCPU(vcpu, pta) = 15 << 2; + VCPU(vcpu, itv) = 0x10000; + VCPU(vcpu, itm) = 0; + VMX(vcpu, last_itc) = 0; + + VCPU(vcpu, lid) = VCPU_LID(vcpu); + VCPU(vcpu, ivr) = 0; + VCPU(vcpu, tpr) = 0x10000; + VCPU(vcpu, eoi) = 0; + VCPU(vcpu, irr[0]) = 0; + VCPU(vcpu, irr[1]) = 0; + VCPU(vcpu, irr[2]) = 0; + VCPU(vcpu, irr[3]) = 0; + VCPU(vcpu, pmv) = 0x10000; + VCPU(vcpu, cmcv) = 0x10000; + VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ + VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */ + update_vhpi(vcpu, NULL_VECTOR); + VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ + + for (i = 0; i < 4; i++) + VLSAPIC_INSVC(vcpu, i) = 0; +} + +void kvm_init_all_rr(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + local_irq_save(psr); + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); + + if (is_physical_mode(vcpu)) { + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) + panic_vm(vcpu); + + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); + ia64_dv_serialize_data(); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr4); + ia64_dv_serialize_data(); + } + ia64_set_rr((VRN1 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN1]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN2 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN2]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN3 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN3]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN5 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN5]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN7 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN7]))); + ia64_dv_serialize_data(); + ia64_srlz_d(); + ia64_set_psr(psr); +} + +int vmm_entry(void) +{ + struct kvm_vcpu *v; + v = current_vcpu; + + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_init_vtlb(v); + kvm_init_vhpt(v); + init_vcpu(v); + kvm_init_all_rr(v); + vmm_reset_entry(); + + return 0; +} + +void panic_vm(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + + p->exit_reason = EXIT_REASON_VM_PANIC; + vmm_transition(v); + /*Never to return*/ + while (1); +} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h new file mode 100644 index 00000000000..b0fcfb62c49 --- /dev/null +++ b/arch/ia64/kvm/vcpu.h @@ -0,0 +1,740 @@ +/* + * vcpu.h: vcpu routines + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#ifndef __KVM_VCPU_H__ +#define __KVM_VCPU_H__ + +#include <asm/types.h> +#include <asm/fpu.h> +#include <asm/processor.h> + +#ifndef __ASSEMBLY__ +#include "vti.h" + +#include <linux/kvm_host.h> +#include <linux/spinlock.h> + +typedef unsigned long IA64_INST; + +typedef union U_IA64_BUNDLE { + unsigned long i64[2]; + struct { unsigned long template:5, slot0:41, slot1a:18, + slot1b:23, slot2:41; }; + /* NOTE: following doesn't work because bitfields can't cross natural + size boundaries + struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ +} IA64_BUNDLE; + +typedef union U_INST64_A5 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, + imm9d:9, s:1, major:4; }; +} INST64_A5; + +typedef union U_INST64_B4 { + IA64_INST inst; + struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, + wh:2, d:1, un1:1, major:4; }; +} INST64_B4; + +typedef union U_INST64_B8 { + IA64_INST inst; + struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; +} INST64_B8; + +typedef union U_INST64_B9 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; +} INST64_B9; + +typedef union U_INST64_I19 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; +} INST64_I19; + +typedef union U_INST64_I26 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I26; + +typedef union U_INST64_I27 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; +} INST64_I27; + +typedef union U_INST64_I28 { /* not privileged (mov from AR) */ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I28; + +typedef union U_INST64_M28 { + IA64_INST inst; + struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M28; + +typedef union U_INST64_M29 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M29; + +typedef union U_INST64_M30 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, + x3:3, s:1, major:4; }; +} INST64_M30; + +typedef union U_INST64_M31 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M31; + +typedef union U_INST64_M32 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M32; + +typedef union U_INST64_M33 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M33; + +typedef union U_INST64_M35 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; + +} INST64_M35; + +typedef union U_INST64_M36 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; +} INST64_M36; + +typedef union U_INST64_M37 { + IA64_INST inst; + struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, + i:1, major:4; }; +} INST64_M37; + +typedef union U_INST64_M41 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; +} INST64_M41; + +typedef union U_INST64_M42 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M42; + +typedef union U_INST64_M43 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M43; + +typedef union U_INST64_M44 { + IA64_INST inst; + struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; +} INST64_M44; + +typedef union U_INST64_M45 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M45; + +typedef union U_INST64_M46 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, + x3:3, un1:1, major:4; }; +} INST64_M46; + +typedef union U_INST64_M47 { + IA64_INST inst; + struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; +} INST64_M47; + +typedef union U_INST64_M1{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M1; + +typedef union U_INST64_M2{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M2; + +typedef union U_INST64_M3{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M3; + +typedef union U_INST64_M4 { + IA64_INST inst; + struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M4; + +typedef union U_INST64_M5 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M5; + +typedef union U_INST64_M6 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M6; + +typedef union U_INST64_M9 { + IA64_INST inst; + struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M9; + +typedef union U_INST64_M10 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M10; + +typedef union U_INST64_M12 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M12; + +typedef union U_INST64_M15 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M15; + +typedef union U_INST64 { + IA64_INST inst; + struct { unsigned long :37, major:4; } generic; + INST64_A5 A5; /* used in build_hypercall_bundle only */ + INST64_B4 B4; /* used in build_hypercall_bundle only */ + INST64_B8 B8; /* rfi, bsw.[01] */ + INST64_B9 B9; /* break.b */ + INST64_I19 I19; /* used in build_hypercall_bundle only */ + INST64_I26 I26; /* mov register to ar (I unit) */ + INST64_I27 I27; /* mov immediate to ar (I unit) */ + INST64_I28 I28; /* mov from ar (I unit) */ + INST64_M1 M1; /* ld integer */ + INST64_M2 M2; + INST64_M3 M3; + INST64_M4 M4; /* st integer */ + INST64_M5 M5; + INST64_M6 M6; /* ldfd floating pointer */ + INST64_M9 M9; /* stfd floating pointer */ + INST64_M10 M10; /* stfd floating pointer */ + INST64_M12 M12; /* ldfd pair floating pointer */ + INST64_M15 M15; /* lfetch + imm update */ + INST64_M28 M28; /* purge translation cache entry */ + INST64_M29 M29; /* mov register to ar (M unit) */ + INST64_M30 M30; /* mov immediate to ar (M unit) */ + INST64_M31 M31; /* mov from ar (M unit) */ + INST64_M32 M32; /* mov reg to cr */ + INST64_M33 M33; /* mov from cr */ + INST64_M35 M35; /* mov to psr */ + INST64_M36 M36; /* mov from psr */ + INST64_M37 M37; /* break.m */ + INST64_M41 M41; /* translation cache insert */ + INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ + INST64_M43 M43; /* mov from indirect reg */ + INST64_M44 M44; /* set/reset system mask */ + INST64_M45 M45; /* translation purge */ + INST64_M46 M46; /* translation access (tpa,tak) */ + INST64_M47 M47; /* purge translation entry */ +} INST64; + +#define MASK_41 ((unsigned long)0x1ffffffffff) + +/* Virtual address memory attributes encoding */ +#define VA_MATTR_WB 0x0 +#define VA_MATTR_UC 0x4 +#define VA_MATTR_UCE 0x5 +#define VA_MATTR_WC 0x6 +#define VA_MATTR_NATPAGE 0x7 + +#define PMASK(size) (~((size) - 1)) +#define PSIZE(size) (1UL<<(size)) +#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) +#define PAGEALIGN(va, ps) CLEARLSB(va, ps) +#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) +#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ + +#define ARCH_PAGE_SHIFT 12 + +#define INVALID_TI_TAG (1UL << 63) + +#define VTLB_PTE_P_BIT 0 +#define VTLB_PTE_IO_BIT 60 +#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) +#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) + +#define vcpu_quick_region_check(_tr_regions,_ifa) \ + (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) + +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, + u64 va, u64 rid) +{ + trp->page_flags = pte; + trp->itir = itir; + trp->vadr = va; + trp->rid = rid; +} + +extern u64 kvm_lookup_mpa(u64 gpfn); +extern u64 kvm_gpa_to_mpa(u64 gpa); + +/* Return I/O type if trye */ +#define __gpfn_is_io(gpfn) \ + ({ \ + u64 pte, ret = 0; \ + pte = kvm_lookup_mpa(gpfn); \ + if (!(pte & GPFN_INV_MASK)) \ + ret = pte & GPFN_IO_MASK; \ + ret; \ + }) + +#endif + +#define IA64_NO_FAULT 0 +#define IA64_FAULT 1 + +#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) + +#define SW_BAD 0 /* Bad mode transitition */ +#define SW_V2P 1 /* Physical emulatino is activated */ +#define SW_P2V 2 /* Exit physical mode emulation */ +#define SW_SELF 3 /* No mode transition */ +#define SW_NOP 4 /* Mode transition, but without action required */ + +#define GUEST_IN_PHY 0x1 +#define GUEST_PHY_EMUL 0x2 + +#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) + +#define VRN_SHIFT 61 +#define VRN_MASK 0xe000000000000000 +#define VRN0 0x0UL +#define VRN1 0x1UL +#define VRN2 0x2UL +#define VRN3 0x3UL +#define VRN4 0x4UL +#define VRN5 0x5UL +#define VRN6 0x6UL +#define VRN7 0x7UL + +#define IRQ_NO_MASKED 0 +#define IRQ_MASKED_BY_VTPR 1 +#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ + +#define PTA_BASE_SHIFT 15 + +#define IA64_PSR_VM_BIT 46 +#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) + +/* Interruption Function State */ +#define IA64_IFS_V_BIT 63 +#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) + +#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) +#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) + +#ifndef __ASSEMBLY__ + +#include <asm/gcc_intrin.h> + +#define is_physical_mode(v) \ + ((v->arch.mode_flags) & GUEST_IN_PHY) + +#define is_virtual_mode(v) \ + (!is_physical_mode(v)) + +#define MODE_IND(psr) \ + (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) + +#define _vmm_raw_spin_lock(x) \ + do { \ + __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ + __u64 ia64_spinlock_val; \ + ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + if (unlikely(ia64_spinlock_val)) { \ + do { \ + while (*ia64_spinlock_ptr) \ + ia64_barrier(); \ + ia64_spinlock_val = \ + ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + } while (ia64_spinlock_val); \ + } \ + } while (0) + +#define _vmm_raw_spin_unlock(x) \ + do { barrier(); \ + ((spinlock_t *)x)->raw_lock.lock = 0; } \ +while (0) + +void vmm_spin_lock(spinlock_t *lock); +void vmm_spin_unlock(spinlock_t *lock); +enum { + I_TLB = 1, + D_TLB = 2 +}; + +union kvm_va { + struct { + unsigned long off : 60; /* intra-region offset */ + unsigned long reg : 4; /* region number */ + } f; + unsigned long l; + void *p; +}; + +#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = 0; _v.l; }) +#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = -1; _v.p; }) + +#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.rid; }) +#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ps; }) +#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ve; }) + +enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; +enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; + +#define VCPU(_v, _x) ((_v)->arch.vpd->_x) +#define VMX(_v, _x) ((_v)->arch._x) + +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) +#define VLSAPIC_XTP(_v) VMX(_v, xtp) + +static inline unsigned long itir_ps(unsigned long itir) +{ + return ((itir >> 2) & 0x3f); +} + + +/************************************************************************** + VCPU control register access routines + **************************************************************************/ + +static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itir)); +} + +static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itir) = val; +} + +static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, ifa)); +} + +static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifa) = val; +} + +static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, iva)); +} + +static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, pta)); +} + +static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, lid)); +} + +static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, tpr)); +} + +static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) +{ + return (0UL); /*reads of eoi always return 0 */ +} + +static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[0])); +} + +static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[1])); +} + +static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[2])); +} + +static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[3])); +} + +static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) +{ + ia64_setreg(_IA64_REG_CR_DCR, val); +} + +static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, isr) = val; +} + +static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, lid) = val; +} + +static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ipsr) = val; +} + +static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iip) = val; +} + +static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifs) = val; +} + +static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iipa) = val; +} + +static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iha) = val; +} + + +static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) +{ + return vcpu->arch.vrr[reg>>61]; +} + +/************************************************************************** + VCPU debug breakpoint register access routines + **************************************************************************/ + +static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + __ia64_set_dbr(reg, val); +} + +static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + ia64_set_ibr(reg, val); +} + +static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)__ia64_get_dbr(reg)); +} + +static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)ia64_get_ibr(reg)); +} + +/************************************************************************** + VCPU performance monitor register access routines + **************************************************************************/ +static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMC registers are discarded */ + ia64_set_pmc(reg, val); +} + +static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMD registers are discarded */ + ia64_set_pmd(reg, val); +} + +static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMC registers return zero */ + return ((u64)ia64_get_pmc(reg)); +} + +static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMD registers return zero */ + return ((u64)ia64_get_pmd(reg)); +} + +static inline unsigned long vrrtomrr(unsigned long val) +{ + union ia64_rr rr; + rr.val = val; + rr.rid = (rr.rid << 4) | 0xe; + if (rr.ps > PAGE_SHIFT) + rr.ps = PAGE_SHIFT; + rr.ve = 1; + return rr.val; +} + + +static inline int highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + return NULL_VECTOR; +} + +/* + * The pending irq is higher than the inservice one. + * + */ +static inline int is_higher_irq(int pending, int inservice) +{ + return ((pending > inservice) + || ((pending != NULL_VECTOR) + && (inservice == NULL_VECTOR))); +} + +static inline int is_higher_class(int pending, int mic) +{ + return ((pending >> 4) > mic); +} + +/* + * Return 0-255 for pending irq. + * NULL_VECTOR: when no pending. + */ +static inline int highest_pending_irq(struct kvm_vcpu *vcpu) +{ + if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&VCPU(vcpu, irr[0])); +} + +static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) +{ + if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&(VMX(vcpu, insvc[0]))); +} + +extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); +extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); +extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); +extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); +extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); +extern void vcpu_bsw0(struct kvm_vcpu *vcpu); +extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 va, int type); +extern struct thash_data *vhpt_lookup(u64 va); +extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); +extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); +extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); +extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); +extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 ifa, int type); +extern void thash_purge_all(struct kvm_vcpu *v); +extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, + u64 va, int is_data); +extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, + u64 ps, int is_data); + +extern void vcpu_increment_iip(struct kvm_vcpu *v); +extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); +extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); +extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); +extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); +extern void nested_dtlb(struct kvm_vcpu *vcpu); +extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); +extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); + +extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); +extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); + +extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); +extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); +extern void vmm_transition(struct kvm_vcpu *vcpu); +extern void vmm_trampoline(union context *from, union context *to); +extern int vmm_entry(void); +extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); + +extern void vmm_reset_entry(void); +void kvm_init_vtlb(struct kvm_vcpu *v); +void kvm_init_vhpt(struct kvm_vcpu *v); +void thash_init(struct thash_cb *hcb, u64 sz); + +void panic_vm(struct kvm_vcpu *v); + +extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6, u64 arg7); +#endif +#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c new file mode 100644 index 00000000000..2275bf4e681 --- /dev/null +++ b/arch/ia64/kvm/vmm.c @@ -0,0 +1,66 @@ +/* + * vmm.c: vmm module interface with kvm module + * + * Copyright (c) 2007, Intel Corporation. + * + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + + +#include<linux/module.h> +#include<asm/fpswa.h> + +#include "vcpu.h" + +MODULE_AUTHOR("Intel"); +MODULE_LICENSE("GPL"); + +extern char kvm_ia64_ivt; +extern fpswa_interface_t *vmm_fpswa_interface; + +struct kvm_vmm_info vmm_info = { + .module = THIS_MODULE, + .vmm_entry = vmm_entry, + .tramp_entry = vmm_trampoline, + .vmm_ivt = (unsigned long)&kvm_ia64_ivt, +}; + +static int __init kvm_vmm_init(void) +{ + + vmm_fpswa_interface = fpswa_interface; + + /*Register vmm data to kvm side*/ + return kvm_init(&vmm_info, 1024, THIS_MODULE); +} + +static void __exit kvm_vmm_exit(void) +{ + kvm_exit(); + return ; +} + +void vmm_spin_lock(spinlock_t *lock) +{ + _vmm_raw_spin_lock(lock); +} + +void vmm_spin_unlock(spinlock_t *lock) +{ + _vmm_raw_spin_unlock(lock); +} +module_init(kvm_vmm_init) +module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S new file mode 100644 index 00000000000..3ee5f481c06 --- /dev/null +++ b/arch/ia64/kvm/vmm_ivt.S @@ -0,0 +1,1424 @@ +/* + * /ia64/kvm_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Kenneth Chen <kenneth.w.chen@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * + * + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling + * for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB + * handler now uses virtual PT. + * + * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for + * critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss + * (12,51) + * entry offset ----/ / / / + * / + * entry number ---------/ / / + * / + * size of the entry -------------/ / + * / + * vector name -------------------------------------/ + * / + * interruptions triggering this vector + * ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB + * boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + + +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/pgtable.h> + +#include "asm-offsets.h" +#include "vcpu.h" +#include "kvm_minstate.h" +#include "vti.h" + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + + +#define KVM_FAULT(n) \ + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_fault_##n; \ + ;; \ + + +#define KVM_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7)br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_panic; \ + + +GLOBAL_ENTRY(kvm_panic) + br.sptk.many kvm_panic + ;; +END(kvm_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global kvm_ia64_ivt +kvm_ia64_ivt: +/////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(kvm_vhpt_miss) + KVM_FAULT(0) +END(kvm_vhpt_miss) + + + .org kvm_ia64_ivt+0x400 +//////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(kvm_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); +END(kvm_itlb_miss) + + .org kvm_ia64_ivt+0x0800 +////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(kvm_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch +END(kvm_dtlb_miss) + + .org kvm_ia64_ivt+0x0c00 +//////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(kvm_alt_itlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_itlb_miss) + + .org kvm_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(kvm_alt_dtlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_dtlb_miss) + + .org kvm_ia64_ivt+0x1400 +////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(kvm_nested_dtlb_miss) + KVM_FAULT(5) +END(kvm_nested_dtlb_miss) + + .org kvm_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(kvm_ikey_miss) + KVM_REFLECT(6) +END(kvm_ikey_miss) + + .org kvm_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(kvm_dkey_miss) + KVM_REFLECT(7) +END(kvm_dkey_miss) + + .org kvm_ia64_ivt+0x2000 +//////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(kvm_dirty_bit) + KVM_REFLECT(8) +END(kvm_dirty_bit) + + .org kvm_ia64_ivt+0x2400 +//////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(kvm_iaccess_bit) + KVM_REFLECT(9) +END(kvm_iaccess_bit) + + .org kvm_ia64_ivt+0x2800 +/////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(kvm_daccess_bit) + KVM_REFLECT(10) +END(kvm_daccess_bit) + + .org kvm_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(kvm_break_fault) + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; +END(kvm_break_fault) + + .org kvm_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(kvm_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill [r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; +END(kvm_interrupt) + + .global kvm_dispatch_vexirq + .org kvm_ia64_ivt+0x3400 +////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved +ENTRY(kvm_virtual_exirq) + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; +kvm_dispatch_vexirq: + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq +END(kvm_virtual_exirq) + + .org kvm_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + + + .org kvm_ia64_ivt+0x3c00 +/////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + KVM_FAULT(15) + + + .org kvm_ia64_ivt+0x4000 +/////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + KVM_FAULT(16) + + .org kvm_ia64_ivt+0x4400 +////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + KVM_FAULT(17) + + .org kvm_ia64_ivt+0x4800 +////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + KVM_FAULT(18) + + .org kvm_ia64_ivt+0x4c00 +////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + KVM_FAULT(19) + + .org kvm_ia64_ivt+0x5000 +////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present +ENTRY(kvm_page_not_present) + KVM_REFLECT(20) +END(kvm_page_not_present) + + .org kvm_ia64_ivt+0x5100 +/////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission vector +ENTRY(kvm_key_permission) + KVM_REFLECT(21) +END(kvm_key_permission) + + .org kvm_ia64_ivt+0x5200 +////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(kvm_iaccess_rights) + KVM_REFLECT(22) +END(kvm_iaccess_rights) + + .org kvm_ia64_ivt+0x5300 +////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(kvm_daccess_rights) + KVM_REFLECT(23) +END(kvm_daccess_rights) + + .org kvm_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(kvm_general_exception) + KVM_REFLECT(24) + KVM_FAULT(24) +END(kvm_general_exception) + + .org kvm_ia64_ivt+0x5500 +////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(kvm_disabled_fp_reg) + KVM_REFLECT(25) +END(kvm_disabled_fp_reg) + + .org kvm_ia64_ivt+0x5600 +//////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(kvm_nat_consumption) + KVM_REFLECT(26) +END(kvm_nat_consumption) + + .org kvm_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(kvm_speculation_vector) + KVM_REFLECT(27) +END(kvm_speculation_vector) + + .org kvm_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + KVM_FAULT(28) + + .org kvm_ia64_ivt+0x5900 +/////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(kvm_debug_vector) + KVM_FAULT(29) +END(kvm_debug_vector) + + .org kvm_ia64_ivt+0x5a00 +/////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(kvm_unaligned_access) + KVM_REFLECT(30) +END(kvm_unaligned_access) + + .org kvm_ia64_ivt+0x5b00 +////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(kvm_unsupported_data_reference) + KVM_REFLECT(31) +END(kvm_unsupported_data_reference) + + .org kvm_ia64_ivt+0x5c00 +//////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) +ENTRY(kvm_floating_point_fault) + KVM_REFLECT(32) +END(kvm_floating_point_fault) + + .org kvm_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(kvm_floating_point_trap) + KVM_REFLECT(33) +END(kvm_floating_point_trap) + + .org kvm_ia64_ivt+0x5e00 +////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(kvm_lower_privilege_trap) + KVM_REFLECT(34) +END(kvm_lower_privilege_trap) + + .org kvm_ia64_ivt+0x5f00 +////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(kvm_taken_branch_trap) + KVM_REFLECT(35) +END(kvm_taken_branch_trap) + + .org kvm_ia64_ivt+0x6000 +//////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(kvm_single_step_trap) + KVM_REFLECT(36) +END(kvm_single_step_trap) + .global kvm_virtualization_fault_back + .org kvm_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(kvm_virtualization_fault) + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; +kvm_virtualization_fault_back: + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault +END(kvm_virtualization_fault) + + .org kvm_ia64_ivt+0x6200 +////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + KVM_FAULT(38) + + .org kvm_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + KVM_FAULT(39) + + .org kvm_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + KVM_FAULT(40) + + .org kvm_ia64_ivt+0x6500 +////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + KVM_FAULT(41) + + .org kvm_ia64_ivt+0x6600 +////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + KVM_FAULT(42) + + .org kvm_ia64_ivt+0x6700 +////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + KVM_FAULT(43) + + .org kvm_ia64_ivt+0x6800 +////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + KVM_FAULT(44) + + .org kvm_ia64_ivt+0x6900 +/////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception +//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(kvm_ia32_exception) + KVM_FAULT(45) +END(kvm_ia32_exception) + + .org kvm_ia64_ivt+0x6a00 +//////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(kvm_ia32_intercept) + KVM_FAULT(47) +END(kvm_ia32_intercept) + + .org kvm_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + KVM_FAULT(48) + + .org kvm_ia64_ivt+0x6d00 +////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + KVM_FAULT(49) + + .org kvm_ia64_ivt+0x6e00 +////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + KVM_FAULT(50) + + .org kvm_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + KVM_FAULT(52) + + .org kvm_ia64_ivt+0x7100 +//////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + KVM_FAULT(53) + + .org kvm_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + KVM_FAULT(54) + + .org kvm_ia64_ivt+0x7300 +//////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + KVM_FAULT(55) + + .org kvm_ia64_ivt+0x7400 +//////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + KVM_FAULT(56) + + .org kvm_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + KVM_FAULT(57) + + .org kvm_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + KVM_FAULT(58) + + .org kvm_ia64_ivt+0x7700 +//////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + KVM_FAULT(59) + + .org kvm_ia64_ivt+0x7800 +//////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + KVM_FAULT(60) + + .org kvm_ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + KVM_FAULT(61) + + .org kvm_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + KVM_FAULT(62) + + .org kvm_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + KVM_FAULT(63) + + .org kvm_ia64_ivt+0x7c00 +//////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + KVM_FAULT(64) + + .org kvm_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + KVM_FAULT(65) + + .org kvm_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + KVM_FAULT(66) + + .org kvm_ia64_ivt+0x7f00 +//////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + KVM_FAULT(67) + + .org kvm_ia64_ivt+0x8000 +// There is no particular reason for this code to be here, other than that +// there happens to be space here that would go unused otherwise. If this +// fault ever gets "unreserved", simply moved the following code to a more +// suitable spot... + + +ENTRY(kvm_dtlb_miss_dispatch) + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_dtlb_miss_dispatch) + +ENTRY(kvm_itlb_miss_dispatch) + + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_itlb_miss_dispatch) + +ENTRY(kvm_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption +END(kvm_dispatch_reflection) + +ENTRY(kvm_dispatch_virtualization_fault) + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate +END(kvm_dispatch_virtualization_fault) + + +ENTRY(kvm_dispatch_interrupt) + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq +END(kvm_dispatch_interrupt) + + + + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 + // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) + /* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; +END(ia64_leave_hypervisor_prepare) +//fall through +GLOBAL_ENTRY(ia64_leave_hypervisor) + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; +kvm_rbs_switch: + mov r19=96 + +kvm_dont_preserve_current_frame: +/* + * To prevent leaking bits between the hypervisor and guest domain, + * we must clear the stacked registers in the "invalid" partition here. + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +# define Nregs 14 + + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) +kvm_rse_clear_invalid: + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 + +# undef pRecurse +# undef pReturn + +// loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + ld8 r20=[r20] + ;; +//vsa_sync_write_start + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +END(ia64_leave_hypervisor) +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r17:cr.isr + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p1) br.sptk.many ia64_vmm_entry_out + ;; + tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + (p2) ld8 r26=[r25] + ;; +ia64_vmm_entry_out: + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_vmm_entry) + + + +/* + * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, + * u64 arg3, u64 arg4, u64 arg5, + * u64 arg6, u64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. + */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.i + ;; +//(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + +#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) + +GLOBAL_ENTRY(vmm_reset_entry) + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + bsw.0 + ;; + mov r21 =r13 + ;; + bsw.1 + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + ;; + adds r16=VMM_VPD_BASE_OFFSET,r13 + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ld8 r20=[r20] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 +END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h new file mode 100644 index 00000000000..f6c5617e16a --- /dev/null +++ b/arch/ia64/kvm/vti.h @@ -0,0 +1,290 @@ +/* + * vti.h: prototype for generial vt related interface + * Copyright (c) 2004, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Fred Yang (fred.yang@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Zhang xiantao <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ +#ifndef _KVM_VT_I_H +#define _KVM_VT_I_H + +#ifndef __ASSEMBLY__ +#include <asm/page.h> + +#include <linux/kvm_host.h> + +/* define itr.i and itr.d in ia64_itr function */ +#define ITR 0x01 +#define DTR 0x02 +#define IaDTR 0x03 + +#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ +#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ + +#define RR6 (6UL<<61) +#define RR7 (7UL<<61) + + +/* config_options in pal_vp_init_env */ +#define VP_INITIALIZE 1UL +#define VP_FR_PMC 1UL<<1 +#define VP_OPCODE 1UL<<8 +#define VP_CAUSE 1UL<<9 +#define VP_FW_ACC 1UL<<63 + +/* init vp env with initializing vm_buffer */ +#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ + VP_OPCODE | VP_CAUSE | VP_FW_ACC) +/* init vp env without initializing vm_buffer */ +#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC + +#define PAL_VP_CREATE 265 +/* Stacked Virt. Initializes a new VPD for the operation of + * a new virtual processor in the virtual environment. + */ +#define PAL_VP_ENV_INFO 266 +/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ +#define PAL_VP_EXIT_ENV 267 +/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ +#define PAL_VP_INIT_ENV 268 +/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ +#define PAL_VP_REGISTER 269 +/*Stacked Virt. Register a different host IVT for the virtual processor.*/ +#define PAL_VP_RESUME 270 +/* Renamed from PAL_VP_RESUME */ +#define PAL_VP_RESTORE 270 +/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ +#define PAL_VP_SUSPEND 271 +/* Renamed from PAL_VP_SUSPEND */ +#define PAL_VP_SAVE 271 +/* Stacked Virt. Suspends operation for the specified virtual processor on + * the logical processor. + */ +#define PAL_VP_TERMINATE 272 +/* Stacked Virt. Terminates operation for the specified virtual processor.*/ + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; + +struct vpd { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; + unsigned long rsv2[7]; + unsigned long ipsr; + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + unsigned long reserved5[128]; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; + +#define PAL_PROC_VM_BIT (1UL << 40) +#define PAL_PROC_VMSW_BIT (1UL << 54) + +static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, + u64 *vp_env_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); + *buffer_size = iprv.v0; + *vp_env_info = iprv.v1; + return iprv.status; +} + +static inline s64 ia64_pal_vp_exit_env(u64 iva) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); + return iprv.status; +} + +static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, + u64 vbase_addr, u64 *vsa_base) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, + vbase_addr); + *vsa_base = iprv.v0; + + return iprv.status; +} + +static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +#endif + +/*VPD field offset*/ +#define VPD_VAC_START_OFFSET 0 +#define VPD_VDC_START_OFFSET 8 +#define VPD_VHPI_START_OFFSET 256 +#define VPD_VGR_START_OFFSET 1024 +#define VPD_VBGR_START_OFFSET 1152 +#define VPD_VNAT_START_OFFSET 1280 +#define VPD_VBNAT_START_OFFSET 1288 +#define VPD_VCPUID_START_OFFSET 1296 +#define VPD_VPSR_START_OFFSET 1424 +#define VPD_VPR_START_OFFSET 1432 +#define VPD_VRSE_CFLE_START_OFFSET 1440 +#define VPD_VCR_START_OFFSET 2048 +#define VPD_VTPR_START_OFFSET 2576 +#define VPD_VRR_START_OFFSET 3072 +#define VPD_VMM_VAIL_START_OFFSET 31744 + +/*Virtualization faults*/ + +#define EVENT_MOV_TO_AR 1 +#define EVENT_MOV_TO_AR_IMM 2 +#define EVENT_MOV_FROM_AR 3 +#define EVENT_MOV_TO_CR 4 +#define EVENT_MOV_FROM_CR 5 +#define EVENT_MOV_TO_PSR 6 +#define EVENT_MOV_FROM_PSR 7 +#define EVENT_ITC_D 8 +#define EVENT_ITC_I 9 +#define EVENT_MOV_TO_RR 10 +#define EVENT_MOV_TO_DBR 11 +#define EVENT_MOV_TO_IBR 12 +#define EVENT_MOV_TO_PKR 13 +#define EVENT_MOV_TO_PMC 14 +#define EVENT_MOV_TO_PMD 15 +#define EVENT_ITR_D 16 +#define EVENT_ITR_I 17 +#define EVENT_MOV_FROM_RR 18 +#define EVENT_MOV_FROM_DBR 19 +#define EVENT_MOV_FROM_IBR 20 +#define EVENT_MOV_FROM_PKR 21 +#define EVENT_MOV_FROM_PMC 22 +#define EVENT_MOV_FROM_CPUID 23 +#define EVENT_SSM 24 +#define EVENT_RSM 25 +#define EVENT_PTC_L 26 +#define EVENT_PTC_G 27 +#define EVENT_PTC_GA 28 +#define EVENT_PTR_D 29 +#define EVENT_PTR_I 30 +#define EVENT_THASH 31 +#define EVENT_TTAG 32 +#define EVENT_TPA 33 +#define EVENT_TAK 34 +#define EVENT_PTC_E 35 +#define EVENT_COVER 36 +#define EVENT_RFI 37 +#define EVENT_BSW_0 38 +#define EVENT_BSW_1 39 +#define EVENT_VMSW 40 + +/**PAL virtual services offsets */ +#define PAL_VPS_RESUME_NORMAL 0x0000 +#define PAL_VPS_RESUME_HANDLER 0x0400 +#define PAL_VPS_SYNC_READ 0x0800 +#define PAL_VPS_SYNC_WRITE 0x0c00 +#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 +#define PAL_VPS_THASH 0x1400 +#define PAL_VPS_TTAG 0x1800 +#define PAL_VPS_RESTORE 0x1c00 +#define PAL_VPS_SAVE 0x2000 + +#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c new file mode 100644 index 00000000000..def4576d22b --- /dev/null +++ b/arch/ia64/kvm/vtlb.c @@ -0,0 +1,636 @@ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include "vcpu.h" + +#include <linux/rwsem.h> + +#include <asm/tlb.h> + +/* + * Check to see if the address rid:va is translated by the TLB + */ + +static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) +{ + return ((trp->p) && (trp->rid == rid) + && ((va-trp->vadr) < PSIZE(trp->ps))); +} + +/* + * Only for GUEST TR format. + */ +static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) +{ + u64 sa1, ea1; + + if (!trp->p || trp->rid != rid) + return 0; + + sa1 = trp->vadr; + ea1 = sa1 + PSIZE(trp->ps) - 1; + eva -= 1; + if ((sva > ea1) || (sa1 > eva)) + return 0; + else + return 1; + +} + +void machine_tlb_purge(u64 va, u64 ps) +{ + ia64_ptcl(va, ps << 2); +} + +void local_flush_tlb_all(void) +{ + int i, j; + unsigned long flags, count0, count1; + unsigned long stride0, stride1, addr; + + addr = current_vcpu->arch.ptce_base; + count0 = current_vcpu->arch.ptce_count[0]; + count1 = current_vcpu->arch.ptce_count[1]; + stride0 = current_vcpu->arch.ptce_stride[0]; + stride1 = current_vcpu->arch.ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) +{ + union ia64_rr vrr; + union ia64_pta vpta; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vrr.val = vcpu_get_rr(vcpu, vadr); + vpta.val = vcpu_get_pta(vcpu); + + if (vrr.ve & vpta.ve) { + switch (ref) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + +struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) +{ + u64 index, pfn, rid, pfn_bits; + + pfn_bits = vpta.size - 5 - 8; + pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); + rid = _REGION_ID(vrr); + index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); + *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); + + return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + + (index << 5)); +} + +struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) +{ + + struct thash_data *trp; + int i; + u64 rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK;; + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } + + return NULL; +} + +static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) +{ + union ia64_rr rr; + struct thash_data *head; + unsigned long ps, gpaddr; + + ps = itir_ps(itir); + + gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | + (ifa & ((1UL << ps) - 1)); + + rr.val = ia64_get_rr(ifa); + head = (struct thash_data *)ia64_thash(ifa); + head->etag = INVALID_TI_TAG; + ia64_mf(); + head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; + head->itir = rr.ps << 2; + head->etag = ia64_ttag(ifa); + head->gpaddr = gpaddr; +} + +void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) +{ + u64 i, dirty_pages = 1; + u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; + spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; + + vmm_spin_lock(lock); + for (i = 0; i < dirty_pages; i++) { + /* avoid RMW */ + if (!test_bit(base_gfn + i, dirty_bitmap)) + set_bit(base_gfn + i , dirty_bitmap); + } + vmm_spin_unlock(lock); +} + +void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) +{ + u64 phy_pte, psr; + union ia64_rr mrr; + + mrr.val = ia64_get_rr(va); + phy_pte = translate_phy_pte(&pte, itir, va); + + if (itir_ps(itir) >= mrr.ps) { + vhpt_insert(phy_pte, itir, va, pte); + } else { + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, va, phy_pte, itir_ps(itir)); + ia64_set_psr(psr); + } + + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, itir_ps(itir)); +} + +/* + * vhpt lookup + */ +struct thash_data *vhpt_lookup(u64 va) +{ + struct thash_data *head; + u64 tag; + + head = (struct thash_data *)ia64_thash(va); + tag = ia64_ttag(va); + if (head->etag == tag) + return head; + return NULL; +} + +u64 guest_vhpt_lookup(u64 iha, u64 *pte) +{ + u64 ret; + struct thash_data *data; + + data = __vtr_lookup(current_vcpu, iha, D_TLB); + if (data != NULL) + thash_vhpt_insert(current_vcpu, data->page_flags, + data->itir, iha, D_TLB); + + asm volatile ("rsm psr.ic|psr.i;;" + "srlz.d;;" + "ld8.s r9=[%1];;" + "tnat.nz p6,p7=r9;;" + "(p6) mov %0=1;" + "(p6) mov r9=r0;" + "(p7) extr.u r9=r9,0,53;;" + "(p7) mov %0=r0;" + "(p7) st8 [%2]=r9;;" + "ssm psr.ic;;" + "srlz.d;;" + /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ + : "=r"(ret) : "r"(iha), "r"(pte):"memory"); + + return ret; +} + +/* + * purge software guest tlb + */ + +static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, curadr, size, psbits, tag, rr_ps, num; + union ia64_rr vrr; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + psbits = VMX(v, psbits[(va >> 61)]); + start = va & ~((1UL << ps) - 1); + while (psbits) { + curadr = start; + rr_ps = __ffs(psbits); + psbits &= ~(1UL << rr_ps); + num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); + size = PSIZE(rr_ps); + vrr.ps = rr_ps; + while (num) { + cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); + if (cur->etag == tag && cur->ps == rr_ps) + cur->etag = INVALID_TI_TAG; + curadr += size; + num--; + } + } +} + + +/* + * purge VHPT and machine TLB + */ +static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, size, tag, num; + union ia64_rr rr; + + start = va & ~((1UL << ps) - 1); + rr.val = ia64_get_rr(va); + size = PSIZE(rr.ps); + num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); + while (num) { + cur = (struct thash_data *)ia64_thash(start); + tag = ia64_ttag(start); + if (cur->etag == tag) + cur->etag = INVALID_TI_TAG; + start += size; + num--; + } + machine_tlb_purge(va, ps); +} + +/* + * Insert an entry into hash TLB or VHPT. + * NOTES: + * 1: When inserting VHPT to thash, "va" is a must covered + * address by the inserted machine VHPT entry. + * 2: The format of entry is always in TLB. + * 3: The caller need to make sure the new entry will not overlap + * with any existed entry. + */ +void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) +{ + struct thash_data *head; + union ia64_rr vrr; + u64 tag; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + vrr.ps = itir_ps(itir); + VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); + head = vsa_thash(hcb->pta, va, vrr.val, &tag); + head->page_flags = pte; + head->itir = itir; + head->etag = tag; +} + +int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) +{ + struct thash_data *trp; + int i; + u64 end, rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + end = va + PSIZE(ps); + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } + return -1; +} + +/* + * Purge entries in VTLB and VHPT + */ +void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) +{ + if (vcpu_quick_region_check(v->arch.tc_regions, va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) +{ + u64 old_va = va; + va = REGION_OFFSET(va); + if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) +{ + u64 ps, ps_mask, paddr, maddr; + union pte_flags phy_pte; + + ps = itir_ps(itir); + ps_mask = ~((1UL << ps) - 1); + phy_pte.val = *pte; + paddr = *pte; + paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); + maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); + if (maddr & GPFN_IO_MASK) { + *pte |= VTLB_PTE_IO; + return -1; + } + maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | + (paddr & ~PAGE_MASK); + phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; + return phy_pte.val; +} + +/* + * Purge overlap TCs and then insert the new entry to emulate itc ops. + * Notes: Only TC entry can purge and insert. + * 1 indicates this is MMIO + */ +int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, + u64 ifa, int type) +{ + u64 ps; + u64 phy_pte; + union ia64_rr vrr, mrr; + int ret = 0; + + ps = itir_ps(itir); + vrr.val = vcpu_get_rr(v, ifa); + mrr.val = ia64_get_rr(ifa); + + phy_pte = translate_phy_pte(&pte, itir, ifa); + + /* Ensure WB attribute if pte is related to a normal mem page, + * which is required by vga acceleration since qemu maps shared + * vram buffer with WB. + */ + if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { + pte &= ~_PAGE_MA_MASK; + phy_pte &= ~_PAGE_MA_MASK; + } + + if (pte & VTLB_PTE_IO) + ret = 1; + + vtlb_purge(v, ifa, ps); + vhpt_purge(v, ifa, ps); + + if (ps == mrr.ps) { + if (!(pte&VTLB_PTE_IO)) { + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + } + } else if (ps > mrr.ps) { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + if (!(pte&VTLB_PTE_IO)) + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + u64 psr; + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, ifa, phy_pte, ps); + ia64_set_psr(psr); + } + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, ps); + + return ret; +} + +/* + * Purge all TCs or VHPT entries including those in Hash table. + * + */ + +void thash_purge_all(struct kvm_vcpu *v) +{ + int i; + struct thash_data *head; + struct thash_cb *vtlb, *vhpt; + vtlb = &v->arch.vtlb; + vhpt = &v->arch.vhpt; + + for (i = 0; i < 8; i++) + VMX(v, psbits[i]) = 0; + + head = vtlb->hash; + for (i = 0; i < vtlb->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + head = vhpt->hash; + for (i = 0; i < vhpt->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + local_flush_tlb_all(); +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va or the entry. + * + * INPUT: + * in: TLB format for both VHPT & TLB. + */ + +struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) +{ + struct thash_data *cch; + u64 psbits, ps, tag; + union ia64_rr vrr; + + struct thash_cb *hcb = &v->arch.vtlb; + + cch = __vtr_lookup(v, va, is_data);; + if (cch) + return cch; + + if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) + return NULL; + + psbits = VMX(v, psbits[(va >> 61)]); + vrr.val = vcpu_get_rr(v, va); + while (psbits) { + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + vrr.ps = ps; + cch = vsa_thash(hcb->pta, va, vrr.val, &tag); + if (cch->etag == tag && cch->ps == ps) + return cch; + } + + return NULL; +} + + +/* + * Initialize internal control data before service. + */ +void thash_init(struct thash_cb *hcb, u64 sz) +{ + int i; + struct thash_data *head; + + hcb->pta.val = (unsigned long)hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + head = hcb->hash; + for (i = 0; i < hcb->num; i++) { + head->page_flags = 0; + head->itir = 0; + head->etag = INVALID_TI_TAG; + head->next = 0; + head++; + } +} + +u64 kvm_lookup_mpa(u64 gpfn) +{ + u64 *base = (u64 *) KVM_P2M_BASE; + return *(base + gpfn); +} + +u64 kvm_gpa_to_mpa(u64 gpa) +{ + u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); + return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); +} + + +/* + * Fetch guest bundle code. + * INPUT: + * gip: guest ip + * pbundle: used to return fetched bundle. + */ +int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) +{ + u64 gpip = 0; /* guest physical IP*/ + u64 *vpa; + struct thash_data *tlb; + u64 maddr; + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { + /* I-side physical mode */ + gpip = gip; + } else { + tlb = vtlb_lookup(vcpu, gip, I_TLB); + if (tlb) + gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | + (gip & (PSIZE(tlb->ps) - 1)); + } + if (gpip) { + maddr = kvm_gpa_to_mpa(gpip); + } else { + tlb = vhpt_lookup(gip); + if (tlb == NULL) { + ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); + return IA64_FAULT; + } + maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) + | (gip & (PSIZE(tlb->ps) - 1)); + } + vpa = (u64 *)__kvm_va(maddr); + + pbundle->i64[0] = *vpa++; + pbundle->i64[1] = *vpa; + + return IA64_NO_FAULT; +} + + +void kvm_init_vhpt(struct kvm_vcpu *v) +{ + v->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&v->arch.vhpt, VHPT_SHIFT); + ia64_set_pta(v->arch.vhpt.pta.val); + /*Enable VHPT here?*/ +} + +void kvm_init_vtlb(struct kvm_vcpu *v) +{ + v->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&v->arch.vtlb, VTLB_SHIFT); +} diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 344f64eca7a..798bf9835a5 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -45,8 +45,6 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk(KERN_INFO "Free swap: %6ldkB\n", - nr_swap_pages<<(PAGE_SHIFT-10)); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; @@ -255,7 +253,7 @@ paging_init (void) max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - efi_memmap_walk(register_active_ranges, NULL); + efi_memmap_walk(filter_memory, register_active_ranges); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ee5e68b2af9..544dc420c65 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node) { int cpu, n = 0; - for (cpu = 0; cpu < NR_CPUS; cpu++) + for_each_possible_early_cpu(cpu) if (node == node_cpuid[cpu].nid) n++; @@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node) pernodesize += node * L1_CACHE_BYTES; pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); pernodesize = PAGE_ALIGN(pernodesize); return pernodesize; } @@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node) #ifdef CONFIG_SMP int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_possible_early_cpu(cpu) { if (node == node_cpuid[cpu].nid) { memcpy(__va(cpu_data), __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); @@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void) #ifdef CONFIG_SMP /* Set the node_data pointer for each per-cpu struct */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_possible_early_cpu(cpu) { node = node_cpuid[cpu].nid; per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; } @@ -444,7 +445,7 @@ void __init find_memory(void) mem_data[node].min_pfn = ~0UL; } - efi_memmap_walk(register_active_ranges, NULL); + efi_memmap_walk(filter_memory, register_active_ranges); /* * Initialize the boot memory maps in reverse order since that's @@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void) int cpu; static int first_time = 1; - - if (smp_processor_id() != 0) - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; - if (first_time) { first_time = 0; - for (cpu = 0; cpu < NR_CPUS; cpu++) + for_each_possible_early_cpu(cpu) per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; } @@ -522,8 +519,6 @@ void show_mem(void) printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk(KERN_INFO "Free swap: %6ldkB\n", - nr_swap_pages<<(PAGE_SHIFT-10)); printk(KERN_INFO "Node memory in pages:\n"); for_each_online_pgdat(pgdat) { unsigned long present; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index a4ca657c72c..fc6c6636ffd 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte) { unsigned long addr; struct page *page; - unsigned long order; page = pte_page(pte); addr = (unsigned long) page_address(page); @@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte) if (test_bit(PG_arch_1, &page->flags)) return; /* i-cache is already coherent with d-cache */ - if (PageCompound(page)) { - order = compound_order(page); - flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT)); - } - else - flush_icache_range(addr, addr + PAGE_SIZE); + flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page))); set_bit(PG_arch_1, &page->flags); /* mark page as clean */ } @@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg) #endif /* CONFIG_VIRTUAL_MEM_MAP */ int __init -register_active_ranges(u64 start, u64 end, void *arg) +register_active_ranges(u64 start, u64 len, int nid) { - int nid = paddr_to_nid(__pa(start)); + u64 end = start + len; - if (nid < 0) - nid = 0; #ifdef CONFIG_KEXEC if (start > crashk_res.start && start < crashk_res.end) start = crashk_res.end; @@ -690,15 +682,6 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - int arch_add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat; diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 7807fc5c042..b73bf1838e5 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -27,7 +27,9 @@ */ int num_node_memblks; struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; -struct node_cpuid_s node_cpuid[NR_CPUS]; +struct node_cpuid_s node_cpuid[NR_CPUS] = + { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; + /* * This is a matrix with "distances" between nodes, they should be * proportional to the memory access latency ratios. diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 655da240d13..d52ec4e8340 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -11,6 +11,9 @@ * Rohit Seth <rohit.seth@intel.com> * Ken Chen <kenneth.w.chen@intel.com> * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation + * Copyright (C) 2007 Intel Corp + * Fenghua Yu <fenghua.yu@intel.com> + * Add multiple ptc.g/ptc.ga instruction support in global tlb purge. */ #include <linux/module.h> #include <linux/init.h> @@ -26,6 +29,9 @@ #include <asm/pal.h> #include <asm/tlbflush.h> #include <asm/dma.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/tlb.h> static struct { unsigned long mask; /* mask of supported purge page-sizes */ @@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = { }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); +DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ +DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ + +struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; /* * Initializes the ia64_ctx.bitmap array based on max_ctx+1. @@ -84,14 +94,140 @@ wrap_mmu_context (struct mm_struct *mm) local_flush_tlb_all(); } +/* + * Implement "spinaphores" ... like counting semaphores, but they + * spin instead of sleeping. If there are ever any other users for + * this primitive it can be moved up to a spinaphore.h header. + */ +struct spinaphore { + atomic_t cur; +}; + +static inline void spinaphore_init(struct spinaphore *ss, int val) +{ + atomic_set(&ss->cur, val); +} + +static inline void down_spin(struct spinaphore *ss) +{ + while (unlikely(!atomic_add_unless(&ss->cur, -1, 0))) + while (atomic_read(&ss->cur) == 0) + cpu_relax(); +} + +static inline void up_spin(struct spinaphore *ss) +{ + atomic_add(1, &ss->cur); +} + +static struct spinaphore ptcg_sem; +static u16 nptcg = 1; +static int need_ptcg_sem = 1; +static int toolatetochangeptcgsem = 0; + +/* + * Kernel parameter "nptcg=" overrides max number of concurrent global TLB + * purges which is reported from either PAL or SAL PALO. + * + * We don't have sanity checking for nptcg value. It's the user's responsibility + * for valid nptcg value on the platform. Otherwise, kernel may hang in some + * cases. + */ +static int __init +set_nptcg(char *str) +{ + int value = 0; + + get_option(&str, &value); + setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER); + + return 1; +} + +__setup("nptcg=", set_nptcg); + +/* + * Maximum number of simultaneous ptc.g purges in the system can + * be defined by PAL_VM_SUMMARY (in which case we should take + * the smallest value for any cpu in the system) or by the PAL + * override table (in which case we should ignore the value from + * PAL_VM_SUMMARY). + * + * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g + * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case, + * we should ignore the value from either PAL_VM_SUMMARY or PAL override table. + * + * Complicating the logic here is the fact that num_possible_cpus() + * isn't fully setup until we start bringing cpus online. + */ +void +setup_ptcg_sem(int max_purges, int nptcg_from) +{ + static int kp_override; + static int palo_override; + static int firstcpu = 1; + + if (toolatetochangeptcgsem) { + BUG_ON(max_purges < nptcg); + return; + } + + if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) { + kp_override = 1; + nptcg = max_purges; + goto resetsema; + } + if (kp_override) { + need_ptcg_sem = num_possible_cpus() > nptcg; + return; + } + + if (nptcg_from == NPTCG_FROM_PALO) { + palo_override = 1; + + /* In PALO max_purges == 0 really means it! */ + if (max_purges == 0) + panic("Whoa! Platform does not support global TLB purges.\n"); + nptcg = max_purges; + if (nptcg == PALO_MAX_TLB_PURGES) { + need_ptcg_sem = 0; + return; + } + goto resetsema; + } + if (palo_override) { + if (nptcg != PALO_MAX_TLB_PURGES) + need_ptcg_sem = (num_possible_cpus() > nptcg); + return; + } + + /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ + if (max_purges == 0) max_purges = 1; + + if (firstcpu) { + nptcg = max_purges; + firstcpu = 0; + } + if (max_purges < nptcg) + nptcg = max_purges; + if (nptcg == PAL_MAX_PURGES) { + need_ptcg_sem = 0; + return; + } else + need_ptcg_sem = (num_possible_cpus() > nptcg); + +resetsema: + spinaphore_init(&ptcg_sem, max_purges); +} + void ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) { - static DEFINE_SPINLOCK(ptcg_lock); - struct mm_struct *active_mm = current->active_mm; + toolatetochangeptcgsem = 1; + if (mm != active_mm) { /* Restore region IDs for mm */ if (mm && active_mm) { @@ -102,19 +238,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, } } - /* HW requires global serialization of ptc.ga. */ - spin_lock(&ptcg_lock); - { - do { - /* - * Flush ALAT entries also. - */ - ia64_ptcga(start, (nbits<<2)); - ia64_srlz_i(); - start += (1UL << nbits); - } while (start < end); - } - spin_unlock(&ptcg_lock); + if (need_ptcg_sem) + down_spin(&ptcg_sem); + + do { + /* + * Flush ALAT entries also. + */ + ia64_ptcga(start, (nbits << 2)); + ia64_srlz_i(); + start += (1UL << nbits); + } while (start < end); + + if (need_ptcg_sem) + up_spin(&ptcg_sem); if (mm != active_mm) { activate_context(active_mm); @@ -190,6 +327,9 @@ ia64_tlb_init (void) ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */ unsigned long tr_pgbits; long status; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int cpu = smp_processor_id(); if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " @@ -206,4 +346,191 @@ ia64_tlb_init (void) local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); + + if (status) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + per_cpu(ia64_tr_num, cpu) = 8; + return; + } + per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > + (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) + per_cpu(ia64_tr_num, cpu) = + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { + per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; + printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!" + "IA64_TR_ALLOC_MAX should be extended\n"); + } +} + +/* + * is_tr_overlap + * + * Check overlap with inserted TRs. + */ +static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) +{ + u64 tr_log_size; + u64 tr_end; + u64 va_rr = ia64_get_rr(va); + u64 va_rid = RR_TO_RID(va_rr); + u64 va_end = va + (1<<log_size) - 1; + + if (va_rid != RR_TO_RID(p->rr)) + return 0; + tr_log_size = (p->itir & 0xff) >> 2; + tr_end = p->ifa + (1<<tr_log_size) - 1; + + if (va > tr_end || p->ifa > va_end) + return 0; + return 1; + +} + +/* + * ia64_insert_tr in virtual mode. Allocate a TR slot + * + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr + * + * va : virtual address. + * pte : pte entries inserted. + * log_size: range to be covered. + * + * Return value: <0 : error No. + * + * >=0 : slot number allocated for TR. + * Must be called with preemption disabled. + */ +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) +{ + int i, r; + unsigned long psr; + struct ia64_tr_entry *p; + int cpu = smp_processor_id(); + + r = -EINVAL; + /*Check overlap with existing TR entries*/ + if (target_mask & 0x1) { + p = &__per_cpu_idtrs[cpu][0][0]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + if (target_mask & 0x2) { + p = &__per_cpu_idtrs[cpu][1][0]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + + for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { + switch (target_mask & 0x3) { + case 1: + if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) + goto found; + continue; + case 2: + if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + goto found; + continue; + case 3: + if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && + !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + goto found; + continue; + default: + r = -EINVAL; + goto out; + } + } +found: + if (i >= per_cpu(ia64_tr_num, cpu)) + return -EBUSY; + + /*Record tr info for mca hander use!*/ + if (i > per_cpu(ia64_tr_used, cpu)) + per_cpu(ia64_tr_used, cpu) = i; + + psr = ia64_clear_ic(); + if (target_mask & 0x1) { + ia64_itr(0x1, i, va, pte, log_size); + ia64_srlz_i(); + p = &__per_cpu_idtrs[cpu][0][i]; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + if (target_mask & 0x2) { + ia64_itr(0x2, i, va, pte, log_size); + ia64_srlz_i(); + p = &__per_cpu_idtrs[cpu][1][i]; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + ia64_set_psr(psr); + r = i; +out: + return r; +} +EXPORT_SYMBOL_GPL(ia64_itr_entry); + +/* + * ia64_purge_tr + * + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. + * slot: slot number to be freed. + * + * Must be called with preemption disabled. + */ +void ia64_ptr_entry(u64 target_mask, int slot) +{ + int cpu = smp_processor_id(); + int i; + struct ia64_tr_entry *p; + + if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) + return; + + if (target_mask & 0x1) { + p = &__per_cpu_idtrs[cpu][0][slot]; + if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x1, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + if (target_mask & 0x2) { + p = &__per_cpu_idtrs[cpu][1][slot]; + if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x2, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { + if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || + (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + break; + } + per_cpu(ia64_tr_used, cpu) = i; } +EXPORT_SYMBOL_GPL(ia64_ptr_entry); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 53d0a8ee35d..77b15f80f10 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -504,54 +504,12 @@ pcibios_update_irq (struct pci_dev *dev, int irq) /* ??? FIXME -- record old value for shutdown. */ } -static inline int -pcibios_enable_resources (struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - - if (!dev) - return -EINVAL; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx<PCI_NUM_RESOURCES; idx++) { - /* Only set up the desired resources. */ - if (!(mask & (1 << idx))) - continue; - - r = &dev->resource[idx]; - if (!(r->flags & type_mask)) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR - "PCI: Device %s not available because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - int pcibios_enable_device (struct pci_dev *dev, int mask) { int ret; - ret = pcibios_enable_resources(dev, mask); + ret = pci_enable_resources(dev, mask); if (ret < 0) return ret; diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 688a3c27e0f..0591038735a 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -4,7 +4,7 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc. All Rights Reserved. # EXTRA_CFLAGS += -Iarch/ia64/sn/include @@ -15,9 +15,4 @@ obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o -xp-y := xp_main.o xp_nofault.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o -xpc-y := xpc_main.o xpc_channel.o xpc_partition.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o obj-$(CONFIG_PCI_MSI) += msi_sn.o diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 0101c7924a4..08b0d9bb62e 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -187,8 +187,8 @@ void hub_error_init(struct hubdev_info *hubdev_info) { if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, - "SN_hub_error", (void *)hubdev_info)) { - printk("hub_error_init: Failed to request_irq for 0x%p\n", + "SN_hub_error", hubdev_info)) { + printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n", hubdev_info); return; } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 4b0d1538e7e..8cc0c4753d8 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -37,7 +37,6 @@ #include <asm/processor.h> #include <asm/topology.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> #include <asm/sn/io.h> diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c deleted file mode 100644 index b7ea46645e1..00000000000 --- a/arch/ia64/sn/kernel/xp_main.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition (XP) base. - * - * XP provides a base from which its users can interact - * with XPC, yet not be dependent on XPC. - * - */ - - -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/xp.h> - - -/* - * Target of nofault PIO read. - */ -u64 xp_nofault_PIOR_target; - - -/* - * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level - * users of XPC. - */ -struct xpc_registration xpc_registrations[XPC_NCHANNELS]; - - -/* - * Initialize the XPC interface to indicate that XPC isn't loaded. - */ -static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; } - -struct xpc_interface xpc_interface = { - (void (*)(int)) xpc_notloaded, - (void (*)(int)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *)) - xpc_notloaded, - (void (*)(partid_t, int, void *)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded -}; - - -/* - * XPC calls this when it (the XPC module) has been loaded. - */ -void -xpc_set_interface(void (*connect)(int), - void (*disconnect)(int), - enum xpc_retval (*allocate)(partid_t, int, u32, void **), - enum xpc_retval (*send)(partid_t, int, void *), - enum xpc_retval (*send_notify)(partid_t, int, void *, - xpc_notify_func, void *), - void (*received)(partid_t, int, void *), - enum xpc_retval (*partid_to_nasids)(partid_t, void *)) -{ - xpc_interface.connect = connect; - xpc_interface.disconnect = disconnect; - xpc_interface.allocate = allocate; - xpc_interface.send = send; - xpc_interface.send_notify = send_notify; - xpc_interface.received = received; - xpc_interface.partid_to_nasids = partid_to_nasids; -} - - -/* - * XPC calls this when it (the XPC module) is being unloaded. - */ -void -xpc_clear_interface(void) -{ - xpc_interface.connect = (void (*)(int)) xpc_notloaded; - xpc_interface.disconnect = (void (*)(int)) xpc_notloaded; - xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32, - void **)) xpc_notloaded; - xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *)) - xpc_notloaded; - xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *, - xpc_notify_func, void *)) xpc_notloaded; - xpc_interface.received = (void (*)(partid_t, int, void *)) - xpc_notloaded; - xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *)) - xpc_notloaded; -} - - -/* - * Register for automatic establishment of a channel connection whenever - * a partition comes up. - * - * Arguments: - * - * ch_number - channel # to register for connection. - * func - function to call for asynchronous notification of channel - * state changes (i.e., connection, disconnection, error) and - * the arrival of incoming messages. - * key - pointer to optional user-defined value that gets passed back - * to the user on any callouts made to func. - * payload_size - size in bytes of the XPC message's payload area which - * contains a user-defined message. The user should make - * this large enough to hold their largest message. - * nentries - max #of XPC message entries a message queue can contain. - * The actual number, which is determined when a connection - * is established and may be less then requested, will be - * passed to the user via the xpcConnected callout. - * assigned_limit - max number of kthreads allowed to be processing - * messages (per connection) at any given instant. - * idle_limit - max number of kthreads allowed to be idle at any given - * instant. - */ -enum xpc_retval -xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, - u16 nentries, u32 assigned_limit, u32 idle_limit) -{ - struct xpc_registration *registration; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - DBUG_ON(payload_size == 0 || nentries == 0); - DBUG_ON(func == NULL); - DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); - - registration = &xpc_registrations[ch_number]; - - if (mutex_lock_interruptible(®istration->mutex) != 0) { - return xpcInterrupted; - } - - /* if XPC_CHANNEL_REGISTERED(ch_number) */ - if (registration->func != NULL) { - mutex_unlock(®istration->mutex); - return xpcAlreadyRegistered; - } - - /* register the channel for connection */ - registration->msg_size = XPC_MSG_SIZE(payload_size); - registration->nentries = nentries; - registration->assigned_limit = assigned_limit; - registration->idle_limit = idle_limit; - registration->key = key; - registration->func = func; - - mutex_unlock(®istration->mutex); - - xpc_interface.connect(ch_number); - - return xpcSuccess; -} - - -/* - * Remove the registration for automatic connection of the specified channel - * when a partition comes up. - * - * Before returning this xpc_disconnect() will wait for all connections on the - * specified channel have been closed/torndown. So the caller can be assured - * that they will not be receiving any more callouts from XPC to their - * function registered via xpc_connect(). - * - * Arguments: - * - * ch_number - channel # to unregister. - */ -void -xpc_disconnect(int ch_number) -{ - struct xpc_registration *registration; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - registration = &xpc_registrations[ch_number]; - - /* - * We've decided not to make this a down_interruptible(), since we - * figured XPC's users will just turn around and call xpc_disconnect() - * again anyways, so we might as well wait, if need be. - */ - mutex_lock(®istration->mutex); - - /* if !XPC_CHANNEL_REGISTERED(ch_number) */ - if (registration->func == NULL) { - mutex_unlock(®istration->mutex); - return; - } - - /* remove the connection registration for the specified channel */ - registration->func = NULL; - registration->key = NULL; - registration->nentries = 0; - registration->msg_size = 0; - registration->assigned_limit = 0; - registration->idle_limit = 0; - - xpc_interface.disconnect(ch_number); - - mutex_unlock(®istration->mutex); - - return; -} - - -int __init -xp_init(void) -{ - int ret, ch_number; - u64 func_addr = *(u64 *) xp_nofault_PIOR; - u64 err_func_addr = *(u64 *) xp_error_PIOR; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - /* - * Register a nofault code region which performs a cross-partition - * PIO read. If the PIO read times out, the MCA handler will consume - * the error and return to a kernel-provided instruction to indicate - * an error. This PIO read exists because it is guaranteed to timeout - * if the destination is down (AMO operations do not timeout on at - * least some CPUs on Shubs <= v1.2, which unfortunately we have to - * work around). - */ - if ((ret = sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 1)) != 0) { - printk(KERN_ERR "XP: can't register nofault code, error=%d\n", - ret); - } - /* - * Setup the nofault PIO read target. (There is no special reason why - * SH_IPI_ACCESS was selected.) - */ - if (is_shub2()) { - xp_nofault_PIOR_target = SH2_IPI_ACCESS0; - } else { - xp_nofault_PIOR_target = SH1_IPI_ACCESS; - } - - /* initialize the connection registration mutex */ - for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { - mutex_init(&xpc_registrations[ch_number].mutex); - } - - return 0; -} -module_init(xp_init); - - -void __exit -xp_exit(void) -{ - u64 func_addr = *(u64 *) xp_nofault_PIOR; - u64 err_func_addr = *(u64 *) xp_error_PIOR; - - - /* unregister the PIO read nofault code region */ - (void) sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 0); -} -module_exit(xp_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition (XP) base"); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(xp_nofault_PIOR); -EXPORT_SYMBOL(xp_nofault_PIOR_target); -EXPORT_SYMBOL(xpc_registrations); -EXPORT_SYMBOL(xpc_interface); -EXPORT_SYMBOL(xpc_clear_interface); -EXPORT_SYMBOL(xpc_set_interface); -EXPORT_SYMBOL(xpc_connect); -EXPORT_SYMBOL(xpc_disconnect); - diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S deleted file mode 100644 index 98e7c7dbfdd..00000000000 --- a/arch/ia64/sn/kernel/xp_nofault.S +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * The xp_nofault_PIOR function takes a pointer to a remote PIO register - * and attempts to load and consume a value from it. This function - * will be registered as a nofault code block. In the event that the - * PIO read fails, the MCA handler will force the error to look - * corrected and vector to the xp_error_PIOR which will return an error. - * - * The definition of "consumption" and the time it takes for an MCA - * to surface is processor implementation specific. This code - * is sufficient on Itanium through the Montvale processor family. - * It may need to be adjusted for future processor implementations. - * - * extern int xp_nofault_PIOR(void *remote_register); - */ - - .global xp_nofault_PIOR -xp_nofault_PIOR: - mov r8=r0 // Stage a success return value - ld8.acq r9=[r32];; // PIO Read the specified register - adds r9=1,r9;; // Add to force consumption - srlz.i;; // Allow time for MCA to surface - br.ret.sptk.many b0;; // Return success - - .global xp_error_PIOR -xp_error_PIOR: - mov r8=1 // Return value of 1 - br.ret.sptk.many b0;; // Return failure diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c deleted file mode 100644 index 44ccc0d789c..00000000000 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ /dev/null @@ -1,2379 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) channel support. - * - * This is the part of XPC that manages the channels and - * sends/receives messages across them to/from other partitions. - * - */ - - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> -#include <linux/completion.h> -#include <asm/sn/bte.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/xpc.h> - - -/* - * Guarantee that the kzalloc'd memory is cacheline aligned. - */ -static void * -xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) -{ - /* see if kzalloc will give us cachline aligned memory by default */ - *base = kzalloc(size, flags); - if (*base == NULL) { - return NULL; - } - if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { - return *base; - } - kfree(*base); - - /* nope, we'll have to do it ourselves */ - *base = kzalloc(size + L1_CACHE_BYTES, flags); - if (*base == NULL) { - return NULL; - } - return (void *) L1_CACHE_ALIGN((u64) *base); -} - - -/* - * Set up the initial values for the XPartition Communication channels. - */ -static void -xpc_initialize_channels(struct xpc_partition *part, partid_t partid) -{ - int ch_number; - struct xpc_channel *ch; - - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - ch->partid = partid; - ch->number = ch_number; - ch->flags = XPC_C_DISCONNECTED; - - ch->local_GP = &part->local_GPs[ch_number]; - ch->local_openclose_args = - &part->local_openclose_args[ch_number]; - - atomic_set(&ch->kthreads_assigned, 0); - atomic_set(&ch->kthreads_idle, 0); - atomic_set(&ch->kthreads_active, 0); - - atomic_set(&ch->references, 0); - atomic_set(&ch->n_to_notify, 0); - - spin_lock_init(&ch->lock); - mutex_init(&ch->msg_to_pull_mutex); - init_completion(&ch->wdisconnect_wait); - - atomic_set(&ch->n_on_msg_allocate_wq, 0); - init_waitqueue_head(&ch->msg_allocate_wq); - init_waitqueue_head(&ch->idle_wq); - } -} - - -/* - * Setup the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -enum xpc_retval -xpc_setup_infrastructure(struct xpc_partition *part) -{ - int ret, cpuid; - struct timer_list *timer; - partid_t partid = XPC_PARTID(part); - - - /* - * Zero out MOST of the entry for this partition. Only the fields - * starting with `nchannels' will be zeroed. The preceding fields must - * remain `viable' across partition ups and downs, since they may be - * referenced during this memset() operation. - */ - memset(&part->nchannels, 0, sizeof(struct xpc_partition) - - offsetof(struct xpc_partition, nchannels)); - - /* - * Allocate all of the channel structures as a contiguous chunk of - * memory. - */ - part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, - GFP_KERNEL); - if (part->channels == NULL) { - dev_err(xpc_chan, "can't get memory for channels\n"); - return xpcNoMemory; - } - - part->nchannels = XPC_NCHANNELS; - - - /* allocate all the required GET/PUT values */ - - part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, &part->local_GPs_base); - if (part->local_GPs == NULL) { - kfree(part->channels); - part->channels = NULL; - dev_err(xpc_chan, "can't get memory for local get/put " - "values\n"); - return xpcNoMemory; - } - - part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, &part->remote_GPs_base); - if (part->remote_GPs == NULL) { - dev_err(xpc_chan, "can't get memory for remote get/put " - "values\n"); - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - - /* allocate all the required open and close args */ - - part->local_openclose_args = xpc_kzalloc_cacheline_aligned( - XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->local_openclose_args_base); - if (part->local_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for local connect args\n"); - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - part->remote_openclose_args = xpc_kzalloc_cacheline_aligned( - XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->remote_openclose_args_base); - if (part->remote_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for remote connect args\n"); - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - - xpc_initialize_channels(part, partid); - - atomic_set(&part->nchannels_active, 0); - atomic_set(&part->nchannels_engaged, 0); - - - /* local_IPI_amo were set to 0 by an earlier memset() */ - - /* Initialize this partitions AMO_t structure */ - part->local_IPI_amo_va = xpc_IPI_init(partid); - - spin_lock_init(&part->IPI_lock); - - atomic_set(&part->channel_mgr_requests, 1); - init_waitqueue_head(&part->channel_mgr_wq); - - sprintf(part->IPI_owner, "xpc%02d", partid); - ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED, - part->IPI_owner, (void *) (u64) partid); - if (ret != 0) { - dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " - "errno=%d\n", -ret); - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcLackOfResources; - } - - /* Setup a timer to check for dropped IPIs */ - timer = &part->dropped_IPI_timer; - init_timer(timer); - timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check; - timer->data = (unsigned long) part; - timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT; - add_timer(timer); - - /* - * With the setting of the partition setup_state to XPC_P_SETUP, we're - * declaring that this partition is ready to go. - */ - part->setup_state = XPC_P_SETUP; - - - /* - * Setup the per partition specific variables required by the - * remote partition to establish channel connections with us. - * - * The setting of the magic # indicates that these per partition - * specific variables are ready to be used. - */ - xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs); - xpc_vars_part[partid].openclose_args_pa = - __pa(part->local_openclose_args); - xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va); - cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ - xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid); - xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid); - xpc_vars_part[partid].nchannels = part->nchannels; - xpc_vars_part[partid].magic = XPC_VP_MAGIC1; - - return xpcSuccess; -} - - -/* - * Create a wrapper that hides the underlying mechanism for pulling a cacheline - * (or multiple cachelines) from a remote partition. - * - * src must be a cacheline aligned physical address on the remote partition. - * dst must be a cacheline aligned virtual address on this partition. - * cnt must be an cacheline sized - */ -static enum xpc_retval -xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, - const void *src, size_t cnt) -{ - bte_result_t bte_ret; - - - DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src)); - DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst)); - DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); - - if (part->act_state == XPC_P_DEACTIVATING) { - return part->reason; - } - - bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt, - (BTE_NORMAL | BTE_WACQUIRE), NULL); - if (bte_ret == BTE_SUCCESS) { - return xpcSuccess; - } - - dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n", - XPC_PARTID(part), bte_ret); - - return xpc_map_bte_errors(bte_ret); -} - - -/* - * Pull the remote per partition specific variables from the specified - * partition. - */ -enum xpc_retval -xpc_pull_remote_vars_part(struct xpc_partition *part) -{ - u8 buffer[L1_CACHE_BYTES * 2]; - struct xpc_vars_part *pulled_entry_cacheline = - (struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer); - struct xpc_vars_part *pulled_entry; - u64 remote_entry_cacheline_pa, remote_entry_pa; - partid_t partid = XPC_PARTID(part); - enum xpc_retval ret; - - - /* pull the cacheline that contains the variables we're interested in */ - - DBUG_ON(part->remote_vars_part_pa != - L1_CACHE_ALIGN(part->remote_vars_part_pa)); - DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2); - - remote_entry_pa = part->remote_vars_part_pa + - sn_partition_id * sizeof(struct xpc_vars_part); - - remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); - - pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline + - (remote_entry_pa & (L1_CACHE_BYTES - 1))); - - ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline, - (void *) remote_entry_cacheline_pa, - L1_CACHE_BYTES); - if (ret != xpcSuccess) { - dev_dbg(xpc_chan, "failed to pull XPC vars_part from " - "partition %d, ret=%d\n", partid, ret); - return ret; - } - - - /* see if they've been set up yet */ - - if (pulled_entry->magic != XPC_VP_MAGIC1 && - pulled_entry->magic != XPC_VP_MAGIC2) { - - if (pulled_entry->magic != 0) { - dev_dbg(xpc_chan, "partition %d's XPC vars_part for " - "partition %d has bad magic value (=0x%lx)\n", - partid, sn_partition_id, pulled_entry->magic); - return xpcBadMagic; - } - - /* they've not been initialized yet */ - return xpcRetry; - } - - if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) { - - /* validate the variables */ - - if (pulled_entry->GPs_pa == 0 || - pulled_entry->openclose_args_pa == 0 || - pulled_entry->IPI_amo_pa == 0) { - - dev_err(xpc_chan, "partition %d's XPC vars_part for " - "partition %d are not valid\n", partid, - sn_partition_id); - return xpcInvalidAddress; - } - - /* the variables we imported look to be valid */ - - part->remote_GPs_pa = pulled_entry->GPs_pa; - part->remote_openclose_args_pa = - pulled_entry->openclose_args_pa; - part->remote_IPI_amo_va = - (AMO_t *) __va(pulled_entry->IPI_amo_pa); - part->remote_IPI_nasid = pulled_entry->IPI_nasid; - part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid; - - if (part->nchannels > pulled_entry->nchannels) { - part->nchannels = pulled_entry->nchannels; - } - - /* let the other side know that we've pulled their variables */ - - xpc_vars_part[partid].magic = XPC_VP_MAGIC2; - } - - if (pulled_entry->magic == XPC_VP_MAGIC1) { - return xpcRetry; - } - - return xpcSuccess; -} - - -/* - * Get the IPI flags and pull the openclose args and/or remote GPs as needed. - */ -static u64 -xpc_get_IPI_flags(struct xpc_partition *part) -{ - unsigned long irq_flags; - u64 IPI_amo; - enum xpc_retval ret; - - - /* - * See if there are any IPI flags to be handled. - */ - - spin_lock_irqsave(&part->IPI_lock, irq_flags); - if ((IPI_amo = part->local_IPI_amo) != 0) { - part->local_IPI_amo = 0; - } - spin_unlock_irqrestore(&part->IPI_lock, irq_flags); - - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, - part->remote_openclose_args, - (void *) part->remote_openclose_args_pa, - XPC_OPENCLOSE_ARGS_SIZE); - if (ret != xpcSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull openclose args from " - "partition %d, ret=%d\n", XPC_PARTID(part), - ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, part->remote_GPs, - (void *) part->remote_GPs_pa, - XPC_GP_SIZE); - if (ret != xpcSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull GPs from partition " - "%d, ret=%d\n", XPC_PARTID(part), ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - return IPI_amo; -} - - -/* - * Allocate the local message queue and the notify queue. - */ -static enum xpc_retval -xpc_allocate_local_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - - // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between - // >>> iterations of the for-loop, bail if set? - - // >>> should we impose a minimum #of entries? like 4 or 8? - for (nentries = ch->local_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->local_msgqueue_base); - if (ch->local_msgqueue == NULL) { - continue; - } - - nbytes = nentries * sizeof(struct xpc_notify); - ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); - if (ch->notify_queue == NULL) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->local_nentries, ch->partid, ch->number); - - ch->local_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for local message queue and notify " - "queue, partid=%d, channel=%d\n", ch->partid, ch->number); - return xpcNoMemory; -} - - -/* - * Allocate the cached remote message queue. - */ -static enum xpc_retval -xpc_allocate_remote_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - - DBUG_ON(ch->remote_nentries <= 0); - - // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between - // >>> iterations of the for-loop, bail if set? - - // >>> should we impose a minimum #of entries? like 4 or 8? - for (nentries = ch->remote_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->remote_msgqueue_base); - if (ch->remote_msgqueue == NULL) { - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->remote_nentries, ch->partid, ch->number); - - ch->remote_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - return xpcNoMemory; -} - - -/* - * Allocate message queues and other stuff associated with a channel. - * - * Note: Assumes all of the channel sizes are filled in. - */ -static enum xpc_retval -xpc_allocate_msgqueues(struct xpc_channel *ch) -{ - unsigned long irq_flags; - enum xpc_retval ret; - - - DBUG_ON(ch->flags & XPC_C_SETUP); - - if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) { - return ret; - } - - if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - return ret; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_SETUP; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - return xpcSuccess; -} - - -/* - * Process a connect message from a remote partition. - * - * Note: xpc_process_connect() is expecting to be called with the - * spin_lock_irqsave held and will leave it locked upon return. - */ -static void -xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) -{ - enum xpc_retval ret; - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (!(ch->flags & XPC_C_OPENREQUEST) || - !(ch->flags & XPC_C_ROPENREQUEST)) { - /* nothing more to do for now */ - return; - } - DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); - - if (!(ch->flags & XPC_C_SETUP)) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_allocate_msgqueues(ch); - spin_lock_irqsave(&ch->lock, *irq_flags); - - if (ret != xpcSuccess) { - XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); - } - if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) { - return; - } - - DBUG_ON(!(ch->flags & XPC_C_SETUP)); - DBUG_ON(ch->local_msgqueue == NULL); - DBUG_ON(ch->remote_msgqueue == NULL); - } - - if (!(ch->flags & XPC_C_OPENREPLY)) { - ch->flags |= XPC_C_OPENREPLY; - xpc_IPI_send_openreply(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_ROPENREPLY)) { - return; - } - - DBUG_ON(ch->remote_msgqueue_pa == 0); - - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ - - dev_info(xpc_chan, "channel %d to partition %d connected\n", - ch->number, ch->partid); - - spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_create_kthreads(ch, 1, 0); - spin_lock_irqsave(&ch->lock, *irq_flags); -} - - -/* - * Notify those who wanted to be notified upon delivery of their message. - */ -static void -xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put) -{ - struct xpc_notify *notify; - u8 notify_type; - s64 get = ch->w_remote_GP.get - 1; - - - while (++get < put && atomic_read(&ch->n_to_notify) > 0) { - - notify = &ch->notify_queue[get % ch->local_nentries]; - - /* - * See if the notify entry indicates it was associated with - * a message who's sender wants to be notified. It is possible - * that it is, but someone else is doing or has done the - * notification. - */ - notify_type = notify->type; - if (notify_type == 0 || - cmpxchg(¬ify->type, notify_type, 0) != - notify_type) { - continue; - } - - DBUG_ON(notify_type != XPC_N_CALL); - - atomic_dec(&ch->n_to_notify); - - if (notify->func != NULL) { - dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) notify, get, ch->partid, ch->number); - - notify->func(reason, ch->partid, ch->number, - notify->key); - - dev_dbg(xpc_chan, "notify->func() returned, " - "notify=0x%p, msg_number=%ld, partid=%d, " - "channel=%d\n", (void *) notify, get, - ch->partid, ch->number); - } - } -} - - -/* - * Free up message queues and other stuff that were allocated for the specified - * channel. - * - * Note: ch->reason and ch->reason_line are left set for debugging purposes, - * they're cleared when XPC_C_DISCONNECTED is cleared. - */ -static void -xpc_free_msgqueues(struct xpc_channel *ch) -{ - DBUG_ON(!spin_is_locked(&ch->lock)); - DBUG_ON(atomic_read(&ch->n_to_notify) != 0); - - ch->remote_msgqueue_pa = 0; - ch->func = NULL; - ch->key = NULL; - ch->msg_size = 0; - ch->local_nentries = 0; - ch->remote_nentries = 0; - ch->kthreads_assigned_limit = 0; - ch->kthreads_idle_limit = 0; - - ch->local_GP->get = 0; - ch->local_GP->put = 0; - ch->remote_GP.get = 0; - ch->remote_GP.put = 0; - ch->w_local_GP.get = 0; - ch->w_local_GP.put = 0; - ch->w_remote_GP.get = 0; - ch->w_remote_GP.put = 0; - ch->next_msg_to_pull = 0; - - if (ch->flags & XPC_C_SETUP) { - ch->flags &= ~XPC_C_SETUP; - - dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", - ch->flags, ch->partid, ch->number); - - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->remote_msgqueue_base); - ch->remote_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - } -} - - -/* - * spin_lock_irqsave() is expected to be held on entry. - */ -static void -xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - return; - } - - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - - /* make sure all activity has settled down first */ - - if (atomic_read(&ch->kthreads_assigned) > 0 || - atomic_read(&ch->references) > 0) { - return; - } - DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); - - if (part->act_state == XPC_P_DEACTIVATING) { - /* can't proceed until the other side disengages from us */ - if (xpc_partition_engaged(1UL << ch->partid)) { - return; - } - - } else { - - /* as long as the other side is up do the full protocol */ - - if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { - return; - } - - if (!(ch->flags & XPC_C_CLOSEREPLY)) { - ch->flags |= XPC_C_CLOSEREPLY; - xpc_IPI_send_closereply(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_RCLOSEREPLY)) { - return; - } - } - - /* wake those waiting for notify completion */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* >>> we do callout while holding ch->lock */ - xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put); - } - - /* both sides are disconnected now */ - - if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_disconnect_callout(ch, xpcDisconnected); - spin_lock_irqsave(&ch->lock, *irq_flags); - } - - /* it's now safe to free the channel's message queues */ - xpc_free_msgqueues(ch); - - /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */ - ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); - - atomic_dec(&part->nchannels_active); - - if (channel_was_connected) { - dev_info(xpc_chan, "channel %d to partition %d disconnected, " - "reason=%d\n", ch->number, ch->partid, ch->reason); - } - - if (ch->flags & XPC_C_WDISCONNECT) { - /* we won't lose the CPU since we're holding ch->lock */ - complete(&ch->wdisconnect_wait); - } else if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - /* time to take action on any delayed IPI flags */ - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number, - ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); - } - ch->delayed_IPI_flags = 0; - } -} - - -/* - * Process a change in the channel's remote connection state. - */ -static void -xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, - u8 IPI_flags) -{ - unsigned long irq_flags; - struct xpc_openclose_args *args = - &part->remote_openclose_args[ch_number]; - struct xpc_channel *ch = &part->channels[ch_number]; - enum xpc_retval reason; - - - - spin_lock_irqsave(&ch->lock, irq_flags); - -again: - - if ((ch->flags & XPC_C_DISCONNECTED) && - (ch->flags & XPC_C_WDISCONNECT)) { - /* - * Delay processing IPI flags until thread waiting disconnect - * has had a chance to see that the channel is disconnected. - */ - ch->delayed_IPI_flags |= IPI_flags; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - - if (IPI_flags & XPC_IPI_CLOSEREQUEST) { - - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received " - "from partid=%d, channel=%d\n", args->reason, - ch->partid, ch->number); - - /* - * If RCLOSEREQUEST is set, we're probably waiting for - * RCLOSEREPLY. We should find it and a ROPENREQUEST packed - * with this RCLOSEREQUEST in the IPI_flags. - */ - - if (ch->flags & XPC_C_RCLOSEREQUEST) { - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); - DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); - - DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY)); - IPI_flags &= ~XPC_IPI_CLOSEREPLY; - ch->flags |= XPC_C_RCLOSEREPLY; - - /* both sides have finished disconnecting */ - xpc_process_disconnect(ch, &irq_flags); - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); - goto again; - } - - if (ch->flags & XPC_C_DISCONNECTED) { - if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, - ch_number) & XPC_IPI_OPENREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, - XPC_IPI_CLOSEREQUEST); - spin_unlock(&part->IPI_lock); - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&part->nchannels_active); - ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); - } - - IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY); - - /* - * The meaningful CLOSEREQUEST connection state fields are: - * reason = reason connection is to be closed - */ - - ch->flags |= XPC_C_RCLOSEREQUEST; - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - reason = args->reason; - if (reason <= xpcSuccess || reason > xpcUnknownReason) { - reason = xpcUnknownReason; - } else if (reason == xpcUnregistering) { - reason = xpcOtherUnregistering; - } - - XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - - DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - xpc_process_disconnect(ch, &irq_flags); - } - - - if (IPI_flags & XPC_IPI_CLOSEREPLY) { - - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d," - " channel=%d\n", ch->partid, ch->number); - - if (ch->flags & XPC_C_DISCONNECTED) { - DBUG_ON(part->act_state != XPC_P_DEACTIVATING); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - - if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number) - & XPC_IPI_CLOSEREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, XPC_IPI_CLOSEREPLY); - spin_unlock(&part->IPI_lock); - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - ch->flags |= XPC_C_RCLOSEREPLY; - - if (ch->flags & XPC_C_CLOSEREPLY) { - /* both sides have finished disconnecting */ - xpc_process_disconnect(ch, &irq_flags); - } - } - - - if (IPI_flags & XPC_IPI_OPENREQUEST) { - - dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, " - "local_nentries=%d) received from partid=%d, " - "channel=%d\n", args->msg_size, args->local_nentries, - ch->partid, ch->number); - - if (part->act_state == XPC_P_DEACTIVATING || - (ch->flags & XPC_C_ROPENREQUEST)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { - ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | - XPC_C_OPENREQUEST))); - DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | - XPC_C_OPENREPLY | XPC_C_CONNECTED)); - - /* - * The meaningful OPENREQUEST connection state fields are: - * msg_size = size of channel's messages in bytes - * local_nentries = remote partition's local_nentries - */ - if (args->msg_size == 0 || args->local_nentries == 0) { - /* assume OPENREQUEST was delayed by mistake */ - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); - ch->remote_nentries = args->local_nentries; - - - if (ch->flags & XPC_C_OPENREQUEST) { - if (args->msg_size != ch->msg_size) { - XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - } else { - ch->msg_size = args->msg_size; - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&part->nchannels_active); - } - - xpc_process_connect(ch, &irq_flags); - } - - - if (IPI_flags & XPC_IPI_OPENREPLY) { - - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, " - "local_nentries=%d, remote_nentries=%d) received from " - "partid=%d, channel=%d\n", args->local_msgqueue_pa, - args->local_nentries, args->remote_nentries, - ch->partid, ch->number); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - if (!(ch->flags & XPC_C_OPENREQUEST)) { - XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); - DBUG_ON(ch->flags & XPC_C_CONNECTED); - - /* - * The meaningful OPENREPLY connection state fields are: - * local_msgqueue_pa = physical address of remote - * partition's local_msgqueue - * local_nentries = remote partition's local_nentries - * remote_nentries = remote partition's remote_nentries - */ - DBUG_ON(args->local_msgqueue_pa == 0); - DBUG_ON(args->local_nentries == 0); - DBUG_ON(args->remote_nentries == 0); - - ch->flags |= XPC_C_ROPENREPLY; - ch->remote_msgqueue_pa = args->local_msgqueue_pa; - - if (args->local_nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " - "remote_nentries=%d, old remote_nentries=%d, " - "partid=%d, channel=%d\n", - args->local_nentries, ch->remote_nentries, - ch->partid, ch->number); - - ch->remote_nentries = args->local_nentries; - } - if (args->remote_nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " - "local_nentries=%d, old local_nentries=%d, " - "partid=%d, channel=%d\n", - args->remote_nentries, ch->local_nentries, - ch->partid, ch->number); - - ch->local_nentries = args->remote_nentries; - } - - xpc_process_connect(ch, &irq_flags); - } - - spin_unlock_irqrestore(&ch->lock, irq_flags); -} - - -/* - * Attempt to establish a channel connection to a remote partition. - */ -static enum xpc_retval -xpc_connect_channel(struct xpc_channel *ch) -{ - unsigned long irq_flags; - struct xpc_registration *registration = &xpc_registrations[ch->number]; - - - if (mutex_trylock(®istration->mutex) == 0) { - return xpcRetry; - } - - if (!XPC_CHANNEL_REGISTERED(ch->number)) { - mutex_unlock(®istration->mutex); - return xpcUnregistered; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - - DBUG_ON(ch->flags & XPC_C_CONNECTED); - DBUG_ON(ch->flags & XPC_C_OPENREQUEST); - - if (ch->flags & XPC_C_DISCONNECTING) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - mutex_unlock(®istration->mutex); - return ch->reason; - } - - - /* add info from the channel connect registration to the channel */ - - ch->kthreads_assigned_limit = registration->assigned_limit; - ch->kthreads_idle_limit = registration->idle_limit; - DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); - DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); - DBUG_ON(atomic_read(&ch->kthreads_active) != 0); - - ch->func = registration->func; - DBUG_ON(registration->func == NULL); - ch->key = registration->key; - - ch->local_nentries = registration->nentries; - - if (ch->flags & XPC_C_ROPENREQUEST) { - if (registration->msg_size != ch->msg_size) { - /* the local and remote sides aren't the same */ - - /* - * Because XPC_DISCONNECT_CHANNEL() can block we're - * forced to up the registration sema before we unlock - * the channel lock. But that's okay here because we're - * done with the part that required the registration - * sema. XPC_DISCONNECT_CHANNEL() requires that the - * channel lock be locked and will unlock and relock - * the channel lock as needed. - */ - mutex_unlock(®istration->mutex); - XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcUnequalMsgSizes; - } - } else { - ch->msg_size = registration->msg_size; - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&xpc_partitions[ch->partid].nchannels_active); - } - - mutex_unlock(®istration->mutex); - - - /* initiate the connection */ - - ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_IPI_send_openrequest(ch, &irq_flags); - - xpc_process_connect(ch, &irq_flags); - - spin_unlock_irqrestore(&ch->lock, irq_flags); - - return xpcSuccess; -} - - -/* - * Clear some of the msg flags in the local message queue. - */ -static inline void -xpc_clear_local_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 get; - - - get = ch->w_remote_GP.get; - do { - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (get % ch->local_nentries) * ch->msg_size); - msg->flags = 0; - } while (++get < (volatile s64) ch->remote_GP.get); -} - - -/* - * Clear some of the msg flags in the remote message queue. - */ -static inline void -xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 put; - - - put = ch->w_remote_GP.put; - do { - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - (put % ch->remote_nentries) * ch->msg_size); - msg->flags = 0; - } while (++put < (volatile s64) ch->remote_GP.put); -} - - -static void -xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) -{ - struct xpc_channel *ch = &part->channels[ch_number]; - int nmsgs_sent; - - - ch->remote_GP = part->remote_GPs[ch_number]; - - - /* See what, if anything, has changed for each connected channel */ - - xpc_msgqueue_ref(ch); - - if (ch->w_remote_GP.get == ch->remote_GP.get && - ch->w_remote_GP.put == ch->remote_GP.put) { - /* nothing changed since GPs were last pulled */ - xpc_msgqueue_deref(ch); - return; - } - - if (!(ch->flags & XPC_C_CONNECTED)){ - xpc_msgqueue_deref(ch); - return; - } - - - /* - * First check to see if messages recently sent by us have been - * received by the other side. (The remote GET value will have - * changed since we last looked at it.) - */ - - if (ch->w_remote_GP.get != ch->remote_GP.get) { - - /* - * We need to notify any senders that want to be notified - * that their sent messages have been received by their - * intended recipients. We need to do this before updating - * w_remote_GP.get so that we don't allocate the same message - * queue entries prematurely (see xpc_allocate_msg()). - */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* - * Notify senders that messages sent have been - * received and delivered by the other side. - */ - xpc_notify_senders(ch, xpcMsgDelivered, - ch->remote_GP.get); - } - - /* - * Clear msg->flags in previously sent messages, so that - * they're ready for xpc_allocate_msg(). - */ - xpc_clear_local_msgqueue_flags(ch); - - ch->w_remote_GP.get = ch->remote_GP.get; - - dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.get, ch->partid, - ch->number); - - /* - * If anyone was waiting for message queue entries to become - * available, wake them up. - */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { - wake_up(&ch->msg_allocate_wq); - } - } - - - /* - * Now check for newly sent messages by the other side. (The remote - * PUT value will have changed since we last looked at it.) - */ - - if (ch->w_remote_GP.put != ch->remote_GP.put) { - /* - * Clear msg->flags in previously received messages, so that - * they're ready for xpc_get_deliverable_msg(). - */ - xpc_clear_remote_msgqueue_flags(ch); - - ch->w_remote_GP.put = ch->remote_GP.put; - - dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.put, ch->partid, - ch->number); - - nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get; - if (nmsgs_sent > 0) { - dev_dbg(xpc_chan, "msgs waiting to be copied and " - "delivered=%d, partid=%d, channel=%d\n", - nmsgs_sent, ch->partid, ch->number); - - if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { - xpc_activate_kthreads(ch, nmsgs_sent); - } - } - } - - xpc_msgqueue_deref(ch); -} - - -void -xpc_process_channel_activity(struct xpc_partition *part) -{ - unsigned long irq_flags; - u64 IPI_amo, IPI_flags; - struct xpc_channel *ch; - int ch_number; - u32 ch_flags; - - - IPI_amo = xpc_get_IPI_flags(part); - - /* - * Initiate channel connections for registered channels. - * - * For each connected channel that has pending messages activate idle - * kthreads and/or create new kthreads as needed. - */ - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - - /* - * Process any open or close related IPI flags, and then deal - * with connecting or disconnecting the channel as required. - */ - - IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number); - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) { - xpc_process_openclose_IPI(part, ch_number, IPI_flags); - } - - ch_flags = ch->flags; /* need an atomic snapshot of flags */ - - if (ch_flags & XPC_C_DISCONNECTING) { - spin_lock_irqsave(&ch->lock, irq_flags); - xpc_process_disconnect(ch, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - continue; - } - - if (part->act_state == XPC_P_DEACTIVATING) { - continue; - } - - if (!(ch_flags & XPC_C_CONNECTED)) { - if (!(ch_flags & XPC_C_OPENREQUEST)) { - DBUG_ON(ch_flags & XPC_C_SETUP); - (void) xpc_connect_channel(ch); - } else { - spin_lock_irqsave(&ch->lock, irq_flags); - xpc_process_connect(ch, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - continue; - } - - - /* - * Process any message related IPI flags, this may involve the - * activation of kthreads to deliver any pending messages sent - * from the other partition. - */ - - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) { - xpc_process_msg_IPI(part, ch_number); - } - } -} - - -/* - * XPC's heartbeat code calls this function to inform XPC that a partition is - * going down. XPC responds by tearing down the XPartition Communication - * infrastructure used for the just downed partition. - * - * XPC's heartbeat code will never call this function and xpc_partition_up() - * at the same time. Nor will it ever make multiple calls to either function - * at the same time. - */ -void -xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason) -{ - unsigned long irq_flags; - int ch_number; - struct xpc_channel *ch; - - - dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", - XPC_PARTID(part), reason); - - if (!xpc_part_ref(part)) { - /* infrastructure for this partition isn't currently set up */ - return; - } - - - /* disconnect channels associated with the partition going down */ - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - xpc_msgqueue_ref(ch); - spin_lock_irqsave(&ch->lock, irq_flags); - - XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - - spin_unlock_irqrestore(&ch->lock, irq_flags); - xpc_msgqueue_deref(ch); - } - - xpc_wakeup_channel_mgr(part); - - xpc_part_deref(part); -} - - -/* - * Teardown the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -void -xpc_teardown_infrastructure(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - - - /* - * We start off by making this partition inaccessible to local - * processes by marking it as no longer setup. Then we make it - * inaccessible to remote processes by clearing the XPC per partition - * specific variable's magic # (which indicates that these variables - * are no longer valid) and by ignoring all XPC notify IPIs sent to - * this partition. - */ - - DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); - DBUG_ON(atomic_read(&part->nchannels_active) != 0); - DBUG_ON(part->setup_state != XPC_P_SETUP); - part->setup_state = XPC_P_WTEARDOWN; - - xpc_vars_part[partid].magic = 0; - - - free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid); - - - /* - * Before proceeding with the teardown we have to wait until all - * existing references cease. - */ - wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); - - - /* now we can begin tearing down the infrastructure */ - - part->setup_state = XPC_P_TORNDOWN; - - /* in case we've still got outstanding timers registered... */ - del_timer_sync(&part->dropped_IPI_timer); - - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - part->local_IPI_amo_va = NULL; -} - - -/* - * Called by XP at the time of channel connection registration to cause - * XPC to establish connections to all currently active partitions. - */ -void -xpc_initiate_connect(int ch_number) -{ - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_part_ref(part)) { - ch = &part->channels[ch_number]; - - /* - * Initiate the establishment of a connection on the - * newly registered channel to the remote partition. - */ - xpc_wakeup_channel_mgr(part); - xpc_part_deref(part); - } - } -} - - -void -xpc_connected_callout(struct xpc_channel *ch) -{ - /* let the registerer know that a connection has been established */ - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - - ch->func(xpcConnected, ch->partid, ch->number, - (void *) (u64) ch->local_nentries, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - } -} - - -/* - * Called by XP at the time of channel connection unregistration to cause - * XPC to teardown all current connections for the specified channel. - * - * Before returning xpc_initiate_disconnect() will wait until all connections - * on the specified channel have been closed/torndown. So the caller can be - * assured that they will not be receiving any more callouts from XPC to the - * function they registered via xpc_connect(). - * - * Arguments: - * - * ch_number - channel # to unregister. - */ -void -xpc_initiate_disconnect(int ch_number) -{ - unsigned long irq_flags; - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - /* initiate the channel disconnect for every active partition */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_part_ref(part)) { - ch = &part->channels[ch_number]; - xpc_msgqueue_ref(ch); - - spin_lock_irqsave(&ch->lock, irq_flags); - - if (!(ch->flags & XPC_C_DISCONNECTED)) { - ch->flags |= XPC_C_WDISCONNECT; - - XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, - &irq_flags); - } - - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_msgqueue_deref(ch); - xpc_part_deref(part); - } - } - - xpc_disconnect_wait(ch_number); -} - - -/* - * To disconnect a channel, and reflect it back to all who may be waiting. - * - * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by - * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by - * xpc_disconnect_wait(). - * - * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. - */ -void -xpc_disconnect_channel(const int line, struct xpc_channel *ch, - enum xpc_retval reason, unsigned long *irq_flags) -{ - u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED); - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { - return; - } - DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); - - dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", - reason, line, ch->partid, ch->number); - - XPC_SET_REASON(ch, reason, line); - - ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); - /* some of these may not have been set */ - ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | - XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | - XPC_C_CONNECTING | XPC_C_CONNECTED); - - xpc_IPI_send_closerequest(ch, irq_flags); - - if (channel_was_connected) { - ch->flags |= XPC_C_WASCONNECTED; - } - - spin_unlock_irqrestore(&ch->lock, *irq_flags); - - /* wake all idle kthreads so they can exit */ - if (atomic_read(&ch->kthreads_idle) > 0) { - wake_up_all(&ch->idle_wq); - - } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { - /* start a kthread that will do the xpcDisconnecting callout */ - xpc_create_kthreads(ch, 1, 1); - } - - /* wake those waiting to allocate an entry from the local msg queue */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { - wake_up(&ch->msg_allocate_wq); - } - - spin_lock_irqsave(&ch->lock, *irq_flags); -} - - -void -xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason) -{ - /* - * Let the channel's registerer know that the channel is being - * disconnected. We don't want to do this if the registerer was never - * informed of a connection being made. - */ - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " - "channel=%d\n", reason, ch->partid, ch->number); - - ch->func(reason, ch->partid, ch->number, NULL, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " - "channel=%d\n", reason, ch->partid, ch->number); - } -} - - -/* - * Wait for a message entry to become available for the specified channel, - * but don't wait any longer than 1 jiffy. - */ -static enum xpc_retval -xpc_allocate_msg_wait(struct xpc_channel *ch) -{ - enum xpc_retval ret; - - - if (ch->flags & XPC_C_DISCONNECTING) { - DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? - return ch->reason; - } - - atomic_inc(&ch->n_on_msg_allocate_wq); - ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1); - atomic_dec(&ch->n_on_msg_allocate_wq); - - if (ch->flags & XPC_C_DISCONNECTING) { - ret = ch->reason; - DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? - } else if (ret == 0) { - ret = xpcTimeout; - } else { - ret = xpcInterrupted; - } - - return ret; -} - - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. - */ -static enum xpc_retval -xpc_allocate_msg(struct xpc_channel *ch, u32 flags, - struct xpc_msg **address_of_msg) -{ - struct xpc_msg *msg; - enum xpc_retval ret; - s64 put; - - - /* this reference will be dropped in xpc_send_msg() */ - xpc_msgqueue_ref(ch); - - if (ch->flags & XPC_C_DISCONNECTING) { - xpc_msgqueue_deref(ch); - return ch->reason; - } - if (!(ch->flags & XPC_C_CONNECTED)) { - xpc_msgqueue_deref(ch); - return xpcNotConnected; - } - - - /* - * Get the next available message entry from the local message queue. - * If none are available, we'll make sure that we grab the latest - * GP values. - */ - ret = xpcTimeout; - - while (1) { - - put = (volatile s64) ch->w_local_GP.put; - if (put - (volatile s64) ch->w_remote_GP.get < - ch->local_nentries) { - - /* There are available message entries. We need to try - * to secure one for ourselves. We'll do this by trying - * to increment w_local_GP.put as long as someone else - * doesn't beat us to it. If they do, we'll have to - * try again. - */ - if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == - put) { - /* we got the entry referenced by put */ - break; - } - continue; /* try again */ - } - - - /* - * There aren't any available msg entries at this time. - * - * In waiting for a message entry to become available, - * we set a timeout in case the other side is not - * sending completion IPIs. This lets us fake an IPI - * that will cause the IPI handler to fetch the latest - * GP values as if an IPI was sent by the other side. - */ - if (ret == xpcTimeout) { - xpc_IPI_send_local_msgrequest(ch); - } - - if (flags & XPC_NOWAIT) { - xpc_msgqueue_deref(ch); - return xpcNoWait; - } - - ret = xpc_allocate_msg_wait(ch); - if (ret != xpcInterrupted && ret != xpcTimeout) { - xpc_msgqueue_deref(ch); - return ret; - } - } - - - /* get the message's address and initialize it */ - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (put % ch->local_nentries) * ch->msg_size); - - - DBUG_ON(msg->flags != 0); - msg->number = put; - - dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", put + 1, - (void *) msg, msg->number, ch->partid, ch->number); - - *address_of_msg = msg; - - return xpcSuccess; -} - - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. NOTE that this routine can sleep waiting for a message - * entry to become available. To not sleep, pass in the XPC_NOWAIT flag. - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel #. - * flags - see xpc.h for valid flags. - * payload - address of the allocated payload area pointer (filled in on - * return) in which the user-defined message is constructed. - */ -enum xpc_retval -xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - enum xpc_retval ret = xpcUnknownReason; - struct xpc_msg *msg = NULL; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - - *payload = NULL; - - if (xpc_part_ref(part)) { - ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg); - xpc_part_deref(part); - - if (msg != NULL) { - *payload = &msg->payload; - } - } - - return ret; -} - - -/* - * Now we actually send the messages that are ready to be sent by advancing - * the local message queue's Put value and then send an IPI to the recipient - * partition. - */ -static void -xpc_send_msgs(struct xpc_channel *ch, s64 initial_put) -{ - struct xpc_msg *msg; - s64 put = initial_put + 1; - int send_IPI = 0; - - - while (1) { - - while (1) { - if (put == (volatile s64) ch->w_local_GP.put) { - break; - } - - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (put % ch->local_nentries) * ch->msg_size); - - if (!(msg->flags & XPC_M_READY)) { - break; - } - - put++; - } - - if (put == initial_put) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) != - initial_put) { - /* someone else beat us to it */ - DBUG_ON((volatile s64) ch->local_GP->put < initial_put); - break; - } - - /* we just set the new value of local_GP->put */ - - dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " - "channel=%d\n", put, ch->partid, ch->number); - - send_IPI = 1; - - /* - * We need to ensure that the message referenced by - * local_GP->put is not XPC_M_READY or that local_GP->put - * equals w_local_GP.put, so we'll go have a look. - */ - initial_put = put; - } - - if (send_IPI) { - xpc_IPI_send_msgrequest(ch); - } -} - - -/* - * Common code that does the actual sending of the message by advancing the - * local message queue's Put value and sends an IPI to the partition the - * message is being sent to. - */ -static enum xpc_retval -xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, - xpc_notify_func func, void *key) -{ - enum xpc_retval ret = xpcSuccess; - struct xpc_notify *notify = notify; - s64 put, msg_number = msg->number; - - - DBUG_ON(notify_type == XPC_N_CALL && func == NULL); - DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) != - msg_number % ch->local_nentries); - DBUG_ON(msg->flags & XPC_M_READY); - - if (ch->flags & XPC_C_DISCONNECTING) { - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ch->reason; - } - - if (notify_type != 0) { - /* - * Tell the remote side to send an ACK interrupt when the - * message has been delivered. - */ - msg->flags |= XPC_M_INTERRUPT; - - atomic_inc(&ch->n_to_notify); - - notify = &ch->notify_queue[msg_number % ch->local_nentries]; - notify->func = func; - notify->key = key; - notify->type = notify_type; - - // >>> is a mb() needed here? - - if (ch->flags & XPC_C_DISCONNECTING) { - /* - * An error occurred between our last error check and - * this one. We will try to clear the type field from - * the notify entry. If we succeed then - * xpc_disconnect_channel() didn't already process - * the notify entry. - */ - if (cmpxchg(¬ify->type, notify_type, 0) == - notify_type) { - atomic_dec(&ch->n_to_notify); - ret = ch->reason; - } - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; - } - } - - msg->flags |= XPC_M_READY; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->put. - */ - mb(); - - /* see if the message is next in line to be sent, if so send it */ - - put = ch->local_GP->put; - if (put == msg_number) { - xpc_send_msgs(ch, put); - } - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; -} - - -/* - * Send a message previously allocated using xpc_initiate_allocate() on the - * specified channel connected to the specified partition. - * - * This routine will not wait for the message to be received, nor will - * notification be given when it does happen. Once this routine has returned - * the message entry allocated via xpc_initiate_allocate() is no longer - * accessable to the caller. - * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - */ -enum xpc_retval -xpc_initiate_send(partid_t partid, int ch_number, void *payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xpc_retval ret; - - - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, - partid, ch_number); - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); - - ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); - - return ret; -} - - -/* - * Send a message previously allocated using xpc_initiate_allocate on the - * specified channel connected to the specified partition. - * - * This routine will not wait for the message to be sent. Once this routine - * has returned the message entry allocated via xpc_initiate_allocate() is no - * longer accessable to the caller. - * - * Once the remote end of the channel has received the message, the function - * passed as an argument to xpc_initiate_send_notify() will be called. This - * allows the sender to free up or re-use any buffers referenced by the - * message, but does NOT mean the message has been processed at the remote - * end by a receiver. - * - * If this routine returns an error, the caller's function will NOT be called. - * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - * func - function to call with asynchronous notification of message - * receipt. THIS FUNCTION MUST BE NON-BLOCKING. - * key - user-defined key to be passed to the function when it's called. - */ -enum xpc_retval -xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload, - xpc_notify_func func, void *key) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xpc_retval ret; - - - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, - partid, ch_number); - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); - DBUG_ON(func == NULL); - - ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, - func, key); - return ret; -} - - -static struct xpc_msg * -xpc_pull_remote_msg(struct xpc_channel *ch, s64 get) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - struct xpc_msg *remote_msg, *msg; - u32 msg_index, nmsgs; - u64 msg_offset; - enum xpc_retval ret; - - - if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) { - /* we were interrupted by a signal */ - return NULL; - } - - while (get >= ch->next_msg_to_pull) { - - /* pull as many messages as are ready and able to be pulled */ - - msg_index = ch->next_msg_to_pull % ch->remote_nentries; - - DBUG_ON(ch->next_msg_to_pull >= - (volatile s64) ch->w_remote_GP.put); - nmsgs = (volatile s64) ch->w_remote_GP.put - - ch->next_msg_to_pull; - if (msg_index + nmsgs > ch->remote_nentries) { - /* ignore the ones that wrap the msg queue for now */ - nmsgs = ch->remote_nentries - msg_index; - } - - msg_offset = msg_index * ch->msg_size; - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - msg_offset); - remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa + - msg_offset); - - if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg, - nmsgs * ch->msg_size)) != xpcSuccess) { - - dev_dbg(xpc_chan, "failed to pull %d msgs starting with" - " msg %ld from partition %d, channel=%d, " - "ret=%d\n", nmsgs, ch->next_msg_to_pull, - ch->partid, ch->number, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - - mutex_unlock(&ch->msg_to_pull_mutex); - return NULL; - } - - mb(); /* >>> this may not be needed, we're not sure */ - - ch->next_msg_to_pull += nmsgs; - } - - mutex_unlock(&ch->msg_to_pull_mutex); - - /* return the message we were looking for */ - msg_offset = (get % ch->remote_nentries) * ch->msg_size; - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset); - - return msg; -} - - -/* - * Get a message to be delivered. - */ -static struct xpc_msg * -xpc_get_deliverable_msg(struct xpc_channel *ch) -{ - struct xpc_msg *msg = NULL; - s64 get; - - - do { - if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) { - break; - } - - get = (volatile s64) ch->w_local_GP.get; - if (get == (volatile s64) ch->w_remote_GP.put) { - break; - } - - /* There are messages waiting to be pulled and delivered. - * We need to try to secure one for ourselves. We'll do this - * by trying to increment w_local_GP.get and hope that no one - * else beats us to it. If they do, we'll we'll simply have - * to try again for the next one. - */ - - if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) { - /* we got the entry referenced by get */ - - dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " - "partid=%d, channel=%d\n", get + 1, - ch->partid, ch->number); - - /* pull the message from the remote partition */ - - msg = xpc_pull_remote_msg(ch, get); - - DBUG_ON(msg != NULL && msg->number != get); - DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE)); - DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY)); - - break; - } - - } while (1); - - return msg; -} - - -/* - * Deliver a message to its intended recipient. - */ -void -xpc_deliver_msg(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - - - if ((msg = xpc_get_deliverable_msg(ch)) != NULL) { - - /* - * This ref is taken to protect the payload itself from being - * freed before the user is finished with it, which the user - * indicates by calling xpc_initiate_received(). - */ - xpc_msgqueue_ref(ch); - - atomic_inc(&ch->kthreads_active); - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg->number, ch->partid, - ch->number); - - /* deliver the message to its intended recipient */ - ch->func(xpcMsgReceived, ch->partid, ch->number, - &msg->payload, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg->number, ch->partid, - ch->number); - } - - atomic_dec(&ch->kthreads_active); - } -} - - -/* - * Now we actually acknowledge the messages that have been delivered and ack'd - * by advancing the cached remote message queue's Get value and if requested - * send an IPI to the message sender's partition. - */ -static void -xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) -{ - struct xpc_msg *msg; - s64 get = initial_get + 1; - int send_IPI = 0; - - - while (1) { - - while (1) { - if (get == (volatile s64) ch->w_local_GP.get) { - break; - } - - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - (get % ch->remote_nentries) * ch->msg_size); - - if (!(msg->flags & XPC_M_DONE)) { - break; - } - - msg_flags |= msg->flags; - get++; - } - - if (get == initial_get) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) != - initial_get) { - /* someone else beat us to it */ - DBUG_ON((volatile s64) ch->local_GP->get <= - initial_get); - break; - } - - /* we just set the new value of local_GP->get */ - - dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " - "channel=%d\n", get, ch->partid, ch->number); - - send_IPI = (msg_flags & XPC_M_INTERRUPT); - - /* - * We need to ensure that the message referenced by - * local_GP->get is not XPC_M_DONE or that local_GP->get - * equals w_local_GP.get, so we'll go have a look. - */ - initial_get = get; - } - - if (send_IPI) { - xpc_IPI_send_msgrequest(ch); - } -} - - -/* - * Acknowledge receipt of a delivered message. - * - * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition - * that sent the message. - * - * This function, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # message received on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - */ -void -xpc_initiate_received(partid_t partid, int ch_number, void *payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_channel *ch; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - s64 get, msg_number = msg->number; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - - ch = &part->channels[ch_number]; - - dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg_number, ch->partid, ch->number); - - DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) != - msg_number % ch->remote_nentries); - DBUG_ON(msg->flags & XPC_M_DONE); - - msg->flags |= XPC_M_DONE; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->get. - */ - mb(); - - /* - * See if this message is next in line to be acknowledged as having - * been delivered. - */ - get = ch->local_GP->get; - if (get == msg_number) { - xpc_acknowledge_msgs(ch, get, msg->flags); - } - - /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */ - xpc_msgqueue_deref(ch); -} - diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c deleted file mode 100644 index 81785b78bc1..00000000000 --- a/arch/ia64/sn/kernel/xpc_main.c +++ /dev/null @@ -1,1431 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) support - standard version. - * - * XPC provides a message passing capability that crosses partition - * boundaries. This module is made up of two parts: - * - * partition This part detects the presence/absence of other - * partitions. It provides a heartbeat and monitors - * the heartbeats of other partitions. - * - * channel This part manages the channels and sends/receives - * messages across them to/from other partitions. - * - * There are a couple of additional functions residing in XP, which - * provide an interface to XPC for its users. - * - * - * Caveats: - * - * . We currently have no way to determine which nasid an IPI came - * from. Thus, xpc_IPI_send() does a remote AMO write followed by - * an IPI. The AMO indicates where data is to be pulled from, so - * after the IPI arrives, the remote partition checks the AMO word. - * The IPI can actually arrive before the AMO however, so other code - * must periodically check for this case. Also, remote AMO operations - * do not reliably time out. Thus we do a remote PIO read solely to - * know whether the remote partition is down and whether we should - * stop sending IPIs to it. This remote PIO read operation is set up - * in a special nofault region so SAL knows to ignore (and cleanup) - * any errors due to the remote AMO write, PIO read, and/or PIO - * write operations. - * - * If/when new hardware solves this IPI problem, we should abandon - * the current approach. - * - */ - - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/syscalls.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/completion.h> -#include <linux/kdebug.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/uaccess.h> -#include <asm/sn/xpc.h> - - -/* define two XPC debug device structures to be used with dev_dbg() et al */ - -struct device_driver xpc_dbg_name = { - .name = "xpc" -}; - -struct device xpc_part_dbg_subname = { - .bus_id = {0}, /* set to "part" at xpc_init() time */ - .driver = &xpc_dbg_name -}; - -struct device xpc_chan_dbg_subname = { - .bus_id = {0}, /* set to "chan" at xpc_init() time */ - .driver = &xpc_dbg_name -}; - -struct device *xpc_part = &xpc_part_dbg_subname; -struct device *xpc_chan = &xpc_chan_dbg_subname; - - -static int xpc_kdebug_ignore; - - -/* systune related variables for /proc/sys directories */ - -static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; -static int xpc_hb_min_interval = 1; -static int xpc_hb_max_interval = 10; - -static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; -static int xpc_hb_check_min_interval = 10; -static int xpc_hb_check_max_interval = 120; - -int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; -static int xpc_disengage_request_min_timelimit = 0; -static int xpc_disengage_request_max_timelimit = 120; - -static ctl_table xpc_sys_xpc_hb_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb_interval", - .data = &xpc_hb_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_hb_min_interval, - .extra2 = &xpc_hb_max_interval - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb_check_interval", - .data = &xpc_hb_check_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_hb_check_min_interval, - .extra2 = &xpc_hb_check_max_interval - }, - {} -}; -static ctl_table xpc_sys_xpc_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb", - .mode = 0555, - .child = xpc_sys_xpc_hb_dir - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "disengage_request_timelimit", - .data = &xpc_disengage_request_timelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_disengage_request_min_timelimit, - .extra2 = &xpc_disengage_request_max_timelimit - }, - {} -}; -static ctl_table xpc_sys_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xpc", - .mode = 0555, - .child = xpc_sys_xpc_dir - }, - {} -}; -static struct ctl_table_header *xpc_sysctl; - -/* non-zero if any remote partition disengage request was timed out */ -int xpc_disengage_request_timedout; - -/* #of IRQs received */ -static atomic_t xpc_act_IRQ_rcvd; - -/* IRQ handler notifies this wait queue on receipt of an IRQ */ -static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); - -static unsigned long xpc_hb_check_timeout; - -/* notification that the xpc_hb_checker thread has exited */ -static DECLARE_COMPLETION(xpc_hb_checker_exited); - -/* notification that the xpc_discovery thread has exited */ -static DECLARE_COMPLETION(xpc_discovery_exited); - - -static struct timer_list xpc_hb_timer; - - -static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); - - -static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); -static struct notifier_block xpc_reboot_notifier = { - .notifier_call = xpc_system_reboot, -}; - -static int xpc_system_die(struct notifier_block *, unsigned long, void *); -static struct notifier_block xpc_die_notifier = { - .notifier_call = xpc_system_die, -}; - - -/* - * Timer function to enforce the timelimit on the partition disengage request. - */ -static void -xpc_timeout_partition_disengage_request(unsigned long data) -{ - struct xpc_partition *part = (struct xpc_partition *) data; - - - DBUG_ON(jiffies < part->disengage_request_timeout); - - (void) xpc_partition_disengaged(part); - - DBUG_ON(part->disengage_request_timeout != 0); - DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0); -} - - -/* - * Notify the heartbeat check thread that an IRQ has been received. - */ -static irqreturn_t -xpc_act_IRQ_handler(int irq, void *dev_id) -{ - atomic_inc(&xpc_act_IRQ_rcvd); - wake_up_interruptible(&xpc_act_IRQ_wq); - return IRQ_HANDLED; -} - - -/* - * Timer to produce the heartbeat. The timer structures function is - * already set when this is initially called. A tunable is used to - * specify when the next timeout should occur. - */ -static void -xpc_hb_beater(unsigned long dummy) -{ - xpc_vars->heartbeat++; - - if (jiffies >= xpc_hb_check_timeout) { - wake_up_interruptible(&xpc_act_IRQ_wq); - } - - xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); - add_timer(&xpc_hb_timer); -} - - -/* - * This thread is responsible for nearly all of the partition - * activation/deactivation. - */ -static int -xpc_hb_checker(void *ignore) -{ - int last_IRQ_count = 0; - int new_IRQ_count; - int force_IRQ=0; - - - /* this thread was marked active by xpc_hb_init() */ - - daemonize(XPC_HB_CHECK_THREAD_NAME); - - set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); - - /* set our heartbeating to other partitions into motion */ - xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); - xpc_hb_beater(0); - - while (!(volatile int) xpc_exiting) { - - dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " - "been received\n", - (int) (xpc_hb_check_timeout - jiffies), - atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); - - - /* checking of remote heartbeats is skewed by IRQ handling */ - if (jiffies >= xpc_hb_check_timeout) { - dev_dbg(xpc_part, "checking remote heartbeats\n"); - xpc_check_remote_hb(); - - /* - * We need to periodically recheck to ensure no - * IPI/AMO pairs have been missed. That check - * must always reset xpc_hb_check_timeout. - */ - force_IRQ = 1; - } - - - /* check for outstanding IRQs */ - new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); - if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { - force_IRQ = 0; - - dev_dbg(xpc_part, "found an IRQ to process; will be " - "resetting xpc_hb_check_timeout\n"); - - last_IRQ_count += xpc_identify_act_IRQ_sender(); - if (last_IRQ_count < new_IRQ_count) { - /* retry once to help avoid missing AMO */ - (void) xpc_identify_act_IRQ_sender(); - } - last_IRQ_count = new_IRQ_count; - - xpc_hb_check_timeout = jiffies + - (xpc_hb_check_interval * HZ); - } - - /* wait for IRQ or timeout */ - (void) wait_event_interruptible(xpc_act_IRQ_wq, - (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) || - jiffies >= xpc_hb_check_timeout || - (volatile int) xpc_exiting)); - } - - dev_dbg(xpc_part, "heartbeat checker is exiting\n"); - - - /* mark this thread as having exited */ - complete(&xpc_hb_checker_exited); - return 0; -} - - -/* - * This thread will attempt to discover other partitions to activate - * based on info provided by SAL. This new thread is short lived and - * will exit once discovery is complete. - */ -static int -xpc_initiate_discovery(void *ignore) -{ - daemonize(XPC_DISCOVERY_THREAD_NAME); - - xpc_discovery(); - - dev_dbg(xpc_part, "discovery thread is exiting\n"); - - /* mark this thread as having exited */ - complete(&xpc_discovery_exited); - return 0; -} - - -/* - * Establish first contact with the remote partititon. This involves pulling - * the XPC per partition variables from the remote partition and waiting for - * the remote partition to pull ours. - */ -static enum xpc_retval -xpc_make_first_contact(struct xpc_partition *part) -{ - enum xpc_retval ret; - - - while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) { - if (ret != xpcRetry) { - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - dev_dbg(xpc_chan, "waiting to make first contact with " - "partition %d\n", XPC_PARTID(part)); - - /* wait a 1/4 of a second or so */ - (void) msleep_interruptible(250); - - if (part->act_state == XPC_P_DEACTIVATING) { - return part->reason; - } - } - - return xpc_mark_partition_active(part); -} - - -/* - * The first kthread assigned to a newly activated partition is the one - * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to - * that kthread until the partition is brought down, at which time that kthread - * returns back to XPC HB. (The return of that kthread will signify to XPC HB - * that XPC has dismantled all communication infrastructure for the associated - * partition.) This kthread becomes the channel manager for that partition. - * - * Each active partition has a channel manager, who, besides connecting and - * disconnecting channels, will ensure that each of the partition's connected - * channels has the required number of assigned kthreads to get the work done. - */ -static void -xpc_channel_mgr(struct xpc_partition *part) -{ - while (part->act_state != XPC_P_DEACTIVATING || - atomic_read(&part->nchannels_active) > 0 || - !xpc_partition_disengaged(part)) { - - xpc_process_channel_activity(part); - - - /* - * Wait until we've been requested to activate kthreads or - * all of the channel's message queues have been torn down or - * a signal is pending. - * - * The channel_mgr_requests is set to 1 after being awakened, - * This is done to prevent the channel mgr from making one pass - * through the loop for each request, since he will - * be servicing all the requests in one pass. The reason it's - * set to 1 instead of 0 is so that other kthreads will know - * that the channel mgr is running and won't bother trying to - * wake him up. - */ - atomic_dec(&part->channel_mgr_requests); - (void) wait_event_interruptible(part->channel_mgr_wq, - (atomic_read(&part->channel_mgr_requests) > 0 || - (volatile u64) part->local_IPI_amo != 0 || - ((volatile u8) part->act_state == - XPC_P_DEACTIVATING && - atomic_read(&part->nchannels_active) == 0 && - xpc_partition_disengaged(part)))); - atomic_set(&part->channel_mgr_requests, 1); - - // >>> Does it need to wakeup periodically as well? In case we - // >>> miscalculated the #of kthreads to wakeup or create? - } -} - - -/* - * When XPC HB determines that a partition has come up, it will create a new - * kthread and that kthread will call this function to attempt to set up the - * basic infrastructure used for Cross Partition Communication with the newly - * upped partition. - * - * The kthread that was created by XPC HB and which setup the XPC - * infrastructure will remain assigned to the partition until the partition - * goes down. At which time the kthread will teardown the XPC infrastructure - * and then exit. - * - * XPC HB will put the remote partition's XPC per partition specific variables - * physical address into xpc_partitions[partid].remote_vars_part_pa prior to - * calling xpc_partition_up(). - */ -static void -xpc_partition_up(struct xpc_partition *part) -{ - DBUG_ON(part->channels != NULL); - - dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); - - if (xpc_setup_infrastructure(part) != xpcSuccess) { - return; - } - - /* - * The kthread that XPC HB called us with will become the - * channel manager for this partition. It will not return - * back to XPC HB until the partition's XPC infrastructure - * has been dismantled. - */ - - (void) xpc_part_ref(part); /* this will always succeed */ - - if (xpc_make_first_contact(part) == xpcSuccess) { - xpc_channel_mgr(part); - } - - xpc_part_deref(part); - - xpc_teardown_infrastructure(part); -} - - -static int -xpc_activating(void *__partid) -{ - partid_t partid = (u64) __partid; - struct xpc_partition *part = &xpc_partitions[partid]; - unsigned long irq_flags; - struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; - int ret; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - spin_lock_irqsave(&part->act_lock, irq_flags); - - if (part->act_state == XPC_P_DEACTIVATING) { - part->act_state = XPC_P_INACTIVE; - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } - - /* indicate the thread is activating */ - DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); - part->act_state = XPC_P_ACTIVATING; - - XPC_SET_REASON(part, 0, 0); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - dev_dbg(xpc_part, "bringing partition %d up\n", partid); - - daemonize("xpc%02d", partid); - - /* - * This thread needs to run at a realtime priority to prevent a - * significant performance degradation. - */ - ret = sched_setscheduler(current, SCHED_FIFO, ¶m); - if (ret != 0) { - dev_warn(xpc_part, "unable to set pid %d to a realtime " - "priority, ret=%d\n", current->pid, ret); - } - - /* allow this thread and its children to run on any CPU */ - set_cpus_allowed(current, CPU_MASK_ALL); - - /* - * Register the remote partition's AMOs with SAL so it can handle - * and cleanup errors within that address range should the remote - * partition go down. We don't unregister this range because it is - * difficult to tell when outstanding writes to the remote partition - * are finished and thus when it is safe to unregister. This should - * not result in wasted space in the SAL xp_addr_region table because - * we should get the same page for remote_amos_page_pa after module - * reloads and system reboots. - */ - if (sn_register_xp_addr_region(part->remote_amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " - "xp_addr region\n", partid); - - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } - - xpc_allow_hb(partid, xpc_vars); - xpc_IPI_send_activated(part); - - - /* - * xpc_partition_up() holds this thread and marks this partition as - * XPC_P_ACTIVE by calling xpc_hb_mark_active(). - */ - (void) xpc_partition_up(part); - - xpc_disallow_hb(partid, xpc_vars); - xpc_mark_partition_inactive(part); - - if (part->reason == xpcReactivating) { - /* interrupting ourselves results in activating partition */ - xpc_IPI_send_reactivate(part); - } - - return 0; -} - - -void -xpc_activate_partition(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - unsigned long irq_flags; - pid_t pid; - - - spin_lock_irqsave(&part->act_lock, irq_flags); - - DBUG_ON(part->act_state != XPC_P_INACTIVE); - - part->act_state = XPC_P_ACTIVATION_REQ; - XPC_SET_REASON(part, xpcCloneKThread, __LINE__); - - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); - - if (unlikely(pid <= 0)) { - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - } -} - - -/* - * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified - * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more - * than one partition, we use an AMO_t structure per partition to indicate - * whether a partition has sent an IPI or not. >>> If it has, then wake up the - * associated kthread to handle it. - * - * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC - * running on other partitions. - * - * Noteworthy Arguments: - * - * irq - Interrupt ReQuest number. NOT USED. - * - * dev_id - partid of IPI's potential sender. - */ -irqreturn_t -xpc_notify_IRQ_handler(int irq, void *dev_id) -{ - partid_t partid = (partid_t) (u64) dev_id; - struct xpc_partition *part = &xpc_partitions[partid]; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - xpc_part_deref(part); - } - return IRQ_HANDLED; -} - - -/* - * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor - * because the write to their associated IPI amo completed after the IRQ/IPI - * was received. - */ -void -xpc_dropped_IPI_check(struct xpc_partition *part) -{ - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - part->dropped_IPI_timer.expires = jiffies + - XPC_P_DROPPED_IPI_WAIT; - add_timer(&part->dropped_IPI_timer); - xpc_part_deref(part); - } -} - - -void -xpc_activate_kthreads(struct xpc_channel *ch, int needed) -{ - int idle = atomic_read(&ch->kthreads_idle); - int assigned = atomic_read(&ch->kthreads_assigned); - int wakeup; - - - DBUG_ON(needed <= 0); - - if (idle > 0) { - wakeup = (needed > idle) ? idle : needed; - needed -= wakeup; - - dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " - "channel=%d\n", wakeup, ch->partid, ch->number); - - /* only wakeup the requested number of kthreads */ - wake_up_nr(&ch->idle_wq, wakeup); - } - - if (needed <= 0) { - return; - } - - if (needed + assigned > ch->kthreads_assigned_limit) { - needed = ch->kthreads_assigned_limit - assigned; - // >>>should never be less than 0 - if (needed <= 0) { - return; - } - } - - dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", - needed, ch->partid, ch->number); - - xpc_create_kthreads(ch, needed, 0); -} - - -/* - * This function is where XPC's kthreads wait for messages to deliver. - */ -static void -xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) -{ - do { - /* deliver messages to their intended recipients */ - - while ((volatile s64) ch->w_local_GP.get < - (volatile s64) ch->w_remote_GP.put && - !((volatile u32) ch->flags & - XPC_C_DISCONNECTING)) { - xpc_deliver_msg(ch); - } - - if (atomic_inc_return(&ch->kthreads_idle) > - ch->kthreads_idle_limit) { - /* too many idle kthreads on this channel */ - atomic_dec(&ch->kthreads_idle); - break; - } - - dev_dbg(xpc_chan, "idle kthread calling " - "wait_event_interruptible_exclusive()\n"); - - (void) wait_event_interruptible_exclusive(ch->idle_wq, - ((volatile s64) ch->w_local_GP.get < - (volatile s64) ch->w_remote_GP.put || - ((volatile u32) ch->flags & - XPC_C_DISCONNECTING))); - - atomic_dec(&ch->kthreads_idle); - - } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING)); -} - - -static int -xpc_daemonize_kthread(void *args) -{ - partid_t partid = XPC_UNPACK_ARG1(args); - u16 ch_number = XPC_UNPACK_ARG2(args); - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_channel *ch; - int n_needed; - unsigned long irq_flags; - - - daemonize("xpc%02dc%d", partid, ch_number); - - dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", - partid, ch_number); - - ch = &part->channels[ch_number]; - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - - /* let registerer know that connection has been established */ - - spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { - ch->flags |= XPC_C_CONNECTEDCALLOUT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_connected_callout(ch); - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - /* - * It is possible that while the callout was being - * made that the remote partition sent some messages. - * If that is the case, we may need to activate - * additional kthreads to help deliver them. We only - * need one less than total #of messages to deliver. - */ - n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; - if (n_needed > 0 && - !(ch->flags & XPC_C_DISCONNECTING)) { - xpc_activate_kthreads(ch, n_needed); - } - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - - xpc_kthread_waitmsgs(part, ch); - } - - /* let registerer know that connection is disconnecting */ - - spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTINGCALLOUT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_disconnect_callout(ch, xpcDisconnecting); - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - if (atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - } - - xpc_msgqueue_deref(ch); - - dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", - partid, ch_number); - - xpc_part_deref(part); - return 0; -} - - -/* - * For each partition that XPC has established communications with, there is - * a minimum of one kernel thread assigned to perform any operation that - * may potentially sleep or block (basically the callouts to the asynchronous - * functions registered via xpc_connect()). - * - * Additional kthreads are created and destroyed by XPC as the workload - * demands. - * - * A kthread is assigned to one of the active channels that exists for a given - * partition. - */ -void -xpc_create_kthreads(struct xpc_channel *ch, int needed, - int ignore_disconnecting) -{ - unsigned long irq_flags; - pid_t pid; - u64 args = XPC_PACK_ARGS(ch->partid, ch->number); - struct xpc_partition *part = &xpc_partitions[ch->partid]; - - - while (needed-- > 0) { - - /* - * The following is done on behalf of the newly created - * kthread. That kthread is responsible for doing the - * counterpart to the following before it exits. - */ - if (ignore_disconnecting) { - if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { - /* kthreads assigned had gone to zero */ - BUG_ON(!(ch->flags & - XPC_C_DISCONNECTINGCALLOUT_MADE)); - break; - } - - } else if (ch->flags & XPC_C_DISCONNECTING) { - break; - - } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { - if (atomic_inc_return(&part->nchannels_engaged) == 1) - xpc_mark_partition_engaged(part); - } - (void) xpc_part_ref(part); - xpc_msgqueue_ref(ch); - - pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); - if (pid < 0) { - /* the fork failed */ - - /* - * NOTE: if (ignore_disconnecting && - * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, - * then we'll deadlock if all other kthreads assigned - * to this channel are blocked in the channel's - * registerer, because the only thing that will unblock - * them is the xpcDisconnecting callout that this - * failed kernel_thread would have made. - */ - - if (atomic_dec_return(&ch->kthreads_assigned) == 0 && - atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - xpc_msgqueue_deref(ch); - xpc_part_deref(part); - - if (atomic_read(&ch->kthreads_assigned) < - ch->kthreads_idle_limit) { - /* - * Flag this as an error only if we have an - * insufficient #of kthreads for the channel - * to function. - */ - spin_lock_irqsave(&ch->lock, irq_flags); - XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - break; - } - - ch->kthreads_created++; // >>> temporary debug only!!! - } -} - - -void -xpc_disconnect_wait(int ch_number) -{ - unsigned long irq_flags; - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - int wakeup_channel_mgr; - - - /* now wait for all callouts to the caller's function to cease */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (!xpc_part_ref(part)) { - continue; - } - - ch = &part->channels[ch_number]; - - if (!(ch->flags & XPC_C_WDISCONNECT)) { - xpc_part_deref(part); - continue; - } - - wait_for_completion(&ch->wdisconnect_wait); - - spin_lock_irqsave(&ch->lock, irq_flags); - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); - wakeup_channel_mgr = 0; - - if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch->number, ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); - wakeup_channel_mgr = 1; - } - ch->delayed_IPI_flags = 0; - } - - ch->flags &= ~XPC_C_WDISCONNECT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (wakeup_channel_mgr) { - xpc_wakeup_channel_mgr(part); - } - - xpc_part_deref(part); - } -} - - -static void -xpc_do_exit(enum xpc_retval reason) -{ - partid_t partid; - int active_part_count, printed_waiting_msg = 0; - struct xpc_partition *part; - unsigned long printmsg_time, disengage_request_timeout = 0; - - - /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ - DBUG_ON(xpc_exiting == 1); - - /* - * Let the heartbeat checker thread and the discovery thread - * (if one is running) know that they should exit. Also wake up - * the heartbeat checker thread in case it's sleeping. - */ - xpc_exiting = 1; - wake_up_interruptible(&xpc_act_IRQ_wq); - - /* ignore all incoming interrupts */ - free_irq(SGI_XPC_ACTIVATE, NULL); - - /* wait for the discovery thread to exit */ - wait_for_completion(&xpc_discovery_exited); - - /* wait for the heartbeat checker thread to exit */ - wait_for_completion(&xpc_hb_checker_exited); - - - /* sleep for a 1/3 of a second or so */ - (void) msleep_interruptible(300); - - - /* wait for all partitions to become inactive */ - - printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - xpc_disengage_request_timedout = 0; - - do { - active_part_count = 0; - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { - continue; - } - - active_part_count++; - - XPC_DEACTIVATE_PARTITION(part, reason); - - if (part->disengage_request_timeout > - disengage_request_timeout) { - disengage_request_timeout = - part->disengage_request_timeout; - } - } - - if (xpc_partition_engaged(-1UL)) { - if (time_after(jiffies, printmsg_time)) { - dev_info(xpc_part, "waiting for remote " - "partitions to disengage, timeout in " - "%ld seconds\n", - (disengage_request_timeout - jiffies) - / HZ); - printmsg_time = jiffies + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - printed_waiting_msg = 1; - } - - } else if (active_part_count > 0) { - if (printed_waiting_msg) { - dev_info(xpc_part, "waiting for local partition" - " to disengage\n"); - printed_waiting_msg = 0; - } - - } else { - if (!xpc_disengage_request_timedout) { - dev_info(xpc_part, "all partitions have " - "disengaged\n"); - } - break; - } - - /* sleep for a 1/3 of a second or so */ - (void) msleep_interruptible(300); - - } while (1); - - DBUG_ON(xpc_partition_engaged(-1UL)); - - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* now it's time to eliminate our heartbeat */ - del_timer_sync(&xpc_hb_timer); - DBUG_ON(xpc_vars->heartbeating_to_mask != 0); - - if (reason == xpcUnloading) { - /* take ourselves off of the reboot_notifier_list */ - (void) unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); - } - - /* close down protections for IPI operations */ - xpc_restrict_IPI_ops(); - - - /* clear the interface to XPC's functions */ - xpc_clear_interface(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); -} - - -/* - * This function is called when the system is being rebooted. - */ -static int -xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) -{ - enum xpc_retval reason; - - - switch (event) { - case SYS_RESTART: - reason = xpcSystemReboot; - break; - case SYS_HALT: - reason = xpcSystemHalt; - break; - case SYS_POWER_OFF: - reason = xpcSystemPoweroff; - break; - default: - reason = xpcSystemGoingDown; - } - - xpc_do_exit(reason); - return NOTIFY_DONE; -} - - -/* - * Notify other partitions to disengage from all references to our memory. - */ -static void -xpc_die_disengage(void) -{ - struct xpc_partition *part; - partid_t partid; - unsigned long engaged; - long time, printmsg_time, disengage_request_timeout; - - - /* keep xpc_hb_checker thread from doing anything (just in case) */ - xpc_exiting = 1; - - xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)) { - - /* just in case it was left set by an earlier XPC */ - xpc_clear_partition_engaged(1UL << partid); - continue; - } - - if (xpc_partition_engaged(1UL << partid) || - part->act_state != XPC_P_INACTIVE) { - xpc_request_partition_disengage(part); - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - } - - time = rtc_time(); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); - disengage_request_timeout = time + - (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); - - /* wait for all other partitions to disengage from us */ - - while (1) { - engaged = xpc_partition_engaged(-1UL); - if (!engaged) { - dev_info(xpc_part, "all partitions have disengaged\n"); - break; - } - - time = rtc_time(); - if (time >= disengage_request_timeout) { - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - if (engaged & (1UL << partid)) { - dev_info(xpc_part, "disengage from " - "remote partition %d timed " - "out\n", partid); - } - } - break; - } - - if (time >= printmsg_time) { - dev_info(xpc_part, "waiting for remote partitions to " - "disengage, timeout in %ld seconds\n", - (disengage_request_timeout - time) / - sn_rtc_cycles_per_second); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * - sn_rtc_cycles_per_second); - } - } -} - - -/* - * This function is called when the system is being restarted or halted due - * to some sort of system failure. If this is the case we need to notify the - * other partitions to disengage from all references to our memory. - * This function can also be called when our heartbeater could be offlined - * for a time. In this case we need to notify other partitions to not worry - * about the lack of a heartbeat. - */ -static int -xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) -{ - switch (event) { - case DIE_MACHINE_RESTART: - case DIE_MACHINE_HALT: - xpc_die_disengage(); - break; - - case DIE_KDEBUG_ENTER: - /* Should lack of heartbeat be ignored by other partitions? */ - if (!xpc_kdebug_ignore) { - break; - } - /* fall through */ - case DIE_MCA_MONARCH_ENTER: - case DIE_INIT_MONARCH_ENTER: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 1; - break; - - case DIE_KDEBUG_LEAVE: - /* Is lack of heartbeat being ignored by other partitions? */ - if (!xpc_kdebug_ignore) { - break; - } - /* fall through */ - case DIE_MCA_MONARCH_LEAVE: - case DIE_INIT_MONARCH_LEAVE: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 0; - break; - } - - return NOTIFY_DONE; -} - - -int __init -xpc_init(void) -{ - int ret; - partid_t partid; - struct xpc_partition *part; - pid_t pid; - size_t buf_size; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - - buf_size = max(XPC_RP_VARS_SIZE, - XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES); - xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size, - GFP_KERNEL, &xpc_remote_copy_buffer_base); - if (xpc_remote_copy_buffer == NULL) - return -ENOMEM; - - snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); - snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); - - xpc_sysctl = register_sysctl_table(xpc_sys_dir); - - /* - * The first few fields of each entry of xpc_partitions[] need to - * be initialized now so that calls to xpc_connect() and - * xpc_disconnect() can be made prior to the activation of any remote - * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE - * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING - * PARTITION HAS BEEN ACTIVATED. - */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part)); - - part->act_IRQ_rcvd = 0; - spin_lock_init(&part->act_lock); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, 0, 0); - - init_timer(&part->disengage_request_timer); - part->disengage_request_timer.function = - xpc_timeout_partition_disengage_request; - part->disengage_request_timer.data = (unsigned long) part; - - part->setup_state = XPC_P_UNSET; - init_waitqueue_head(&part->teardown_wq); - atomic_set(&part->references, 0); - } - - /* - * Open up protections for IPI operations (and AMO operations on - * Shub 1.1 systems). - */ - xpc_allow_IPI_ops(); - - /* - * Interrupts being processed will increment this atomic variable and - * awaken the heartbeat thread which will process the interrupts. - */ - atomic_set(&xpc_act_IRQ_rcvd, 0); - - /* - * This is safe to do before the xpc_hb_checker thread has started - * because the handler releases a wait queue. If an interrupt is - * received before the thread is waiting, it will not go to sleep, - * but rather immediately process the interrupt. - */ - ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, - "xpc hb", NULL); - if (ret != 0) { - dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " - "errno=%d\n", -ret); - - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - /* - * Fill the partition reserved page with the information needed by - * other partitions to discover we are alive and establish initial - * communications. - */ - xpc_rsvd_page = xpc_rsvd_page_init(); - if (xpc_rsvd_page == NULL) { - dev_err(xpc_part, "could not setup our reserved page\n"); - - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - - /* add ourselves to the reboot_notifier_list */ - ret = register_reboot_notifier(&xpc_reboot_notifier); - if (ret != 0) { - dev_warn(xpc_part, "can't register reboot notifier\n"); - } - - /* add ourselves to the die_notifier list */ - ret = register_die_notifier(&xpc_die_notifier); - if (ret != 0) { - dev_warn(xpc_part, "can't register die notifier\n"); - } - - init_timer(&xpc_hb_timer); - xpc_hb_timer.function = xpc_hb_beater; - - /* - * The real work-horse behind xpc. This processes incoming - * interrupts and monitors remote heartbeats. - */ - pid = kernel_thread(xpc_hb_checker, NULL, 0); - if (pid < 0) { - dev_err(xpc_part, "failed while forking hb check thread\n"); - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* take ourselves off of the reboot_notifier_list */ - (void) unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); - - del_timer_sync(&xpc_hb_timer); - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - - /* - * Startup a thread that will attempt to discover other partitions to - * activate based on info provided by SAL. This new thread is short - * lived and will exit once discovery is complete. - */ - pid = kernel_thread(xpc_initiate_discovery, NULL, 0); - if (pid < 0) { - dev_err(xpc_part, "failed while forking discovery thread\n"); - - /* mark this new thread as a non-starter */ - complete(&xpc_discovery_exited); - - xpc_do_exit(xpcUnloading); - return -EBUSY; - } - - - /* set the interface to point at XPC's functions */ - xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, - xpc_initiate_allocate, xpc_initiate_send, - xpc_initiate_send_notify, xpc_initiate_received, - xpc_initiate_partid_to_nasids); - - return 0; -} -module_init(xpc_init); - - -void __exit -xpc_exit(void) -{ - xpc_do_exit(xpcUnloading); -} -module_exit(xpc_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); -MODULE_LICENSE("GPL"); - -module_param(xpc_hb_interval, int, 0); -MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " - "heartbeat increments."); - -module_param(xpc_hb_check_interval, int, 0); -MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " - "heartbeat checks."); - -module_param(xpc_disengage_request_timelimit, int, 0); -MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " - "for disengage request to complete."); - -module_param(xpc_kdebug_ignore, int, 0); -MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " - "other partitions when dropping into kdebug."); - diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c deleted file mode 100644 index 7ba403232cb..00000000000 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ /dev/null @@ -1,1239 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) partition support. - * - * This is the part of XPC that detects the presence/absence of - * other partitions. It provides a heartbeat and monitors the - * heartbeats of other partitions. - * - */ - - -#include <linux/kernel.h> -#include <linux/sysctl.h> -#include <linux/cache.h> -#include <linux/mmzone.h> -#include <linux/nodemask.h> -#include <asm/uncached.h> -#include <asm/sn/bte.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/nodepda.h> -#include <asm/sn/addrs.h> -#include <asm/sn/xpc.h> - - -/* XPC is exiting flag */ -int xpc_exiting; - - -/* SH_IPI_ACCESS shub register value on startup */ -static u64 xpc_sh1_IPI_access; -static u64 xpc_sh2_IPI_access0; -static u64 xpc_sh2_IPI_access1; -static u64 xpc_sh2_IPI_access2; -static u64 xpc_sh2_IPI_access3; - - -/* original protection values for each node */ -u64 xpc_prot_vec[MAX_NUMNODES]; - - -/* this partition's reserved page pointers */ -struct xpc_rsvd_page *xpc_rsvd_page; -static u64 *xpc_part_nasids; -static u64 *xpc_mach_nasids; -struct xpc_vars *xpc_vars; -struct xpc_vars_part *xpc_vars_part; - -static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ -static int xp_nasid_mask_words; /* actual size in words of nasid mask */ - - -/* - * For performance reasons, each entry of xpc_partitions[] is cacheline - * aligned. And xpc_partitions[] is padded with an additional entry at the - * end so that the last legitimate entry doesn't share its cacheline with - * another variable. - */ -struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; - - -/* - * Generic buffer used to store a local copy of portions of a remote - * partition's reserved page (either its header and part_nasids mask, - * or its vars). - */ -char *xpc_remote_copy_buffer; -void *xpc_remote_copy_buffer_base; - - -/* - * Guarantee that the kmalloc'd memory is cacheline aligned. - */ -void * -xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) -{ - /* see if kmalloc will give us cachline aligned memory by default */ - *base = kmalloc(size, flags); - if (*base == NULL) { - return NULL; - } - if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { - return *base; - } - kfree(*base); - - /* nope, we'll have to do it ourselves */ - *base = kmalloc(size + L1_CACHE_BYTES, flags); - if (*base == NULL) { - return NULL; - } - return (void *) L1_CACHE_ALIGN((u64) *base); -} - - -/* - * Given a nasid, get the physical address of the partition's reserved page - * for that nasid. This function returns 0 on any error. - */ -static u64 -xpc_get_rsvd_page_pa(int nasid) -{ - bte_result_t bte_res; - s64 status; - u64 cookie = 0; - u64 rp_pa = nasid; /* seed with nasid */ - u64 len = 0; - u64 buf = buf; - u64 buf_len = 0; - void *buf_base = NULL; - - - while (1) { - - status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, - &len); - - dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" - "0x%016lx, address=0x%016lx, len=0x%016lx\n", - status, cookie, rp_pa, len); - - if (status != SALRET_MORE_PASSES) { - break; - } - - if (L1_CACHE_ALIGN(len) > buf_len) { - kfree(buf_base); - buf_len = L1_CACHE_ALIGN(len); - buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len, - GFP_KERNEL, &buf_base); - if (buf_base == NULL) { - dev_err(xpc_part, "unable to kmalloc " - "len=0x%016lx\n", buf_len); - status = SALRET_ERROR; - break; - } - } - - bte_res = xp_bte_copy(rp_pa, buf, buf_len, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bte_res != BTE_SUCCESS) { - dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); - status = SALRET_ERROR; - break; - } - } - - kfree(buf_base); - - if (status != SALRET_OK) { - rp_pa = 0; - } - dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); - return rp_pa; -} - - -/* - * Fill the partition reserved page with the information needed by - * other partitions to discover we are alive and establish initial - * communications. - */ -struct xpc_rsvd_page * -xpc_rsvd_page_init(void) -{ - struct xpc_rsvd_page *rp; - AMO_t *amos_page; - u64 rp_pa, nasid_array = 0; - int i, ret; - - - /* get the local reserved page's address */ - - preempt_disable(); - rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); - preempt_enable(); - if (rp_pa == 0) { - dev_err(xpc_part, "SAL failed to locate the reserved page\n"); - return NULL; - } - rp = (struct xpc_rsvd_page *) __va(rp_pa); - - if (rp->partid != sn_partition_id) { - dev_err(xpc_part, "the reserved page's partid of %d should be " - "%d\n", rp->partid, sn_partition_id); - return NULL; - } - - rp->version = XPC_RP_VERSION; - - /* establish the actual sizes of the nasid masks */ - if (rp->SAL_version == 1) { - /* SAL_version 1 didn't set the nasids_size field */ - rp->nasids_size = 128; - } - xp_nasid_mask_bytes = rp->nasids_size; - xp_nasid_mask_words = xp_nasid_mask_bytes / 8; - - /* setup the pointers to the various items in the reserved page */ - xpc_part_nasids = XPC_RP_PART_NASIDS(rp); - xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - xpc_vars = XPC_RP_VARS(rp); - xpc_vars_part = XPC_RP_VARS_PART(rp); - - /* - * Before clearing xpc_vars, see if a page of AMOs had been previously - * allocated. If not we'll need to allocate one and set permissions - * so that cross-partition AMOs are allowed. - * - * The allocated AMO page needs MCA reporting to remain disabled after - * XPC has unloaded. To make this work, we keep a copy of the pointer - * to this page (i.e., amos_page) in the struct xpc_vars structure, - * which is pointed to by the reserved page, and re-use that saved copy - * on subsequent loads of XPC. This AMO page is never freed, and its - * memory protections are never restricted. - */ - if ((amos_page = xpc_vars->amos_page) == NULL) { - amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0)); - if (amos_page == NULL) { - dev_err(xpc_part, "can't allocate page of AMOs\n"); - return NULL; - } - - /* - * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems - * when xpc_allow_IPI_ops() is called via xpc_hb_init(). - */ - if (!enable_shub_wars_1_1()) { - ret = sn_change_memprotect(ia64_tpa((u64) amos_page), - PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, - &nasid_array); - if (ret != 0) { - dev_err(xpc_part, "can't change memory " - "protections\n"); - uncached_free_page(__IA64_UNCACHED_OFFSET | - TO_PHYS((u64) amos_page)); - return NULL; - } - } - } else if (!IS_AMO_ADDRESS((u64) amos_page)) { - /* - * EFI's XPBOOT can also set amos_page in the reserved page, - * but it happens to leave it as an uncached physical address - * and we need it to be an uncached virtual, so we'll have to - * convert it. - */ - if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { - dev_err(xpc_part, "previously used amos_page address " - "is bad = 0x%p\n", (void *) amos_page); - return NULL; - } - amos_page = (AMO_t *) TO_AMO((u64) amos_page); - } - - /* clear xpc_vars */ - memset(xpc_vars, 0, sizeof(struct xpc_vars)); - - xpc_vars->version = XPC_V_VERSION; - xpc_vars->act_nasid = cpuid_to_nasid(0); - xpc_vars->act_phys_cpuid = cpu_physical_id(0); - xpc_vars->vars_part_pa = __pa(xpc_vars_part); - xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page); - xpc_vars->amos_page = amos_page; /* save for next load of XPC */ - - - /* clear xpc_vars_part */ - memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) * - XP_MAX_PARTITIONS); - - /* initialize the activate IRQ related AMO variables */ - for (i = 0; i < xp_nasid_mask_words; i++) { - (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); - } - - /* initialize the engaged remote partitions related AMO variables */ - (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); - (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); - - /* timestamp of when reserved page was setup by XPC */ - rp->stamp = CURRENT_TIME; - - /* - * This signifies to the remote partition that our reserved - * page is initialized. - */ - rp->vars_pa = __pa(xpc_vars); - - return rp; -} - - -/* - * Change protections to allow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ -void -xpc_allow_IPI_ops(void) -{ - int node; - int nasid; - - - // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. - - if (is_shub2()) { - xpc_sh2_IPI_access0 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); - xpc_sh2_IPI_access1 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); - xpc_sh2_IPI_access2 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); - xpc_sh2_IPI_access3 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - -1UL); - } - - } else { - xpc_sh1_IPI_access = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - -1UL); - - /* - * Since the BIST collides with memory operations on - * SHUB 1.1 sn_change_memprotect() cannot be used. - */ - if (enable_shub_wars_1_1()) { - /* open up everything */ - xpc_prot_vec[node] = (u64) HUB_L((u64 *) - GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0)); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - -1UL); - } - } - } -} - - -/* - * Restrict protections to disallow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ -void -xpc_restrict_IPI_ops(void) -{ - int node; - int nasid; - - - // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. - - if (is_shub2()) { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - xpc_sh2_IPI_access0); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - xpc_sh2_IPI_access1); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - xpc_sh2_IPI_access2); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - xpc_sh2_IPI_access3); - } - - } else { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - xpc_sh1_IPI_access); - - if (enable_shub_wars_1_1()) { - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - } - } - } -} - - -/* - * At periodic intervals, scan through all active partitions and ensure - * their heartbeat is still active. If not, the partition is deactivated. - */ -void -xpc_check_remote_hb(void) -{ - struct xpc_vars *remote_vars; - struct xpc_partition *part; - partid_t partid; - bte_result_t bres; - - - remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - - if (xpc_exiting) { - break; - } - - if (partid == sn_partition_id) { - continue; - } - - part = &xpc_partitions[partid]; - - if (part->act_state == XPC_P_INACTIVE || - part->act_state == XPC_P_DEACTIVATING) { - continue; - } - - /* pull the remote_hb cache line */ - bres = xp_bte_copy(part->remote_vars_pa, - (u64) remote_vars, - XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - XPC_DEACTIVATE_PARTITION(part, - xpc_map_bte_errors(bres)); - continue; - } - - dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" - " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", - partid, remote_vars->heartbeat, part->last_heartbeat, - remote_vars->heartbeat_offline, - remote_vars->heartbeating_to_mask); - - if (((remote_vars->heartbeat == part->last_heartbeat) && - (remote_vars->heartbeat_offline == 0)) || - !xpc_hb_allowed(sn_partition_id, remote_vars)) { - - XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); - continue; - } - - part->last_heartbeat = remote_vars->heartbeat; - } -} - - -/* - * Get a copy of a portion of the remote partition's rsvd page. - * - * remote_rp points to a buffer that is cacheline aligned for BTE copies and - * is large enough to contain a copy of their reserved page header and - * part_nasids mask. - */ -static enum xpc_retval -xpc_get_remote_rp(int nasid, u64 *discovered_nasids, - struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) -{ - int bres, i; - - - /* get the reserved page's physical address */ - - *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); - if (*remote_rp_pa == 0) { - return xpcNoRsvdPageAddr; - } - - - /* pull over the reserved page header and part_nasids mask */ - bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp, - XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - return xpc_map_bte_errors(bres); - } - - - if (discovered_nasids != NULL) { - u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); - - - for (i = 0; i < xp_nasid_mask_words; i++) { - discovered_nasids[i] |= remote_part_nasids[i]; - } - } - - - /* check that the partid is for another partition */ - - if (remote_rp->partid < 1 || - remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { - return xpcInvalidPartid; - } - - if (remote_rp->partid == sn_partition_id) { - return xpcLocalPartid; - } - - - if (XPC_VERSION_MAJOR(remote_rp->version) != - XPC_VERSION_MAJOR(XPC_RP_VERSION)) { - return xpcBadVersion; - } - - return xpcSuccess; -} - - -/* - * Get a copy of the remote partition's XPC variables from the reserved page. - * - * remote_vars points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_RP_VARS_SIZE. - */ -static enum xpc_retval -xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - int bres; - - - if (remote_vars_pa == 0) { - return xpcVarsNotSet; - } - - /* pull over the cross partition variables */ - bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - return xpc_map_bte_errors(bres); - } - - if (XPC_VERSION_MAJOR(remote_vars->version) != - XPC_VERSION_MAJOR(XPC_V_VERSION)) { - return xpcBadVersion; - } - - return xpcSuccess; -} - - -/* - * Update the remote partition's info. - */ -static void -xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, - struct timespec *remote_rp_stamp, u64 remote_rp_pa, - u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - part->remote_rp_version = remote_rp_version; - dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", - part->remote_rp_version); - - part->remote_rp_stamp = *remote_rp_stamp; - dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n", - part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec); - - part->remote_rp_pa = remote_rp_pa; - dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); - - part->remote_vars_pa = remote_vars_pa; - dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", - part->remote_vars_pa); - - part->last_heartbeat = remote_vars->heartbeat; - dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", - part->last_heartbeat); - - part->remote_vars_part_pa = remote_vars->vars_part_pa; - dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", - part->remote_vars_part_pa); - - part->remote_act_nasid = remote_vars->act_nasid; - dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", - part->remote_act_nasid); - - part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; - dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", - part->remote_act_phys_cpuid); - - part->remote_amos_page_pa = remote_vars->amos_page_pa; - dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", - part->remote_amos_page_pa); - - part->remote_vars_version = remote_vars->version; - dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", - part->remote_vars_version); -} - - -/* - * Prior code has determined the nasid which generated an IPI. Inspect - * that nasid to determine if its partition needs to be activated or - * deactivated. - * - * A partition is consider "awaiting activation" if our partition - * flags indicate it is not active and it has a heartbeat. A - * partition is considered "awaiting deactivation" if our partition - * flags indicate it is active but it has no heartbeat or it is not - * sending its heartbeat to us. - * - * To determine the heartbeat, the remote nasid must have a properly - * initialized reserved page. - */ -static void -xpc_identify_act_IRQ_req(int nasid) -{ - struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; - int remote_rp_version; - int reactivate = 0; - int stamp_diff; - struct timespec remote_rp_stamp = { 0, 0 }; - partid_t partid; - struct xpc_partition *part; - enum xpc_retval ret; - - - /* pull over the reserved page structure */ - - remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; - - ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); - if (ret != xpcSuccess) { - dev_warn(xpc_part, "unable to get reserved page from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - return; - } - - remote_vars_pa = remote_rp->vars_pa; - remote_rp_version = remote_rp->version; - if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - remote_rp_stamp = remote_rp->stamp; - } - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - - /* pull over the cross partition variables */ - - remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpcSuccess) { - - dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - return; - } - - - part->act_IRQ_rcvd++; - - dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " - "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, - remote_vars->heartbeat, remote_vars->heartbeating_to_mask); - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - if (xpc_partition_disengage_requested(1UL << partid)) { - /* - * Other side is waiting on us to disengage, - * even though we already have. - */ - return; - } - } else { - /* other side doesn't support disengage requests */ - xpc_clear_partition_disengage_request(1UL << partid); - } - - xpc_activate_partition(part); - return; - } - - DBUG_ON(part->remote_rp_version == 0); - DBUG_ON(part->remote_vars_version == 0); - - if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)); - /* see if the other side rebooted */ - if (part->remote_amos_page_pa == - remote_vars->amos_page_pa && - xpc_hb_allowed(sn_partition_id, - remote_vars)) { - /* doesn't look that way, so ignore the IPI */ - return; - } - } - - /* - * Other side rebooted and previous XPC didn't support the - * disengage request, so we don't need to do anything special. - */ - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpcReactivating); - return; - } - - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - /* - * Other side rebooted and previous XPC did support the - * disengage request, but the new one doesn't. - */ - - xpc_clear_partition_engaged(1UL << partid); - xpc_clear_partition_disengage_request(1UL << partid); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - reactivate = 1; - - } else { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp, - &remote_rp_stamp); - if (stamp_diff != 0) { - DBUG_ON(stamp_diff >= 0); - - /* - * Other side rebooted and the previous XPC did support - * the disengage request, as does the new one. - */ - - DBUG_ON(xpc_partition_engaged(1UL << partid)); - DBUG_ON(xpc_partition_disengage_requested(1UL << - partid)); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - reactivate = 1; - } - } - - if (part->disengage_request_timeout > 0 && - !xpc_partition_disengaged(part)) { - /* still waiting on other side to disengage from us */ - return; - } - - if (reactivate) { - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpcReactivating); - - } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) && - xpc_partition_disengage_requested(1UL << partid)) { - XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown); - } -} - - -/* - * Loop through the activation AMO variables and process any bits - * which are set. Each bit indicates a nasid sending a partition - * activation or deactivation request. - * - * Return #of IRQs detected. - */ -int -xpc_identify_act_IRQ_sender(void) -{ - int word, bit; - u64 nasid_mask; - u64 nasid; /* remote nasid */ - int n_IRQs_detected = 0; - AMO_t *act_amos; - - - act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; - - - /* scan through act AMO variable looking for non-zero entries */ - for (word = 0; word < xp_nasid_mask_words; word++) { - - if (xpc_exiting) { - break; - } - - nasid_mask = xpc_IPI_receive(&act_amos[word]); - if (nasid_mask == 0) { - /* no IRQs from nasids in this variable */ - continue; - } - - dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, - nasid_mask); - - - /* - * If this nasid has been added to the machine since - * our partition was reset, this will retain the - * remote nasid in our reserved pages machine mask. - * This is used in the event of module reload. - */ - xpc_mach_nasids[word] |= nasid_mask; - - - /* locate the nasid(s) which sent interrupts */ - - for (bit = 0; bit < (8 * sizeof(u64)); bit++) { - if (nasid_mask & (1UL << bit)) { - n_IRQs_detected++; - nasid = XPC_NASID_FROM_W_B(word, bit); - dev_dbg(xpc_part, "interrupt from nasid %ld\n", - nasid); - xpc_identify_act_IRQ_req(nasid); - } - } - } - return n_IRQs_detected; -} - - -/* - * See if the other side has responded to a partition disengage request - * from us. - */ -int -xpc_partition_disengaged(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - int disengaged; - - - disengaged = (xpc_partition_engaged(1UL << partid) == 0); - if (part->disengage_request_timeout) { - if (!disengaged) { - if (jiffies < part->disengage_request_timeout) { - /* timelimit hasn't been reached yet */ - return 0; - } - - /* - * Other side hasn't responded to our disengage - * request in a timely fashion, so assume it's dead. - */ - - dev_info(xpc_part, "disengage from remote partition %d " - "timed out\n", partid); - xpc_disengage_request_timedout = 1; - xpc_clear_partition_engaged(1UL << partid); - disengaged = 1; - } - part->disengage_request_timeout = 0; - - /* cancel the timer function, provided it's not us */ - if (!in_interrupt()) { - del_singleshot_timer_sync(&part-> - disengage_request_timer); - } - - DBUG_ON(part->act_state != XPC_P_DEACTIVATING && - part->act_state != XPC_P_INACTIVE); - if (part->act_state != XPC_P_INACTIVE) { - xpc_wakeup_channel_mgr(part); - } - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - xpc_cancel_partition_disengage_request(part); - } - } - return disengaged; -} - - -/* - * Mark specified partition as active. - */ -enum xpc_retval -xpc_mark_partition_active(struct xpc_partition *part) -{ - unsigned long irq_flags; - enum xpc_retval ret; - - - dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); - - spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_ACTIVATING) { - part->act_state = XPC_P_ACTIVE; - ret = xpcSuccess; - } else { - DBUG_ON(part->reason == xpcSuccess); - ret = part->reason; - } - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - return ret; -} - - -/* - * Notify XPC that the partition is down. - */ -void -xpc_deactivate_partition(const int line, struct xpc_partition *part, - enum xpc_retval reason) -{ - unsigned long irq_flags; - - - spin_lock_irqsave(&part->act_lock, irq_flags); - - if (part->act_state == XPC_P_INACTIVE) { - XPC_SET_REASON(part, reason, line); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - if (reason == xpcReactivating) { - /* we interrupt ourselves to reactivate partition */ - xpc_IPI_send_reactivate(part); - } - return; - } - if (part->act_state == XPC_P_DEACTIVATING) { - if ((part->reason == xpcUnloading && reason != xpcUnloading) || - reason == xpcReactivating) { - XPC_SET_REASON(part, reason, line); - } - spin_unlock_irqrestore(&part->act_lock, irq_flags); - return; - } - - part->act_state = XPC_P_DEACTIVATING; - XPC_SET_REASON(part, reason, line); - - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - xpc_request_partition_disengage(part); - xpc_IPI_send_disengage(part); - - /* set a timelimit on the disengage request */ - part->disengage_request_timeout = jiffies + - (xpc_disengage_request_timelimit * HZ); - part->disengage_request_timer.expires = - part->disengage_request_timeout; - add_timer(&part->disengage_request_timer); - } - - dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", - XPC_PARTID(part), reason); - - xpc_partition_going_down(part, reason); -} - - -/* - * Mark specified partition as inactive. - */ -void -xpc_mark_partition_inactive(struct xpc_partition *part) -{ - unsigned long irq_flags; - - - dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", - XPC_PARTID(part)); - - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; -} - - -/* - * SAL has provided a partition and machine mask. The partition mask - * contains a bit for each even nasid in our partition. The machine - * mask contains a bit for each even nasid in the entire machine. - * - * Using those two bit arrays, we can determine which nasids are - * known in the machine. Each should also have a reserved page - * initialized if they are available for partitioning. - */ -void -xpc_discovery(void) -{ - void *remote_rp_base; - struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; - int region; - int region_size; - int max_regions; - int nasid; - struct xpc_rsvd_page *rp; - partid_t partid; - struct xpc_partition *part; - u64 *discovered_nasids; - enum xpc_retval ret; - - - remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + - xp_nasid_mask_bytes, - GFP_KERNEL, &remote_rp_base); - if (remote_rp == NULL) { - return; - } - remote_vars = (struct xpc_vars *) remote_rp; - - - discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, - GFP_KERNEL); - if (discovered_nasids == NULL) { - kfree(remote_rp_base); - return; - } - - rp = (struct xpc_rsvd_page *) xpc_rsvd_page; - - /* - * The term 'region' in this context refers to the minimum number of - * nodes that can comprise an access protection grouping. The access - * protection is in regards to memory, IOI and IPI. - */ - max_regions = 64; - region_size = sn_region_size; - - switch (region_size) { - case 128: - max_regions *= 2; - case 64: - max_regions *= 2; - case 32: - max_regions *= 2; - region_size = 16; - DBUG_ON(!is_shub2()); - } - - for (region = 0; region < max_regions; region++) { - - if ((volatile int) xpc_exiting) { - break; - } - - dev_dbg(xpc_part, "searching region %d\n", region); - - for (nasid = (region * region_size * 2); - nasid < ((region + 1) * region_size * 2); - nasid += 2) { - - if ((volatile int) xpc_exiting) { - break; - } - - dev_dbg(xpc_part, "checking nasid %d\n", nasid); - - - if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { - dev_dbg(xpc_part, "PROM indicates Nasid %d is " - "part of the local partition; skipping " - "region\n", nasid); - break; - } - - if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { - dev_dbg(xpc_part, "PROM indicates Nasid %d was " - "not on Numa-Link network at reset\n", - nasid); - continue; - } - - if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { - dev_dbg(xpc_part, "Nasid %d is part of a " - "partition which was previously " - "discovered\n", nasid); - continue; - } - - - /* pull over the reserved page structure */ - - ret = xpc_get_remote_rp(nasid, discovered_nasids, - remote_rp, &remote_rp_pa); - if (ret != xpcSuccess) { - dev_dbg(xpc_part, "unable to get reserved page " - "from nasid %d, reason=%d\n", nasid, - ret); - - if (ret == xpcLocalPartid) { - break; - } - continue; - } - - remote_vars_pa = remote_rp->vars_pa; - - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - - /* pull over the cross partition variables */ - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpcSuccess) { - dev_dbg(xpc_part, "unable to get XPC variables " - "from nasid %d, reason=%d\n", nasid, - ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - continue; - } - - if (part->act_state != XPC_P_INACTIVE) { - dev_dbg(xpc_part, "partition %d on nasid %d is " - "already activating\n", partid, nasid); - break; - } - - /* - * Register the remote partition's AMOs with SAL so it - * can handle and cleanup errors within that address - * range should the remote partition go down. We don't - * unregister this range because it is difficult to - * tell when outstanding writes to the remote partition - * are finished and thus when it is thus safe to - * unregister. This should not result in wasted space - * in the SAL xp_addr_region table because we should - * get the same page for remote_act_amos_pa after - * module reloads and system reboots. - */ - if (sn_register_xp_addr_region( - remote_vars->amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_dbg(xpc_part, "partition %d failed to " - "register xp_addr region 0x%016lx\n", - partid, remote_vars->amos_page_pa); - - XPC_SET_REASON(part, xpcPhysAddrRegFailed, - __LINE__); - break; - } - - /* - * The remote nasid is valid and available. - * Send an interrupt to that nasid to notify - * it that we are ready to begin activation. - */ - dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " - "nasid %d, phys_cpuid 0x%x\n", - remote_vars->amos_page_pa, - remote_vars->act_nasid, - remote_vars->act_phys_cpuid); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)) { - part->remote_amos_page_pa = - remote_vars->amos_page_pa; - xpc_mark_partition_disengaged(part); - xpc_cancel_partition_disengage_request(part); - } - xpc_IPI_send_activate(remote_vars); - } - } - - kfree(discovered_nasids); - kfree(remote_rp_base); -} - - -/* - * Given a partid, get the nasids owned by that partition from the - * remote partition's reserved page. - */ -enum xpc_retval -xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) -{ - struct xpc_partition *part; - u64 part_nasid_pa; - int bte_res; - - - part = &xpc_partitions[partid]; - if (part->remote_rp_pa == 0) { - return xpcPartitionDown; - } - - memset(nasid_mask, 0, XP_NASID_MASK_BYTES); - - part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa); - - bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask, - xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL); - - return xpc_map_bte_errors(bte_res); -} - diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c deleted file mode 100644 index a5df672d839..00000000000 --- a/arch/ia64/sn/kernel/xpnet.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. - */ - - -/* - * Cross Partition Network Interface (XPNET) support - * - * XPNET provides a virtual network layered on top of the Cross - * Partition communication layer. - * - * XPNET provides direct point-to-point and broadcast-like support - * for an ethernet-like device. The ethernet broadcast medium is - * replaced with a point-to-point message structure which passes - * pointers to a DMA-capable block that a remote partition should - * retrieve and pass to the upper level networking layer. - * - */ - - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <asm/sn/bte.h> -#include <asm/sn/io.h> -#include <asm/sn/sn_sal.h> -#include <asm/types.h> -#include <asm/atomic.h> -#include <asm/sn/xp.h> - - -/* - * The message payload transferred by XPC. - * - * buf_pa is the physical address where the DMA should pull from. - * - * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a - * cacheline boundary. To accomplish this, we record the number of - * bytes from the beginning of the first cacheline to the first useful - * byte of the skb (leadin_ignore) and the number of bytes from the - * last useful byte of the skb to the end of the last cacheline - * (tailout_ignore). - * - * size is the number of bytes to transfer which includes the skb->len - * (useful bytes of the senders skb) plus the leadin and tailout - */ -struct xpnet_message { - u16 version; /* Version for this message */ - u16 embedded_bytes; /* #of bytes embedded in XPC message */ - u32 magic; /* Special number indicating this is xpnet */ - u64 buf_pa; /* phys address of buffer to retrieve */ - u32 size; /* #of bytes in buffer */ - u8 leadin_ignore; /* #of bytes to ignore at the beginning */ - u8 tailout_ignore; /* #of bytes to ignore at the end */ - unsigned char data; /* body of small packets */ -}; - -/* - * Determine the size of our message, the cacheline aligned size, - * and then the number of message will request from XPC. - * - * XPC expects each message to exist in an individual cacheline. - */ -#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) -#define XPNET_MSG_DATA_MAX \ - (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) -#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) -#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE) - - -#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) -#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) - -/* - * Version number of XPNET implementation. XPNET can always talk to versions - * with same major #, and never talk to versions with a different version. - */ -#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) -#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) -#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) - -#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */ -#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */ -#define XPNET_MAGIC 0x88786984 /* "XNET" */ - -#define XPNET_VALID_MSG(_m) \ - ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ - && (msg->magic == XPNET_MAGIC)) - -#define XPNET_DEVICE_NAME "xp0" - - -/* - * When messages are queued with xpc_send_notify, a kmalloc'd buffer - * of the following type is passed as a notification cookie. When the - * notification function is called, we use the cookie to decide - * whether all outstanding message sends have completed. The skb can - * then be released. - */ -struct xpnet_pending_msg { - struct list_head free_list; - struct sk_buff *skb; - atomic_t use_count; -}; - -/* driver specific structure pointed to by the device structure */ -struct xpnet_dev_private { - struct net_device_stats stats; -}; - -struct net_device *xpnet_device; - -/* - * When we are notified of other partitions activating, we add them to - * our bitmask of partitions to which we broadcast. - */ -static u64 xpnet_broadcast_partitions; -/* protect above */ -static DEFINE_SPINLOCK(xpnet_broadcast_lock); - -/* - * Since the Block Transfer Engine (BTE) is being used for the transfer - * and it relies upon cache-line size transfers, we need to reserve at - * least one cache-line for head and tail alignment. The BTE is - * limited to 8MB transfers. - * - * Testing has shown that changing MTU to greater than 64KB has no effect - * on TCP as the two sides negotiate a Max Segment Size that is limited - * to 64K. Other protocols May use packets greater than this, but for - * now, the default is 64KB. - */ -#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) -/* 32KB has been determined to be the ideal */ -#define XPNET_DEF_MTU (0x8000UL) - - -/* - * The partition id is encapsulated in the MAC address. The following - * define locates the octet the partid is in. - */ -#define XPNET_PARTID_OCTET 1 -#define XPNET_LICENSE_OCTET 2 - - -/* - * Define the XPNET debug device structure that is to be used with dev_dbg(), - * dev_err(), dev_warn(), and dev_info(). - */ -struct device_driver xpnet_dbg_name = { - .name = "xpnet" -}; - -struct device xpnet_dbg_subname = { - .bus_id = {0}, /* set to "" */ - .driver = &xpnet_dbg_name -}; - -struct device *xpnet = &xpnet_dbg_subname; - -/* - * Packet was recevied by XPC and forwarded to us. - */ -static void -xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) -{ - struct sk_buff *skb; - bte_result_t bret; - struct xpnet_dev_private *priv = - (struct xpnet_dev_private *) xpnet_device->priv; - - - if (!XPNET_VALID_MSG(msg)) { - /* - * Packet with a different XPC version. Ignore. - */ - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, - msg->leadin_ignore, msg->tailout_ignore); - - - /* reserve an extra cache line */ - skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); - if (!skb) { - dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", - msg->size + L1_CACHE_BYTES); - - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - - /* - * The allocated skb has some reserved space. - * In order to use bte_copy, we need to get the - * skb->data pointer moved forward. - */ - skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & - (L1_CACHE_BYTES - 1)) + - msg->leadin_ignore)); - - /* - * Update the tail pointer to indicate data actually - * transferred. - */ - skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); - - /* - * Move the data over from the other side. - */ - if ((XPNET_VERSION_MINOR(msg->version) == 1) && - (msg->embedded_bytes != 0)) { - dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " - "%lu)\n", skb->data, &msg->data, - (size_t) msg->embedded_bytes); - - skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes); - } else { - dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" - "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size); - - bret = bte_copy(msg->buf_pa, - __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); - - if (bret != BTE_SUCCESS) { - // >>> Need better way of cleaning skb. Currently skb - // >>> appears in_use and we can't just call - // >>> dev_kfree_skb. - dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " - "error=0x%x\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & - ~(L1_CACHE_BYTES - 1)), - msg->size, bret); - - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - } - - dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), - skb->len); - - skb->protocol = eth_type_trans(skb, xpnet_device); - skb->ip_summed = CHECKSUM_UNNECESSARY; - - dev_dbg(xpnet, "passing skb to network layer\n" - KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", - (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), - skb_end_pointer(skb), skb->len); - - - xpnet_device->last_rx = jiffies; - priv->stats.rx_packets++; - priv->stats.rx_bytes += skb->len + ETH_HLEN; - - netif_rx_ni(skb); - xpc_received(partid, channel, (void *) msg); -} - - -/* - * This is the handler which XPC calls during any sort of change in - * state or message reception on a connection. - */ -static void -xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel, - void *data, void *key) -{ - long bp; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(channel != XPC_NET_CHANNEL); - - switch(reason) { - case xpcMsgReceived: /* message received */ - DBUG_ON(data == NULL); - - xpnet_receive(partid, channel, (struct xpnet_message *) data); - break; - - case xpcConnected: /* connection completed to a partition */ - spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions |= 1UL << (partid -1 ); - bp = xpnet_broadcast_partitions; - spin_unlock_bh(&xpnet_broadcast_lock); - - netif_carrier_on(xpnet_device); - - dev_dbg(xpnet, "%s connection created to partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); - break; - - default: - spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions &= ~(1UL << (partid -1 )); - bp = xpnet_broadcast_partitions; - spin_unlock_bh(&xpnet_broadcast_lock); - - if (bp == 0) { - netif_carrier_off(xpnet_device); - } - - dev_dbg(xpnet, "%s disconnected from partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); - break; - - } -} - - -static int -xpnet_dev_open(struct net_device *dev) -{ - enum xpc_retval ret; - - - dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " - "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, - XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, - XPNET_MAX_IDLE_KTHREADS); - - ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, - XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, - XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); - if (ret != xpcSuccess) { - dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " - "ret=%d\n", dev->name, ret); - - return -ENOMEM; - } - - dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); - - return 0; -} - - -static int -xpnet_dev_stop(struct net_device *dev) -{ - xpc_disconnect(XPC_NET_CHANNEL); - - dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); - - return 0; -} - - -static int -xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - /* 68 comes from min TCP+IP+MAC header */ - if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { - dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " - "between 68 and %ld\n", dev->name, new_mtu, - XPNET_MAX_MTU); - return -EINVAL; - } - - dev->mtu = new_mtu; - dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); - return 0; -} - - -/* - * Required for the net_device structure. - */ -static int -xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map) -{ - return 0; -} - - -/* - * Return statistics to the caller. - */ -static struct net_device_stats * -xpnet_dev_get_stats(struct net_device *dev) -{ - struct xpnet_dev_private *priv; - - - priv = (struct xpnet_dev_private *) dev->priv; - - return &priv->stats; -} - - -/* - * Notification that the other end has received the message and - * DMA'd the skb information. At this point, they are done with - * our side. When all recipients are done processing, we - * release the skb and then release our pending message structure. - */ -static void -xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel, - void *__qm) -{ - struct xpnet_pending_msg *queued_msg = - (struct xpnet_pending_msg *) __qm; - - - DBUG_ON(queued_msg == NULL); - - dev_dbg(xpnet, "message to %d notified with reason %d\n", - partid, reason); - - if (atomic_dec_return(&queued_msg->use_count) == 0) { - dev_dbg(xpnet, "all acks for skb->head=-x%p\n", - (void *) queued_msg->skb->head); - - dev_kfree_skb_any(queued_msg->skb); - kfree(queued_msg); - } -} - - -/* - * Network layer has formatted a packet (skb) and is ready to place it - * "on the wire". Prepare and send an xpnet_message to all partitions - * which have connected with us and are targets of this packet. - * - * MAC-NOTE: For the XPNET driver, the MAC address contains the - * destination partition_id. If the destination partition id word - * is 0xff, this packet is to broadcast to all partitions. - */ -static int -xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct xpnet_pending_msg *queued_msg; - enum xpc_retval ret; - struct xpnet_message *msg; - u64 start_addr, end_addr; - long dp; - u8 second_mac_octet; - partid_t dest_partid; - struct xpnet_dev_private *priv; - u16 embedded_bytes; - - - priv = (struct xpnet_dev_private *) dev->priv; - - - dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), - skb->len); - - - /* - * The xpnet_pending_msg tracks how many outstanding - * xpc_send_notifies are relying on this skb. When none - * remain, release the skb. - */ - queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); - if (queued_msg == NULL) { - dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " - "packet\n", sizeof(struct xpnet_pending_msg)); - - priv->stats.tx_errors++; - - return -ENOMEM; - } - - - /* get the beginning of the first cacheline and end of last */ - start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); - end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); - - /* calculate how many bytes to embed in the XPC message */ - embedded_bytes = 0; - if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { - /* skb->data does fit so embed */ - embedded_bytes = skb->len; - } - - - /* - * Since the send occurs asynchronously, we set the count to one - * and begin sending. Any sends that happen to complete before - * we are done sending will not free the skb. We will be left - * with that task during exit. This also handles the case of - * a packet destined for a partition which is no longer up. - */ - atomic_set(&queued_msg->use_count, 1); - queued_msg->skb = skb; - - - second_mac_octet = skb->data[XPNET_PARTID_OCTET]; - if (second_mac_octet == 0xff) { - /* we are being asked to broadcast to all partitions */ - dp = xpnet_broadcast_partitions; - } else if (second_mac_octet != 0) { - dp = xpnet_broadcast_partitions & - (1UL << (second_mac_octet - 1)); - } else { - /* 0 is an invalid partid. Ignore */ - dp = 0; - } - dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); - - /* - * If we wanted to allow promiscuous mode to work like an - * unswitched network, this would be a good point to OR in a - * mask of partitions which should be receiving all packets. - */ - - /* - * Main send loop. - */ - for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; - dest_partid++) { - - - if (!(dp & (1UL << (dest_partid - 1)))) { - /* not destined for this partition */ - continue; - } - - /* remove this partition from the destinations mask */ - dp &= ~(1UL << (dest_partid - 1)); - - - /* found a partition to send to */ - - ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, - XPC_NOWAIT, (void **)&msg); - if (unlikely(ret != xpcSuccess)) { - continue; - } - - msg->embedded_bytes = embedded_bytes; - if (unlikely(embedded_bytes != 0)) { - msg->version = XPNET_VERSION_EMBED; - dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", - &msg->data, skb->data, (size_t) embedded_bytes); - skb_copy_from_linear_data(skb, &msg->data, - (size_t)embedded_bytes); - } else { - msg->version = XPNET_VERSION; - } - msg->magic = XPNET_MAGIC; - msg->size = end_addr - start_addr; - msg->leadin_ignore = (u64) skb->data - start_addr; - msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); - msg->buf_pa = __pa(start_addr); - - dev_dbg(xpnet, "sending XPC message to %d:%d\n" - KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, " - "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", - dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, - msg->leadin_ignore, msg->tailout_ignore); - - - atomic_inc(&queued_msg->use_count); - - ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, - xpnet_send_completed, queued_msg); - if (unlikely(ret != xpcSuccess)) { - atomic_dec(&queued_msg->use_count); - continue; - } - - } - - if (atomic_dec_return(&queued_msg->use_count) == 0) { - dev_dbg(xpnet, "no partitions to receive packet destined for " - "%d\n", dest_partid); - - - dev_kfree_skb(skb); - kfree(queued_msg); - } - - priv->stats.tx_packets++; - priv->stats.tx_bytes += skb->len; - - return 0; -} - - -/* - * Deal with transmit timeouts coming from the network layer. - */ -static void -xpnet_dev_tx_timeout (struct net_device *dev) -{ - struct xpnet_dev_private *priv; - - - priv = (struct xpnet_dev_private *) dev->priv; - - priv->stats.tx_errors++; - return; -} - - -static int __init -xpnet_init(void) -{ - int i; - u32 license_num; - int result = -ENOMEM; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); - - /* - * use ether_setup() to init the majority of our device - * structure and then override the necessary pieces. - */ - xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), - XPNET_DEVICE_NAME, ether_setup); - if (xpnet_device == NULL) { - return -ENOMEM; - } - - netif_carrier_off(xpnet_device); - - xpnet_device->mtu = XPNET_DEF_MTU; - xpnet_device->change_mtu = xpnet_dev_change_mtu; - xpnet_device->open = xpnet_dev_open; - xpnet_device->get_stats = xpnet_dev_get_stats; - xpnet_device->stop = xpnet_dev_stop; - xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit; - xpnet_device->tx_timeout = xpnet_dev_tx_timeout; - xpnet_device->set_config = xpnet_dev_set_config; - - /* - * Multicast assumes the LSB of the first octet is set for multicast - * MAC addresses. We chose the first octet of the MAC to be unlikely - * to collide with any vendor's officially issued MAC. - */ - xpnet_device->dev_addr[0] = 0xfe; - xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; - license_num = sn_partition_serial_number_val(); - for (i = 3; i >= 0; i--) { - xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = - license_num & 0xff; - license_num = license_num >> 8; - } - - /* - * ether_setup() sets this to a multicast device. We are - * really not supporting multicast at this time. - */ - xpnet_device->flags &= ~IFF_MULTICAST; - - /* - * No need to checksum as it is a DMA transfer. The BTE will - * report an error if the data is not retrievable and the - * packet will be dropped. - */ - xpnet_device->features = NETIF_F_NO_CSUM; - - result = register_netdev(xpnet_device); - if (result != 0) { - free_netdev(xpnet_device); - } - - return result; -} -module_init(xpnet_init); - - -static void __exit -xpnet_exit(void) -{ - dev_info(xpnet, "unregistering network device %s\n", - xpnet_device[0].name); - - unregister_netdev(xpnet_device); - - free_netdev(xpnet_device); -} -module_exit(xpnet_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); -MODULE_LICENSE("GPL"); - diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 9b3c1137302..94e584527f4 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -655,7 +655,8 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) * * Simply call tioce_do_dma_map() to create a map with the barrier bit set * in the address. - */ static u64 + */ +static u64 tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); @@ -668,7 +669,8 @@ tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma * * Handle a CE error interrupt. Simply a wrapper around a SAL call which * defers processing to the SGI prom. - */ static irqreturn_t + */ +static irqreturn_t tioce_error_intr_handler(int irq, void *arg) { struct tioce_common *soft = arg; |