diff options
Diffstat (limited to 'arch/ia64')
37 files changed, 11699 insertions, 6212 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index ed21737a00c..3aa6c821449 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -266,17 +267,6 @@ config IOSAPIC depends on !IA64_HP_SIM default y -config IA64_SGI_SN_XP - tristate "Support communication between SGI SSIs" - depends on IA64_GENERIC || IA64_SGI_SN2 - select IA64_UNCACHED_ALLOCATOR - help - An SGI machine can be divided into multiple Single System - Images which act independently of each other and have - hardware based memory protection from the others. Enabling - this feature will allow for direct communication between SSIs - based on a network adapter and DMA messaging. - config FORCE_MAX_ZONEORDER int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE range 11 17 if !HUGETLB_PAGE @@ -600,6 +590,8 @@ config MSPEC source "fs/Kconfig" +source "arch/ia64/kvm/Kconfig" + source "lib/Kconfig" # diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index f1645c4f703..ec4cca477f4 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 90ef338cf46..f065093f8e9 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -194,8 +194,8 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) unw_init_running(kdump_cpu_freeze, NULL); break; case DIE_MCA_MONARCH_LEAVE: - /* die_register->signr indicate if MCA is recoverable */ - if (kdump_on_fatal_mca && !args->signr) { + /* *(nd->data) indicate if MCA is recoverable */ + if (kdump_on_fatal_mca && !(*(nd->data))) { atomic_set(&kdump_in_progress, 1); *(nd->monarch_cpu) = -1; machine_kdump_on_init(); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index b0be4a28017..e49ad8c5dc6 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -570,6 +570,7 @@ GLOBAL_ENTRY(ia64_trace_syscall) br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value .ret3: (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i // disable interrupts br.cond.sptk .work_pending_syscall_end strace_error: diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index e51bced3b0f..705176b434b 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -109,6 +109,20 @@ # define IA64_MCA_DEBUG(fmt...) #endif +#define NOTIFY_INIT(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +#define NOTIFY_MCA(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + /* Used by mca_asm.S */ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ @@ -766,9 +780,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) /* Mask all interrupts */ local_irq_save(flags); - if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; /* Register with the SAL monarch that the slave has @@ -776,17 +789,13 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg) */ ia64_sal_mc_rendez(); - if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1); /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(), - (long)&nd, 0, 0) == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; /* Enable all interrupts */ @@ -1256,7 +1265,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, int recover, cpu = smp_processor_id(); struct task_struct *previous_current; struct ia64_mca_notify_die nd = - { .sos = sos, .monarch_cpu = &monarch_cpu }; + { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover }; static atomic_t mca_count; static cpumask_t mca_cpu; @@ -1272,9 +1281,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); - if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; if (sos->monarch) { @@ -1288,13 +1295,12 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, * does not work. */ ia64_mca_wakeup_all(); - if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); } else { while (cpu_isset(cpu, mca_cpu)) cpu_relax(); /* spin until monarch wakes us */ - } + } + + NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1); /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); @@ -1320,9 +1326,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, mca_insert_tr(0x2); /*Reload dynamic itrs*/ } - if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1); if (atomic_dec_return(&mca_count) > 0) { int i; @@ -1643,7 +1647,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; - (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); + NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0); mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); @@ -1680,17 +1684,15 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; while (monarch_cpu == -1) cpu_relax(); /* spin until monarch enters */ - if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); + while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ - if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + + NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); + mprintk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -1699,9 +1701,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, } monarch_cpu = cpu; - if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1); /* * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be @@ -1716,12 +1716,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, * to default_monarch_init_process() above and just print all the * tasks. */ - if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); - if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) - == NOTIFY_STOP) - ia64_mca_spin(__func__); + NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1); + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); @@ -1953,7 +1950,7 @@ ia64_mca_init(void) printk(KERN_INFO "Increasing MCA rendezvous timeout from " "%ld to %ld milliseconds\n", timeout, isrv.v0); timeout = isrv.v0; - (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0); + NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0); continue; } printk(KERN_ERR "Failed to register rendezvous interrupt " diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d1d24f4598d..c8e403752a0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5511,7 +5511,7 @@ stop_monitoring: } static int -pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) +pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) { struct task_struct *task; pfm_context_t *ctx; @@ -5591,7 +5591,7 @@ pfm_interrupt_handler(int irq, void *arg) start_cycles = ia64_get_itc(); - ret = pfm_do_interrupt_handler(irq, arg, regs); + ret = pfm_do_interrupt_handler(arg, regs); total_cycles = ia64_get_itc(); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 779c3cca206..b11bb50a197 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -44,8 +44,8 @@ #include <linux/smp.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/semaphore.h> -#include <asm/semaphore.h> #include <asm/sal.h> #include <asm/uaccess.h> diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig new file mode 100644 index 00000000000..7914e482850 --- /dev/null +++ b/arch/ia64/kvm/Kconfig @@ -0,0 +1,49 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || IA64 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel Itanium 2 processors support" + depends on KVM && m + ---help--- + Provides support for KVM on Itanium 2 processors equipped with the VT + extensions. + +config KVM_TRACE + bool + +endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile new file mode 100644 index 00000000000..41b034ffa73 --- /dev/null +++ b/arch/ia64/kvm/Makefile @@ -0,0 +1,61 @@ +#This Make file is to generate asm-offsets.h and build source. +# + +#Generate asm-offsets.h for vmm module build +offsets-file := asm-offsets.h + +always := $(offsets-file) +targets := $(offsets-file) +targets += arch/ia64/kvm/asm-offsets.s +clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S) + +# Default sed regexp - multiline due to syntax constraints +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef + +quiet_cmd_offsets = GEN $@ +define cmd_offsets + (set -e; \ + echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ + echo "#define __ASM_KVM_OFFSETS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Makefile"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef +# We use internal rules to avoid the "is up to date" message from make +arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c + $(call if_changed_dep,cc_s_c) + +$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s + $(call cmd,offsets) + +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ + +$(addprefix $(objtree)/,$(obj)/memcpy.S $(obj)/memset.S): + $(shell ln -snf ../lib/memcpy.S $(src)/memcpy.S) + $(shell ln -snf ../lib/memset.S $(src)/memset.S) + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) + +kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o +obj-$(CONFIG_KVM) += kvm.o + +FORCE : $(obj)/$(offsets-file) +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o +#Add link memcpy and memset to avoid possible structure assignment error +kvm-intel-objs += memset.o memcpy.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c new file mode 100644 index 00000000000..4e3dc13a619 --- /dev/null +++ b/arch/ia64/kvm/asm-offsets.c @@ -0,0 +1,251 @@ +/* + * asm-offsets.c Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + * + * Anthony Xu <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * Copyright (c) 2007 Intel Corporation KVM support. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/autoconf.h> +#include <linux/kvm_host.h> + +#include "vcpu.h" + +#define task_struct kvm_vcpu + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : :) + +#define OFFSET(_sym, _str, _mem) \ + DEFINE(_sym, offsetof(_str, _mem)); + +void foo(void) +{ + DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); + DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); + + BLANK(); + + DEFINE(VMM_VCPU_META_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, + arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_VRR0_OFFSET, + offsetof(struct kvm_vcpu, arch.vrr[0])); + DEFINE(VMM_VPD_IRR0_OFFSET, + offsetof(struct vpd, irr[0])); + DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_check)); + DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VPD_VHPI_OFFSET, + offsetof(struct vpd, vhpi)); + DEFINE(VMM_VCPU_VSA_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.vsa_base)); + DEFINE(VMM_VCPU_VPD_OFFSET, + offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VCPU_IRQ_CHECK, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VCPU_TIMER_PENDING, + offsetof(struct kvm_vcpu, arch.timer_pending)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_ITC_OFS_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_offset)); + DEFINE(VMM_VCPU_LAST_ITC_OFFSET, + offsetof(struct kvm_vcpu, arch.last_itc)); + DEFINE(VMM_VCPU_SAVED_GP_OFFSET, + offsetof(struct kvm_vcpu, arch.saved_gp)); + + BLANK(); + + DEFINE(VMM_PT_REGS_B6_OFFSET, + offsetof(struct kvm_pt_regs, b6)); + DEFINE(VMM_PT_REGS_B7_OFFSET, + offsetof(struct kvm_pt_regs, b7)); + DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_csd)); + DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_ssd)); + DEFINE(VMM_PT_REGS_R8_OFFSET, + offsetof(struct kvm_pt_regs, r8)); + DEFINE(VMM_PT_REGS_R9_OFFSET, + offsetof(struct kvm_pt_regs, r9)); + DEFINE(VMM_PT_REGS_R10_OFFSET, + offsetof(struct kvm_pt_regs, r10)); + DEFINE(VMM_PT_REGS_R11_OFFSET, + offsetof(struct kvm_pt_regs, r11)); + DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, + offsetof(struct kvm_pt_regs, cr_ipsr)); + DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, + offsetof(struct kvm_pt_regs, cr_iip)); + DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, + offsetof(struct kvm_pt_regs, cr_ifs)); + DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_unat)); + DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, + offsetof(struct kvm_pt_regs, ar_pfs)); + DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, + offsetof(struct kvm_pt_regs, ar_rsc)); + DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_rnat)); + + DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, + offsetof(struct kvm_pt_regs, ar_bspstore)); + DEFINE(VMM_PT_REGS_PR_OFFSET, + offsetof(struct kvm_pt_regs, pr)); + DEFINE(VMM_PT_REGS_B0_OFFSET, + offsetof(struct kvm_pt_regs, b0)); + DEFINE(VMM_PT_REGS_LOADRS_OFFSET, + offsetof(struct kvm_pt_regs, loadrs)); + DEFINE(VMM_PT_REGS_R1_OFFSET, + offsetof(struct kvm_pt_regs, r1)); + DEFINE(VMM_PT_REGS_R12_OFFSET, + offsetof(struct kvm_pt_regs, r12)); + DEFINE(VMM_PT_REGS_R13_OFFSET, + offsetof(struct kvm_pt_regs, r13)); + DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, + offsetof(struct kvm_pt_regs, ar_fpsr)); + DEFINE(VMM_PT_REGS_R15_OFFSET, + offsetof(struct kvm_pt_regs, r15)); + DEFINE(VMM_PT_REGS_R14_OFFSET, + offsetof(struct kvm_pt_regs, r14)); + DEFINE(VMM_PT_REGS_R2_OFFSET, + offsetof(struct kvm_pt_regs, r2)); + DEFINE(VMM_PT_REGS_R3_OFFSET, + offsetof(struct kvm_pt_regs, r3)); + DEFINE(VMM_PT_REGS_R16_OFFSET, + offsetof(struct kvm_pt_regs, r16)); + DEFINE(VMM_PT_REGS_R17_OFFSET, + offsetof(struct kvm_pt_regs, r17)); + DEFINE(VMM_PT_REGS_R18_OFFSET, + offsetof(struct kvm_pt_regs, r18)); + DEFINE(VMM_PT_REGS_R19_OFFSET, + offsetof(struct kvm_pt_regs, r19)); + DEFINE(VMM_PT_REGS_R20_OFFSET, + offsetof(struct kvm_pt_regs, r20)); + DEFINE(VMM_PT_REGS_R21_OFFSET, + offsetof(struct kvm_pt_regs, r21)); + DEFINE(VMM_PT_REGS_R22_OFFSET, + offsetof(struct kvm_pt_regs, r22)); + DEFINE(VMM_PT_REGS_R23_OFFSET, + offsetof(struct kvm_pt_regs, r23)); + DEFINE(VMM_PT_REGS_R24_OFFSET, + offsetof(struct kvm_pt_regs, r24)); + DEFINE(VMM_PT_REGS_R25_OFFSET, + offsetof(struct kvm_pt_regs, r25)); + DEFINE(VMM_PT_REGS_R26_OFFSET, + offsetof(struct kvm_pt_regs, r26)); + DEFINE(VMM_PT_REGS_R27_OFFSET, + offsetof(struct kvm_pt_regs, r27)); + DEFINE(VMM_PT_REGS_R28_OFFSET, + offsetof(struct kvm_pt_regs, r28)); + DEFINE(VMM_PT_REGS_R29_OFFSET, + offsetof(struct kvm_pt_regs, r29)); + DEFINE(VMM_PT_REGS_R30_OFFSET, + offsetof(struct kvm_pt_regs, r30)); + DEFINE(VMM_PT_REGS_R31_OFFSET, + offsetof(struct kvm_pt_regs, r31)); + DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, + offsetof(struct kvm_pt_regs, ar_ccv)); + DEFINE(VMM_PT_REGS_F6_OFFSET, + offsetof(struct kvm_pt_regs, f6)); + DEFINE(VMM_PT_REGS_F7_OFFSET, + offsetof(struct kvm_pt_regs, f7)); + DEFINE(VMM_PT_REGS_F8_OFFSET, + offsetof(struct kvm_pt_regs, f8)); + DEFINE(VMM_PT_REGS_F9_OFFSET, + offsetof(struct kvm_pt_regs, f9)); + DEFINE(VMM_PT_REGS_F10_OFFSET, + offsetof(struct kvm_pt_regs, f10)); + DEFINE(VMM_PT_REGS_F11_OFFSET, + offsetof(struct kvm_pt_regs, f11)); + DEFINE(VMM_PT_REGS_R4_OFFSET, + offsetof(struct kvm_pt_regs, r4)); + DEFINE(VMM_PT_REGS_R5_OFFSET, + offsetof(struct kvm_pt_regs, r5)); + DEFINE(VMM_PT_REGS_R6_OFFSET, + offsetof(struct kvm_pt_regs, r6)); + DEFINE(VMM_PT_REGS_R7_OFFSET, + offsetof(struct kvm_pt_regs, r7)); + DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, eml_unat)); + DEFINE(VMM_VCPU_IIPA_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_iipa)); + DEFINE(VMM_VCPU_OPCODE_OFFSET, + offsetof(struct kvm_vcpu, arch.opcode)); + DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); + DEFINE(VMM_VCPU_ISR_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_isr)); + DEFINE(VMM_PT_REGS_R16_SLOT, + (((offsetof(struct kvm_pt_regs, r16) + - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); + BLANK(); + + DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); + DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.insvc[0])); + DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); + DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); + + DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); + DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); + DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); + DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); + DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); + DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); + DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); + DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); + DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); + DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); + DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); + DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); + DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); + DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); + DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); + DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); + DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); + DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); + DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); + DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); + DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); + DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); + DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); + DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); + DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); + DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); + DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); + DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); + DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); + DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); + DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); + DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); + DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); + DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); + BLANK(); +} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 00000000000..6df07324013 --- /dev/null +++ b/arch/ia64/kvm/kvm-ia64.c @@ -0,0 +1,1806 @@ + +/* + * kvm_ia64.c: Basic KVM suppport On Itanium series processors + * + * + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/bitops.h> +#include <linux/hrtimer.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/gcc_intrin.h> +#include <asm/pal.h> +#include <asm/cacheflush.h> +#include <asm/div64.h> +#include <asm/tlb.h> + +#include "misc.h" +#include "vti.h" +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +static unsigned long kvm_vmm_base; +static unsigned long kvm_vsa_base; +static unsigned long kvm_vm_buffer; +static unsigned long kvm_vm_buffer_size; +unsigned long kvm_vmm_gp; + +static long vp_env_info; + +static struct kvm_vmm_info *kvm_vmm_info; + +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + + +struct fdesc{ + unsigned long ip; + unsigned long gp; +}; + +static void kvm_flush_icache(unsigned long start, unsigned long len) +{ + int l; + + for (l = 0; l < (len + 32); l += 32) + ia64_fc(start + l); + + ia64_sync_i(); + ia64_srlz_i(); +} + +static void kvm_flush_tlb_all(void) +{ + unsigned long i, j, count0, count1, stride0, stride1, addr; + long flags; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, + (u64)opt_handler); + + return iprv.status; +} + +static DEFINE_SPINLOCK(vp_lock); + +void kvm_arch_hardware_enable(void *garbage) +{ + long status; + long tmp_base; + unsigned long pte; + unsigned long saved_psr; + int slot; + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + spin_lock(&vp_lock); + status = ia64_pal_vp_init_env(kvm_vsa_base ? + VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); + if (status != 0) { + printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); + return ; + } + + if (!kvm_vsa_base) { + kvm_vsa_base = tmp_base; + printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); + } + spin_unlock(&vp_lock); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_hardware_disable(void *garbage) +{ + + long status; + int slot; + unsigned long pte; + unsigned long saved_psr; + unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + status = ia64_pal_vp_exit_env(host_iva); + if (status) + printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", + status); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_USER_MEMORY: + + r = 1; + break; + default: + r = 0; + } + return r; + +} + +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + + return dev; +} + +static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 1; + return 0; +} + +static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct kvm_mmio_req *p; + struct kvm_io_device *mmio_dev; + + p = kvm_get_vcpu_ioreq(vcpu); + + if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) + goto mmio; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; + + if (vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(kvm_run->mmio.data, &p->data, p->size); + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; +mmio: + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + if (mmio_dev) { + if (!p->dir) + kvm_iodevice_write(mmio_dev, p->addr, p->size, + &p->data); + else + kvm_iodevice_read(mmio_dev, p->addr, p->size, + &p->data); + + } else + printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); + p->state = STATE_IORESP_READY; + + return 1; +} + +static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_PAL_CALL) + return kvm_pal_emul(vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 2; + return 0; + } +} + +static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + kvm_sal_emul(vcpu); + return 1; + } else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 3; + return 0; + } + +} + +/* + * offset: address offset to IPI space. + * value: deliver value. + */ +static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, + uint64_t vector) +{ + switch (dm) { + case SAPIC_FIXED: + kvm_apic_set_irq(vcpu, vector, 0); + break; + case SAPIC_NMI: + kvm_apic_set_irq(vcpu, 2, 0); + break; + case SAPIC_EXTINT: + kvm_apic_set_irq(vcpu, 0, 0); + break; + case SAPIC_INIT: + case SAPIC_PMI: + default: + printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); + break; + } +} + +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, + unsigned long eid) +{ + union ia64_lid lid; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (kvm->vcpus[i]) { + lid.val = VCPU_LID(kvm->vcpus[i]); + if (lid.id == id && lid.eid == eid) + return kvm->vcpus[i]; + } + } + + return NULL; +} + +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm_vcpu *target_vcpu; + struct kvm_pt_regs *regs; + union ia64_ipi_a addr = p->u.ipi_data.addr; + union ia64_ipi_d data = p->u.ipi_data.data; + + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); + + if (!target_vcpu->arch.launched) { + regs = vcpu_regs(target_vcpu); + + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + + target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&target_vcpu->wq)) + wake_up_interruptible(&target_vcpu->wq); + } else { + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); + } + + return 1; +} + +struct call_data { + struct kvm_ptc_g ptc_g_data; + struct kvm_vcpu *vcpu; +}; + +static void vcpu_global_purge(void *info) +{ + struct call_data *p = (struct call_data *)info; + struct kvm_vcpu *vcpu = p->vcpu; + + if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + return; + + set_bit(KVM_REQ_PTC_G, &vcpu->requests); + if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { + vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = + p->ptc_g_data; + } else { + clear_bit(KVM_REQ_PTC_G, &vcpu->requests); + vcpu->arch.ptc_g_count = 0; + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); + } +} + +static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm *kvm = vcpu->kvm; + struct call_data call_data; + int i; + call_data.ptc_g_data = p->u.ptc_g_data; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == + KVM_MP_STATE_UNINITIALIZED || + vcpu == kvm->vcpus[i]) + continue; + + if (waitqueue_active(&kvm->vcpus[i]->wq)) + wake_up_interruptible(&kvm->vcpus[i]->wq); + + if (kvm->vcpus[i]->cpu != -1) { + call_data.vcpu = kvm->vcpus[i]; + smp_call_function_single(kvm->vcpus[i]->cpu, + vcpu_global_purge, &call_data, 0, 1); + } else + printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); + + } + return 1; +} + +static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + + ktime_t kt; + long itc_diff; + unsigned long vcpu_now_itc; + + unsigned long expires; + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + + if (time_after(vcpu_now_itc, vpd->itm)) { + vcpu->arch.timer_check = 1; + return 1; + } + itc_diff = vpd->itm - vcpu_now_itc; + if (itc_diff < 0) + itc_diff = -itc_diff; + + expires = div64_64(itc_diff, cyc_per_usec); + kt = ktime_set(0, 1000 * expires); + vcpu->arch.ht_active = 1; + hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + hrtimer_cancel(p_ht); + vcpu->arch.ht_active = 0; + + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) + return -EINTR; + return 1; + } else { + printk(KERN_ERR"kvm: Unsupported userspace halt!"); + return 0; + } +} + +static int handle_vm_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + +static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_VM_PANIC] = handle_vm_error, + [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, + [EXIT_REASON_PAL_CALL] = handle_pal_call, + [EXIT_REASON_SAL_CALL] = handle_sal_call, + [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, + [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IPI] = handle_ipi, + [EXIT_REASON_PTC_G] = handle_global_purge, + +}; + +static const int kvm_vti_max_exit_handlers = + sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); + +static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_exit_data; + + p_exit_data = kvm_get_exit_data(vcpu); + return p_exit_data->exit_reason; +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 exit_reason = kvm_get_exit_reason(vcpu); + vcpu->arch.last_exit = exit_reason; + + if (exit_reason < kvm_vti_max_exit_handlers + && kvm_vti_exit_handlers[exit_reason]) + return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static inline void vti_set_rr6(unsigned long rr6) +{ + ia64_set_rr(RR6, rr6); + ia64_srlz_i(); +} + +static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) +{ + unsigned long pte; + struct kvm *kvm = vcpu->kvm; + int r; + + /*Insert a pair of tr to map vmm*/ + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vmm_tr_slot = r; + /*Insert a pairt of tr to map data of vm*/ + pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, + pte, KVM_VM_DATA_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vm_tr_slot = r; + r = 0; +out: + return r; + +} + +static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) +{ + + ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); + ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); + +} + +static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + if (vcpu->arch.last_run_cpu != cpu || + per_cpu(last_vcpu, cpu) != vcpu) { + per_cpu(last_vcpu, cpu) = vcpu; + vcpu->arch.last_run_cpu = cpu; + kvm_flush_tlb_all(); + } + + vcpu->arch.host_rr6 = ia64_get_rr(RR6); + vti_set_rr6(vcpu->arch.vmm_rr); + return kvm_insert_vmm_mapping(vcpu); +} +static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) +{ + kvm_purge_vmm_mapping(vcpu); + vti_set_rr6(vcpu->arch.host_rr6); +} + +static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + union context *host_ctx, *guest_ctx; + int r; + + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto out; + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + kvm_vcpu_post_transition(vcpu); + r = 0; +out: + return r; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + +again: + preempt_disable(); + + kvm_prepare_guest_switch(vcpu); + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + goto out; + } + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + r = vti_vcpu_run(vcpu, kvm_run); + if (r < 0) { + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; + } + + vcpu->arch.launched = 1; + vcpu->guest_mode = 0; + local_irq_enable(); + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + r = kvm_handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (!need_resched()) + goto again; + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto again; + } + + return r; +} + +static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) +{ + struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); + + if (!vcpu->mmio_is_write) + memcpy(&p->data, vcpu->mmio_data, 8); + p->state = STATE_IORESP_READY; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + kvm_set_mmio_data(vcpu); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + r = __vcpu_run(vcpu, kvm_run); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +/* + * Allocate 16M memory for every vm to hold its specific data. + * Its memory map is defined in kvm_host.h. + */ +static struct kvm *kvm_alloc_kvm(void) +{ + + struct kvm *kvm; + uint64_t vm_base; + + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); + + if (!vm_base) + return ERR_PTR(-ENOMEM); + printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); + + /* Zero all pages before use! */ + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + + kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm->arch.vm_base = vm_base; + + return kvm; +} + +struct kvm_io_range { + unsigned long start; + unsigned long size; + unsigned long type; +}; + +static const struct kvm_io_range io_ranges[] = { + {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, + {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, + {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, + {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, + {PIB_START, PIB_SIZE, GPFN_PIB}, +}; + +static void kvm_build_io_pmt(struct kvm *kvm) +{ + unsigned long i, j; + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); + i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, + io_ranges[i].type, 0); + } + +} + +/*Use unused rids to virtualize guest rid.*/ +#define GUEST_PHYSICAL_RR0 0x1739 +#define GUEST_PHYSICAL_RR4 0x2739 +#define VMM_INIT_RR 0x1660 + +static void kvm_init_vm(struct kvm *kvm) +{ + long vm_base; + + BUG_ON(!kvm); + + kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; + kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; + kvm->arch.vmm_init_rr = VMM_INIT_RR; + + vm_base = kvm->arch.vm_base; + if (vm_base) { + kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; + kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; + kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; + } + + /* + *Fill P2M entries for MMIO/IO ranges + */ + kvm_build_io_pmt(kvm); + +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kvm_alloc_kvm(); + + if (IS_ERR(kvm)) + return ERR_PTR(-ENOMEM); + kvm_init_vm(kvm); + + return kvm; + +} + +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, + struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int r; + + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + vpd->vgr[i] = regs->vpd.vgr[i]; + vpd->vbgr[i] = regs->vpd.vbgr[i]; + } + for (i = 0; i < 128; i++) + vpd->vcr[i] = regs->vpd.vcr[i]; + vpd->vhpi = regs->vpd.vhpi; + vpd->vnat = regs->vpd.vnat; + vpd->vbnat = regs->vpd.vbnat; + vpd->vpsr = regs->vpd.vpsr; + + vpd->vpr = regs->vpd.vpr; + + r = -EFAULT; + r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, + sizeof(union context)); + if (r) + goto out; + r = copy_from_user(vcpu + 1, regs->saved_stack + + sizeof(struct kvm_vcpu), + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); + if (r) + goto out; + vcpu->arch.exit_data = + ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + + RESTORE_REGS(mp_state); + RESTORE_REGS(vmm_rr); + memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); + memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); + RESTORE_REGS(itr_regions); + RESTORE_REGS(dtr_regions); + RESTORE_REGS(tc_regions); + RESTORE_REGS(irq_check); + RESTORE_REGS(itc_check); + RESTORE_REGS(timer_check); + RESTORE_REGS(timer_pending); + RESTORE_REGS(last_itc); + for (i = 0; i < 8; i++) { + vcpu->arch.vrr[i] = regs->vrr[i]; + vcpu->arch.ibr[i] = regs->ibr[i]; + vcpu->arch.dbr[i] = regs->dbr[i]; + } + for (i = 0; i < 4; i++) + vcpu->arch.insvc[i] = regs->insvc[i]; + RESTORE_REGS(xtp); + RESTORE_REGS(metaphysical_rr0); + RESTORE_REGS(metaphysical_rr4); + RESTORE_REGS(metaphysical_saved_rr0); + RESTORE_REGS(metaphysical_saved_rr4); + RESTORE_REGS(fp_psr); + RESTORE_REGS(saved_gp); + + vcpu->arch.irq_new_pending = 1; + vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + set_bit(KVM_REQ_RESUME, &vcpu->requests); + + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + struct kvm_userspace_memory_region kvm_userspace_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + goto out; + kvm_userspace_mem.slot = kvm_mem.slot; + kvm_userspace_mem.flags = kvm_mem.flags; + kvm_userspace_mem.guest_phys_addr = + kvm_mem.guest_phys_addr; + kvm_userspace_mem.memory_size = kvm_mem.memory_size; + r = kvm_vm_ioctl_set_memory_region(kvm, + &kvm_userspace_mem, 0); + if (r) + goto out; + break; + } + case KVM_CREATE_IRQCHIP: + r = -EFAULT; + r = kvm_ioapic_init(kvm); + if (r) + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, + irq_event.irq, + irq_event.level); + mutex_unlock(&kvm->lock); + r = 0; + } + break; + } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } + default: + ; + } +out: + return r; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; + +} +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + + return -EINVAL; +} + +static int kvm_alloc_vmm_area(void) +{ + if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { + kvm_vmm_base = __get_free_pages(GFP_KERNEL, + get_order(KVM_VMM_SIZE)); + if (!kvm_vmm_base) + return -ENOMEM; + + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; + + printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", + kvm_vmm_base, kvm_vm_buffer); + } + + return 0; +} + +static void kvm_free_vmm_area(void) +{ + if (kvm_vmm_base) { + /*Zero this area before free to avoid bits leak!!*/ + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); + kvm_vmm_base = 0; + kvm_vm_buffer = 0; + kvm_vsa_base = 0; + } +} + +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. Leave it as blank for IA64. + */ +void decache_vcpus_on_cpu(int cpu) +{ +} + +static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +static int vti_init_vpd(struct kvm_vcpu *vcpu) +{ + int i; + union cpuid3_t cpuid3; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (IS_ERR(vpd)) + return PTR_ERR(vpd); + + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + /*Set vac and vdc fields*/ + vpd->vac.a_from_int_cr = 1; + vpd->vac.a_to_int_cr = 1; + vpd->vac.a_from_psr = 1; + vpd->vac.a_from_cpuid = 1; + vpd->vac.a_cover = 1; + vpd->vac.a_bsw = 1; + vpd->vac.a_int = 1; + vpd->vdc.d_vmsw = 1; + + /*Set virtual buffer*/ + vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; + + return 0; +} + +static int vti_create_vp(struct kvm_vcpu *vcpu) +{ + long ret; + struct vpd *vpd = vcpu->arch.vpd; + unsigned long vmm_ivt; + + vmm_ivt = kvm_vmm_info->vmm_ivt; + + printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); + + ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); + + if (ret) { + printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); + return -EINVAL; + } + return 0; +} + +static void init_ptce_info(struct kvm_vcpu *vcpu) +{ + ia64_ptce_info_t ptce = {0}; + + ia64_get_ptce(&ptce); + vcpu->arch.ptce_base = ptce.base; + vcpu->arch.ptce_count[0] = ptce.count[0]; + vcpu->arch.ptce_count[1] = ptce.count[1]; + vcpu->arch.ptce_stride[0] = ptce.stride[0]; + vcpu->arch.ptce_stride[1] = ptce.stride[1]; +} + +static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) +{ + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + + if (hrtimer_cancel(p_ht)) + hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) +{ + struct kvm_vcpu *vcpu; + wait_queue_head_t *q; + + vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); + if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + goto out; + + q = &vcpu->wq; + if (waitqueue_active(q)) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(q); + } +out: + vcpu->arch.timer_check = 1; + return HRTIMER_NORESTART; +} + +#define PALE_RESET_ENTRY 0x80000000ffffffb0UL + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + int r; + int i; + long itc_offset; + struct kvm *kvm = vcpu->kvm; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + union context *p_ctx = &vcpu->arch.guest; + struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); + + /*Init vcpu context for first run.*/ + if (IS_ERR(vmm_vcpu)) + return PTR_ERR(vmm_vcpu); + + if (vcpu->vcpu_id == 0) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + /*Set entry address for first run.*/ + regs->cr_iip = PALE_RESET_ENTRY; + + /*Initilize itc offset for vcpus*/ + itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + v->arch.itc_offset = itc_offset; + v->arch.last_itc = 0; + } + } else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; + + r = -ENOMEM; + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); + if (!vcpu->arch.apic) + goto out; + vcpu->arch.apic->vcpu = vcpu; + + p_ctx->gr[1] = 0; + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[13] = (unsigned long)vmm_vcpu; + p_ctx->psr = 0x1008522000UL; + p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ + p_ctx->caller_unat = 0; + p_ctx->pr = 0x0; + p_ctx->ar[36] = 0x0; /*unat*/ + p_ctx->ar[19] = 0x0; /*rnat*/ + p_ctx->ar[18] = (unsigned long)vmm_vcpu + + ((sizeof(struct kvm_vcpu)+15) & ~15); + p_ctx->ar[64] = 0x0; /*pfs*/ + p_ctx->cr[0] = 0x7e04UL; + p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; + p_ctx->cr[8] = 0x3c; + + /*Initilize region register*/ + p_ctx->rr[0] = 0x30; + p_ctx->rr[1] = 0x30; + p_ctx->rr[2] = 0x30; + p_ctx->rr[3] = 0x30; + p_ctx->rr[4] = 0x30; + p_ctx->rr[5] = 0x30; + p_ctx->rr[7] = 0x30; + + /*Initilize branch register 0*/ + p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; + + vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; + vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; + vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; + + hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + vcpu->arch.hlt_timer.function = hlt_timer_fn; + + vcpu->arch.last_run_cpu = -1; + vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vsa_base = kvm_vsa_base; + vcpu->arch.__gp = kvm_vmm_gp; + vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + init_ptce_info(vcpu); + + r = 0; +out: + return r; +} + +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ + unsigned long psr; + int r; + + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + if (r) + goto fail; + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); + if (r) + goto fail; + + r = vti_init_vpd(vcpu); + if (r) { + printk(KERN_DEBUG"kvm: vpd init error!!\n"); + goto uninit; + } + + r = vti_create_vp(vcpu); + if (r) + goto uninit; + + kvm_purge_vmm_mapping(vcpu); + local_irq_restore(psr); + + return 0; +uninit: + kvm_vcpu_uninit(vcpu); +fail: + return r; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + unsigned long vm_base = kvm->arch.vm_base; + int r; + int cpu; + + r = -ENOMEM; + if (!vm_base) { + printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); + goto fail; + } + vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu->kvm = kvm; + + cpu = get_cpu(); + vti_vcpu_load(vcpu, cpu); + r = vti_vcpu_setup(vcpu, id); + put_cpu(); + + if (r) { + printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); + goto fail; + } + + return vcpu; +fail: + return ERR_PTR(r); +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; +} + +static void free_kvm(struct kvm *kvm) +{ + unsigned long vm_base = kvm->arch.vm_base; + + if (vm_base) { + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); + } + +} + +static void kvm_release_vm_pages(struct kvm *kvm) +{ + struct kvm_memory_slot *memslot; + int i, j; + unsigned long base_gfn; + + for (i = 0; i < kvm->nmemslots; i++) { + memslot = &kvm->memslots[i]; + base_gfn = memslot->base_gfn; + + for (j = 0; j < memslot->npages; j++) { + if (memslot->rmap[j]) + put_page((struct page *)memslot->rmap[j]); + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->arch.vioapic); + kvm_release_vm_pages(kvm); + kvm_free_physmem(kvm); + free_kvm(kvm); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + if (cpu != vcpu->cpu) { + vcpu->cpu = cpu; + if (vcpu->arch.ht_active) + kvm_migrate_hlt_timer(vcpu); + } +} + +#define SAVE_REGS(_x) regs->_x = vcpu->arch._x + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + int r; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + regs->vpd.vgr[i] = vpd->vgr[i]; + regs->vpd.vbgr[i] = vpd->vbgr[i]; + } + for (i = 0; i < 128; i++) + regs->vpd.vcr[i] = vpd->vcr[i]; + regs->vpd.vhpi = vpd->vhpi; + regs->vpd.vnat = vpd->vnat; + regs->vpd.vbnat = vpd->vbnat; + regs->vpd.vpsr = vpd->vpsr; + regs->vpd.vpr = vpd->vpr; + + r = -EFAULT; + r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, + sizeof(union context)); + if (r) + goto out; + r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); + if (r) + goto out; + SAVE_REGS(mp_state); + SAVE_REGS(vmm_rr); + memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); + memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); + SAVE_REGS(itr_regions); + SAVE_REGS(dtr_regions); + SAVE_REGS(tc_regions); + SAVE_REGS(irq_check); + SAVE_REGS(itc_check); + SAVE_REGS(timer_check); + SAVE_REGS(timer_pending); + SAVE_REGS(last_itc); + for (i = 0; i < 8; i++) { + regs->vrr[i] = vcpu->arch.vrr[i]; + regs->ibr[i] = vcpu->arch.ibr[i]; + regs->dbr[i] = vcpu->arch.dbr[i]; + } + for (i = 0; i < 4; i++) + regs->insvc[i] = vcpu->arch.insvc[i]; + regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + SAVE_REGS(xtp); + SAVE_REGS(metaphysical_rr0); + SAVE_REGS(metaphysical_rr4); + SAVE_REGS(metaphysical_saved_rr0); + SAVE_REGS(metaphysical_saved_rr4); + SAVE_REGS(fp_psr); + SAVE_REGS(saved_gp); + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + + hrtimer_cancel(&vcpu->arch.hlt_timer); + kfree(vcpu->arch.apic); +} + + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + unsigned long i; + struct page *page; + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + unsigned long base_gfn = memslot->base_gfn; + + for (i = 0; i < npages; i++) { + page = gfn_to_page(kvm, base_gfn + i); + kvm_set_pmt_entry(kvm, base_gfn + i, + page_to_pfn(page) << PAGE_SHIFT, + _PAGE_AR_RWX|_PAGE_MA_WB); + memslot->rmap[i] = (unsigned long)page; + } + + return 0; +} + + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); +} + +static int vti_cpu_has_kvm_support(void) +{ + long avail = 1, status = 1, control = 1; + long ret; + + ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); + if (ret) + goto out; + + if (!(avail & PAL_PROC_VM_BIT)) + goto out; + + printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); + + ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); + if (ret) + goto out; + printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); + + if (!(vp_env_info & VP_OPCODE)) { + printk(KERN_WARNING"kvm: No opcode ability on hardware, " + "vm_env_info:0x%lx\n", vp_env_info); + } + + return 1; +out: + return 0; +} + +static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, + struct module *module) +{ + unsigned long module_base; + unsigned long vmm_size; + + unsigned long vmm_offset, func_offset, fdesc_offset; + struct fdesc *p_fdesc; + + BUG_ON(!module); + + if (!kvm_vmm_base) { + printk("kvm: kvm area hasn't been initilized yet!!\n"); + return -EFAULT; + } + + /*Calculate new position of relocated vmm module.*/ + module_base = (unsigned long)module->module_core; + vmm_size = module->core_size; + if (unlikely(vmm_size > KVM_VMM_SIZE)) + return -EFAULT; + + memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_flush_icache(kvm_vmm_base, vmm_size); + + /*Recalculate kvm_vmm_info based on new VMM*/ + vmm_offset = vmm_info->vmm_ivt - module_base; + kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; + printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", + kvm_vmm_info->vmm_ivt); + + fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; + kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); + + printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", + KVM_VMM_BASE+func_offset); + + fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; + kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); + + kvm_vmm_gp = p_fdesc->gp; + + printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", + kvm_vmm_info->vmm_entry); + printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", + KVM_VMM_BASE + func_offset); + + return 0; +} + +int kvm_arch_init(void *opaque) +{ + int r; + struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; + + if (!vti_cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); + r = -EOPNOTSUPP; + goto out; + } + + if (kvm_vmm_info) { + printk(KERN_ERR "kvm: Already loaded VMM module!\n"); + r = -EEXIST; + goto out; + } + + r = -ENOMEM; + kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); + if (!kvm_vmm_info) + goto out; + + if (kvm_alloc_vmm_area()) + goto out_free0; + + r = kvm_relocate_vmm(vmm_info, vmm_info->module); + if (r) + goto out_free1; + + return 0; + +out_free1: + kvm_free_vmm_area(); +out_free0: + kfree(kvm_vmm_info); +out: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_free_vmm_area(); + kfree(kvm_vmm_info); + kvm_vmm_info = NULL; +} + +static int kvm_ia64_sync_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + long n, base; + unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS + + KVM_MEM_DIRTY_LOG_OFS); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + base = memslot->base_gfn / BITS_PER_LONG; + + for (i = 0; i < n/sizeof(long); ++i) { + memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; + dirty_bitmap[base + i] = 0; + } + r = 0; +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + spin_lock(&kvm->arch.dirty_log_lock); + + r = kvm_ia64_sync_dirty_log(kvm, log); + if (r) + goto out; + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + spin_unlock(&kvm->arch.dirty_log_lock); + return r; +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vec, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } + return 1; + } + return 0; +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return apic->vcpu->vcpu_id == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + return 0; +} + +struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + struct kvm_vcpu *lvcpu = kvm->vcpus[0]; + int i; + + for (i = 1; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i]) + continue; + if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) + lvcpu = kvm->vcpus[i]; + } + + return lvcpu; +} + +static int find_highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + + return -1; +} + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (vpd->irr[0] & (1UL << NMI_VECTOR)) + return NMI_VECTOR; + if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return find_highest_bits((int *)&vpd->irr[0]); +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + if (kvm_highest_pending_irq(vcpu) != -1) + return 1; + return 0; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c new file mode 100644 index 00000000000..091f936c448 --- /dev/null +++ b/arch/ia64/kvm/kvm_fw.c @@ -0,0 +1,500 @@ +/* + * PAL/SAL call delegation + * + * Copyright (c) 2004 Li Susie <susie.li@intel.com> + * Copyright (c) 2005 Yu Ke <ke.yu@intel.com> + * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/kvm_host.h> +#include <linux/smp.h> + +#include "vti.h" +#include "misc.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/tlb.h> + +/* + * Handy macros to make sure that the PAL return values start out + * as something meaningful. + */ +#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ + { \ + x.status = PAL_STATUS_UNIMPLEMENTED; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +#define INIT_PAL_STATUS_SUCCESS(x) \ + { \ + x.status = PAL_STATUS_SUCCESS; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, + u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { + struct exit_ctl_data *p; + + if (vcpu) { + p = &vcpu->arch.exit_data; + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + *gr28 = p->u.pal_data.gr28; + *gr29 = p->u.pal_data.gr29; + *gr30 = p->u.pal_data.gr30; + *gr31 = p->u.pal_data.gr31; + return ; + } + } + printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); +} + +static void set_pal_result(struct kvm_vcpu *vcpu, + struct ia64_pal_retval result) { + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { + p->u.pal_data.ret = result; + return ; + } + INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); +} + +static void set_sal_result(struct kvm_vcpu *vcpu, + struct sal_ret_values result) { + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { + p->u.sal_data.ret = result; + return ; + } + printk(KERN_WARNING"Failed to set sal result!!\n"); +} + +struct cache_flush_args { + u64 cache_type; + u64 operation; + u64 progress; + long status; +}; + +cpumask_t cpu_cache_coherent_map; + +static void remote_pal_cache_flush(void *data) +{ + struct cache_flush_args *args = data; + long status; + u64 progress = args->progress; + + status = ia64_pal_cache_flush(args->cache_type, args->operation, + &progress, NULL); + if (status != 0) + args->status = status; +} + +static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) +{ + u64 gr28, gr29, gr30, gr31; + struct ia64_pal_retval result = {0, 0, 0, 0}; + struct cache_flush_args args = {0, 0, 0, 0}; + long psr; + + gr28 = gr29 = gr30 = gr31 = 0; + kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); + + if (gr31 != 0) + printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); + + /* Always call Host Pal in int=1 */ + gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; + args.cache_type = gr29; + args.operation = gr30; + smp_call_function(remote_pal_cache_flush, + (void *)&args, 1, 1); + if (args.status != 0) + printk(KERN_ERR"pal_cache_flush error!," + "status:0x%lx\n", args.status); + /* + * Call Host PAL cache flush + * Clear psr.ic when call PAL_CACHE_FLUSH + */ + local_irq_save(psr); + result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, + &result.v0); + local_irq_restore(psr); + if (result.status != 0) + printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" + "in1:%lx,in2:%lx\n", + vcpu, result.status, gr29, gr30); + +#if 0 + if (gr29 == PAL_CACHE_TYPE_COHERENT) { + cpus_setall(vcpu->arch.cache_coherent_map); + cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); + cpus_setall(cpu_cache_coherent_map); + cpu_clear(vcpu->cpu, cpu_cache_coherent_map); + } +#endif + return result; +} + +struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); + + /* + * PAL_FREQ_BASE may not be implemented in some platforms, + * call SAL instead. + */ + if (result.v0 == 0) { + result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &result.v0, + &result.v1); + result.v2 = 0; + } + + return result; +} + +static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + return result; +} + +static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + INIT_PAL_STATUS_SUCCESS(result); + return result; +} + +static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result = {0, 0, 0, 0}; + long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, + &result.v2, in2); + + return result; +} + +static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) +{ + + pal_cache_config_info_t ci; + long status; + unsigned long in0, in1, in2, in3, r9, r10; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + status = ia64_pal_cache_config_info(in1, in2, &ci); + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + return ((struct ia64_pal_retval){status, r9, r10, 0}); +} + +#define GUEST_IMPL_VA_MSB 59 +#define GUEST_RID_BITS 18 + +static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) +{ + + pal_vm_info_1_u_t vminfo1; + pal_vm_info_2_u_t vminfo2; + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); + if (!result.status) { + vminfo1.pvi1_val = result.v0; + vminfo1.pal_vm_info_1_s.max_itr_entry = 8; + vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; + result.v0 = vminfo1.pvi1_val; + vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; + vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; + result.v1 = vminfo2.pvi2_val; + } + + return result; +} + +static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + + return result; +} + +static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) +{ + u64 index = 0; + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) + index = p->u.pal_data.gr28; + + return index; +} + +int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + + u64 gr28; + struct ia64_pal_retval result; + int ret = 1; + + gr28 = kvm_get_pal_call_index(vcpu); + /*printk("pal_call index:%lx\n",gr28);*/ + switch (gr28) { + case PAL_CACHE_FLUSH: + result = pal_cache_flush(vcpu); + break; + case PAL_CACHE_SUMMARY: + result = pal_cache_summary(vcpu); + break; + case PAL_HALT_LIGHT: + { + vcpu->arch.timer_pending = 1; + INIT_PAL_STATUS_SUCCESS(result); + if (kvm_highest_pending_irq(vcpu) == -1) + ret = kvm_emulate_halt(vcpu); + + } + break; + + case PAL_FREQ_RATIOS: + result = pal_freq_ratios(vcpu); + break; + + case PAL_FREQ_BASE: + result = pal_freq_base(vcpu); + break; + + case PAL_LOGICAL_TO_PHYSICAL : + result = pal_logical_to_physica(vcpu); + break; + + case PAL_VM_SUMMARY : + result = pal_vm_summary(vcpu); + break; + + case PAL_VM_INFO : + result = pal_vm_info(vcpu); + break; + case PAL_PLATFORM_ADDR : + result = pal_platform_addr(vcpu); + break; + case PAL_CACHE_INFO: + result = pal_cache_info(vcpu); + break; + case PAL_PTCE_INFO: + INIT_PAL_STATUS_SUCCESS(result); + result.v1 = (1L << 32) | 1L; + break; + case PAL_VM_PAGE_SIZE: + result.status = ia64_pal_vm_page_size(&result.v0, + &result.v1); + break; + case PAL_RSE_INFO: + result.status = ia64_pal_rse_info(&result.v0, + (pal_hints_u_t *)&result.v1); + break; + case PAL_PROC_GET_FEATURES: + result = pal_proc_get_features(vcpu); + break; + case PAL_DEBUG_INFO: + result.status = ia64_pal_debug_info(&result.v0, + &result.v1); + break; + case PAL_VERSION: + result.status = ia64_pal_version( + (pal_version_u_t *)&result.v0, + (pal_version_u_t *)&result.v1); + + break; + case PAL_FIXED_ADDR: + result.status = PAL_STATUS_SUCCESS; + result.v0 = vcpu->vcpu_id; + break; + default: + INIT_PAL_STATUS_UNIMPLEMENTED(result); + printk(KERN_WARNING"kvm: Unsupported pal call," + " index:0x%lx\n", gr28); + } + set_pal_result(vcpu, result); + return ret; +} + +static struct sal_ret_values sal_emulator(struct kvm *kvm, + long index, unsigned long in1, + unsigned long in2, unsigned long in3, + unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + long r11 = 0; + long status; + + status = 0; + switch (index) { + case SAL_FREQ_BASE: + status = ia64_sal_freq_base(in1, &r9, &r10); + break; + case SAL_PCI_CONFIG_READ: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_READ\n"); + break; + case SAL_PCI_CONFIG_WRITE: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_WRITE\n"); + break; + case SAL_SET_VECTORS: + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { + status = -2; + } else { + kvm->arch.rdv_sal_data.boot_ip = in2; + kvm->arch.rdv_sal_data.boot_gp = in3; + } + printk("Rendvous called! iip:%lx\n\n", in2); + } else + printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." + "ignored...\n", in1); + break; + case SAL_GET_STATE_INFO: + /* No more info. */ + status = -5; + r9 = 0; + break; + case SAL_GET_STATE_INFO_SIZE: + /* Return a dummy size. */ + status = 0; + r9 = 128; + break; + case SAL_CLEAR_STATE_INFO: + /* Noop. */ + break; + case SAL_MC_RENDEZ: + printk(KERN_WARNING + "kvm: called SAL_MC_RENDEZ. ignored...\n"); + break; + case SAL_MC_SET_PARAMS: + printk(KERN_WARNING + "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); + break; + case SAL_CACHE_FLUSH: + if (1) { + /*Flush using SAL. + This method is faster but has a side + effect on other vcpu running on + this cpu. */ + status = ia64_sal_cache_flush(in1); + } else { + /*Maybe need to implement the method + without side effect!*/ + status = 0; + } + break; + case SAL_CACHE_INIT: + printk(KERN_WARNING + "kvm: called SAL_CACHE_INIT. ignored...\n"); + break; + case SAL_UPDATE_PAL: + printk(KERN_WARNING + "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); + break; + default: + printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." + " index:%ld\n", index); + status = -1; + break; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, + u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p) { + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + *in0 = p->u.sal_data.in0; + *in1 = p->u.sal_data.in1; + *in2 = p->u.sal_data.in2; + *in3 = p->u.sal_data.in3; + *in4 = p->u.sal_data.in4; + *in5 = p->u.sal_data.in5; + *in6 = p->u.sal_data.in6; + *in7 = p->u.sal_data.in7; + return ; + } + } + *in0 = 0; +} + +void kvm_sal_emul(struct kvm_vcpu *vcpu) +{ + + struct sal_ret_values result; + u64 index, in1, in2, in3, in4, in5, in6, in7; + + kvm_get_sal_call_data(vcpu, &index, &in1, &in2, + &in3, &in4, &in5, &in6, &in7); + result = sal_emulator(vcpu->kvm, index, in1, in2, in3, + in4, in5, in6, in7); + set_sal_result(vcpu, result); +} diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h new file mode 100644 index 00000000000..13980d9b8bc --- /dev/null +++ b/arch/ia64/kvm/kvm_minstate.h @@ -0,0 +1,273 @@ +/* + * kvm_minstate.h: min save macros + * Copyright (c) 2007, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#include <asm/asmmacro.h> +#include <asm/types.h> +#include <asm/kregs.h> +#include "asm-offsets.h" + +#define KVM_MINSTATE_START_SAVE_MIN \ + mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ + ;; \ + mov.m r28 = ar.rnat; \ + addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23 = ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore = r22; /* switch to kernel RBS */\ + ;; \ + mov r18 = ar.bsp; \ + mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ + + + +#define KVM_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */\ + ;; + + +#define PAL_VSA_SYNC_READ \ + /* begin to call pal vps sync_read */ \ + add r25 = VMM_VPD_BASE_OFFSET, r21; \ + adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ + ;; \ + ld8 r25 = [r25]; /* read vpd base */ \ + ld8 r20 = [r20]; \ + ;; \ + add r20 = PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + nop 0x0; \ + mov r24 = ip; \ + mov b0 = r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + nop 0x0; \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; + + + +#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ + + +#define PT(f) (VMM_PT_REGS_##f##_OFFSET) + +#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ + KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27 = ar.rsc; /* M */ \ + mov r20 = r1; /* A */ \ + mov r25 = ar.unat; /* M */ \ + mov r29 = cr.ipsr; /* M */ \ + mov r26 = ar.pfs; /* I */ \ + mov r18 = cr.isr; \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ + mov r1 = r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + KVM_MINSTATE_START_SAVE_MIN \ + adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ + adds r16 = PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16] = r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ + mov r29 = b0 \ + ;; \ + adds r16 = PT(R8),r1; /* initialize first base pointer */\ + adds r17 = PT(R9),r1; /* initialize second base pointer */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r8,16; \ +.mem.offset 8,0; st8.spill [r17] = r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r10,24; \ +.mem.offset 8,0; st8.spill [r17] = r11,24; \ + ;; \ + mov r9 = cr.iip; /* M */ \ + mov r10 = ar.fpsr; /* M */ \ + ;; \ + st8 [r16] = r9,16; /* save cr.iip */ \ + st8 [r17] = r30,16; /* save cr.ifs */ \ + sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16] = r25,16; /* save ar.unat */ \ + st8 [r17] = r26,16; /* save ar.pfs */ \ + shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ + ;; \ + st8 [r16] = r27,16; /* save ar.rsc */ \ + st8 [r17] = r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16] = r23,16; /* save ar.bspstore */ \ + st8 [r17] = r31,16; /* save predicates */ \ + ;; \ + st8 [r16] = r29,16; /* save b0 */ \ + st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17] = r12,16; \ + adds r12 = -16,r1; /* switch to kernel memory stack */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r13,16; \ +.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ + mov r13 = r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r15,16; \ +.mem.offset 8,0; st8.spill [r17] = r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r2,16; \ +.mem.offset 8,0; st8.spill [r17] = r3,16; \ + adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ + ;; \ + adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ + adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ + mov r26 = cr.iipa; \ + mov r27 = cr.isr; \ + ;; \ + st8 [r16] = r26; \ + st8 [r17] = r27; \ + ;; \ + EXTRA; \ + mov r8 = ar.ccv; \ + mov r9 = ar.csd; \ + mov r10 = ar.ssd; \ + movl r11 = FPSR_DEFAULT; /* L-unit */ \ + adds r17 = VMM_VCPU_GP_OFFSET,r13; \ + ;; \ + ld8 r1 = [r17];/* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ \ + KVM_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define KVM_SAVE_REST \ +.mem.offset 0,0; st8.spill [r2] = r16,16; \ +.mem.offset 8,0; st8.spill [r3] = r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r18,16; \ +.mem.offset 8,0; st8.spill [r3] = r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r20,16; \ +.mem.offset 8,0; st8.spill [r3] = r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r22,16; \ +.mem.offset 8,0; st8.spill [r3] = r23,16; \ + mov r19 = b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r24,16; \ +.mem.offset 8,0; st8.spill [r3] = r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r26,16; \ +.mem.offset 8,0; st8.spill [r3] = r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r28,16; \ +.mem.offset 8,0; st8.spill [r3] = r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r30,16; \ +.mem.offset 8,0; st8.spill [r3] = r31,32; \ + ;; \ + mov ar.fpsr = r11; \ + st8 [r2] = r8,8; \ + adds r24 = PT(B6)-PT(F7),r3; \ + adds r25 = PT(B7)-PT(F7),r3; \ + ;; \ + st8 [r24] = r18,16; /* b6 */ \ + st8 [r25] = r19,16; /* b7 */ \ + adds r2 = PT(R4)-PT(F6),r2; \ + adds r3 = PT(R5)-PT(F7),r3; \ + ;; \ + st8 [r24] = r9; /* ar.csd */ \ + st8 [r25] = r10; /* ar.ssd */ \ + ;; \ + mov r18 = ar.unat; \ + adds r19 = PT(EML_UNAT)-PT(R4),r2; \ + ;; \ + st8 [r19] = r18; /* eml_unat */ \ + + +#define KVM_SAVE_EXTRA \ +.mem.offset 0,0; st8.spill [r2] = r4,16; \ +.mem.offset 8,0; st8.spill [r3] = r5,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r6,16; \ +.mem.offset 8,0; st8.spill [r3] = r7; \ + ;; \ + mov r26 = ar.unat; \ + ;; \ + st8 [r2] = r26;/* eml_unat */ \ + +#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) +#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) +#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h new file mode 100644 index 00000000000..6d6cbcb1489 --- /dev/null +++ b/arch/ia64/kvm/lapic.h @@ -0,0 +1,25 @@ +#ifndef __KVM_IA64_LAPIC_H +#define __KVM_IA64_LAPIC_H + +#include <linux/kvm_host.h> + +/* + * vlsapic + */ +struct kvm_lapic{ + struct kvm_vcpu *vcpu; + uint64_t insvc[4]; + uint64_t vhpi; + uint8_t xtp; + uint8_t pal_init_pending; + uint8_t pad[2]; +}; + +int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_free_lapic(struct kvm_vcpu *vcpu); + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); + +#endif diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h new file mode 100644 index 00000000000..e585c460734 --- /dev/null +++ b/arch/ia64/kvm/misc.h @@ -0,0 +1,93 @@ +#ifndef __KVM_IA64_MISC_H +#define __KVM_IA64_MISC_H + +#include <linux/kvm_host.h> +/* + * misc.h + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +/* + *Return p2m base address at host side! + */ +static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) +{ + return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); +} + +static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, + u64 paddr, u64 mem_flags) +{ + uint64_t *pmt_base = kvm_host_get_pmt(kvm); + unsigned long pte; + + pte = PAGE_ALIGN(paddr) | mem_flags; + pmt_base[gfn] = pte; +} + +/*Function for translating host address to guest address*/ + +static inline void *to_guest(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)(addr) - kvm->arch.vm_base + + KVM_VM_DATA_BASE); +} + +/*Function for translating guest address to host address*/ + +static inline void *to_host(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)addr - KVM_VM_DATA_BASE + + kvm->arch.vm_base); +} + +/* Get host context of the vcpu */ +static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.host; + return to_guest(vcpu->kvm, ctx); +} + +/* Get guest context of the vcpu */ +static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.guest; + return to_guest(vcpu->kvm, ctx); +} + +/* kvm get exit data from gvmm! */ +static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.exit_data; +} + +/*kvm get vcpu ioreq for kvm module!*/ +static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_ctl_data; + + if (vcpu) { + p_ctl_data = kvm_get_exit_data(vcpu); + if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) + return &p_ctl_data->u.ioreq; + } + + return NULL; +} + +#endif diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c new file mode 100644 index 00000000000..351bf70da46 --- /dev/null +++ b/arch/ia64/kvm/mmio.c @@ -0,0 +1,341 @@ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) + * + * Copyright (c) 2007 Intel Corporation KVM support. + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/kvm_host.h> + +#include "vcpu.h" + +static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) +{ + VLSAPIC_XTP(v) = val; +} + +/* + * LSAPIC OFFSET + */ +#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + +/* + * execute write IPI op. + */ +static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, + uint64_t addr, uint64_t data) +{ + struct exit_ctl_data *p = ¤t_vcpu->arch.exit_data; + unsigned long psr; + + local_irq_save(psr); + + p->exit_reason = EXIT_REASON_IPI; + p->u.ipi_data.addr.val = addr; + p->u.ipi_data.data.val = data; + vmm_transition(current_vcpu); + + local_irq_restore(psr); + +} + +void lsapic_write(struct kvm_vcpu *v, unsigned long addr, + unsigned long length, unsigned long val) +{ + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ + panic_vm(v); + break; + case PIB_OFST_XTP: + if (length == 1) { + vlsapic_write_xtp(v, val); + } else { + /*panic_domain(NULL, + "Undefined write on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + if (PIB_LOW_HALF(addr)) { + /*lower half */ + if (length != 8) + /*panic_domain(NULL, + "Can't LHF write with size %ld!\n", + length);*/ + panic_vm(v); + else + vlsapic_write_ipi(v, addr, val); + } else { /* upper half + printk("IPI-UHF write %lx\n",addr);*/ + panic_vm(v); + } + break; + } +} + +unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, + unsigned long length) +{ + uint64_t result = 0; + + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + if (length == 1) /* 1 byte load */ + ; /* There is no i8259, there is no INTA access*/ + else + /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ + panic_vm(v); + + break; + case PIB_OFST_XTP: + if (length == 1) { + result = VLSAPIC_XTP(v); + /* printk("read xtp %lx\n", result); */ + } else { + /*panic_domain(NULL, + "Undefined read on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + panic_vm(v); + break; + } + return result; +} + +static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, + u16 s, int ma, int dir) +{ + unsigned long iot; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); + + local_irq_save(psr); + + /*Intercept the acces for PIB range*/ + if (iot == GPFN_PIB) { + if (!dir) + lsapic_write(vcpu, src_pa, s, *dest); + else + *dest = lsapic_read(vcpu, src_pa, s); + goto out; + } + p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; + p->u.ioreq.addr = src_pa; + p->u.ioreq.size = s; + p->u.ioreq.dir = dir; + if (dir == IOREQ_WRITE) + p->u.ioreq.data = *dest; + p->u.ioreq.state = STATE_IOREQ_READY; + vmm_transition(vcpu); + + if (p->u.ioreq.state == STATE_IORESP_READY) { + if (dir == IOREQ_READ) + *dest = p->u.ioreq.data; + } else + panic_vm(vcpu); +out: + local_irq_restore(psr); + return ; +} + +/* + dir 1: read 0:write + inst_type 0:integer 1:floating point + */ +#define SL_INTEGER 0 /* store/load interger*/ +#define SL_FLOATING 1 /* store/load floating*/ + +void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) +{ + struct kvm_pt_regs *regs; + IA64_BUNDLE bundle; + int slot, dir = 0; + int inst_type = -1; + u16 size = 0; + u64 data, slot1a, slot1b, temp, update_reg; + s32 imm; + INST64 inst; + + regs = vcpu_regs(vcpu); + + if (fetch_code(vcpu, regs->cr_iip, &bundle)) { + /* if fetch code fail, return and try again */ + return; + } + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) + inst.inst = bundle.slot0; + else if (slot == 1) { + slot1a = bundle.slot1a; + slot1b = bundle.slot1b; + inst.inst = slot1a + (slot1b << 18); + } else if (slot == 2) + inst.inst = bundle.slot2; + + /* Integer Load/Store */ + if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { + inst_type = SL_INTEGER; + size = (inst.M1.x6 & 0x3); + if ((inst.M1.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M4.r2); + } else if ((inst.M1.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + } + } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { + /* Integer Load + Reg update */ + inst_type = SL_INTEGER; + dir = IOREQ_READ; + size = (inst.M2.x6 & 0x3); + temp = vcpu_get_gr(vcpu, inst.M2.r3); + update_reg = vcpu_get_gr(vcpu, inst.M2.r2); + temp += update_reg; + vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); + } else if (inst.M3.major == 5) { + /*Integer Load/Store + Imm update*/ + inst_type = SL_INTEGER; + size = (inst.M3.x6&0x3); + if ((inst.M5.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M5.r2); + temp = vcpu_get_gr(vcpu, inst.M5.r3); + imm = (inst.M5.s << 31) | (inst.M5.i << 30) | + (inst.M5.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); + + } else if ((inst.M3.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + temp = vcpu_get_gr(vcpu, inst.M3.r3); + imm = (inst.M3.s << 31) | (inst.M3.i << 30) | + (inst.M3.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); + + } + } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B + && inst.M9.m == 0 && inst.M9.x == 0) { + /* Floating-point spill*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M9.f2, &v); + /* Write high word. FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { + /* Floating-point spill + Imm update */ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + + /* Write high word.FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { + /* Floating-point stf8 + Imm update */ + struct ia64_fpreg v; + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + size = 3; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + data = v.u.bits[0]; /* Significand. */ + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c + && inst.M15.x6 <= 0x2f) { + temp = vcpu_get_gr(vcpu, inst.M15.r3); + imm = (inst.M15.s << 31) | (inst.M15.i << 30) | + (inst.M15.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); + + vcpu_increment_iip(vcpu); + return; + } else if (inst.M12.major == 6 && inst.M12.m == 1 + && inst.M12.x == 1 && inst.M12.x6 == 1) { + /* Floating-point Load Pair + Imm ldfp8 M12*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_READ; + size = 8; /*ldfd*/ + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f1, &v); + padr += 8; + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f2, &v); + padr += 8; + vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); + vcpu_increment_iip(vcpu); + return; + } else { + inst_type = -1; + panic_vm(vcpu); + } + + size = 1 << size; + if (dir == IOREQ_WRITE) { + mmio_access(vcpu, padr, &data, size, ma, dir); + } else { + mmio_access(vcpu, padr, &data, size, ma, dir); + if (inst_type == SL_INTEGER) + vcpu_set_gr(vcpu, inst.M1.r1, data, 0); + else + panic_vm(vcpu); + + } + vcpu_increment_iip(vcpu); +} diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S new file mode 100644 index 00000000000..e4f15d641b2 --- /dev/null +++ b/arch/ia64/kvm/optvfault.S @@ -0,0 +1,918 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include <asm/processor.h> + +#include "vti.h" +#include "asm-offsets.h" + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR +#define ACCE_MOV_TO_RR +#define ACCE_RSM +#define ACCE_SSM +#define ACCE_MOV_TO_PSR +#define ACCE_THASH + +//mov r1=ar3 +GLOBAL_ENTRY(kvm_asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many kvm_virtualization_fault_back +#endif + add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 + add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + mov r19=ar.itc + mov r24=b0 + ;; + add r19=r19,r18 + addl r20=@gprel(asm_mov_to_reg),gp + ;; + st8 [r16] = r19 + adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + ;; + mov b0=r17 + br.sptk.few b0 + ;; +END(kvm_asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(kvm_asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_from_rr_back_1: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(kvm_asm_mov_from_rr) + + +// mov rr[r3]=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_rr) +#ifndef ACCE_MOV_TO_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,13,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r22=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_to_rr_back_1: + adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 + shr.u r23=r19,61 + shladd r17=r17,4,r20 + ;; + //if rr6, go back + cmp.eq p6,p0=6,r23 + mov b0=r22 + (p6) br.cond.dpnt.many kvm_virtualization_fault_back + ;; + mov r28=r19 + mov b0=r17 + br.many b0 +kvm_asm_mov_to_rr_back_2: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + shladd r27=r23,3,r27 + ;; // vrr.rid<<4 |0xe + st8 [r27]=r19 + mov b0=r30 + ;; + extr.u r16=r19,8,26 + extr.u r18 =r19,2,6 + mov r17 =0xe + ;; + shladd r16 = r16, 4, r17 + extr.u r19 =r19,0,8 + ;; + shl r16 = r16,8 + ;; + add r19 = r19, r16 + ;; //set ve 1 + dep r19=-1,r19,0,1 + cmp.lt p6,p0=14,r18 + ;; + (p6) mov r18=14 + ;; + (p6) dep r19=r18,r19,2,6 + ;; + cmp.eq p6,p0=0,r23 + ;; + cmp.eq.or p6,p0=4,r23 + ;; + adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + ;; + ld4 r16=[r16] + cmp.eq p7,p0=r0,r0 + (p6) shladd r17=r23,1,r17 + ;; + (p6) st8 [r17]=r19 + (p6) tbit.nz p6,p7=r16,0 + ;; + (p7) mov rr[r28]=r19 + mov r24=r22 + br.many b0 +END(kvm_asm_mov_to_rr) + + +//rsm +GLOBAL_ENTRY(kvm_asm_rsm) +#ifndef ACCE_RSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; + add r17=VPD_VPSR_START_OFFSET,r16 + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + //r26 is imm24 + dep r26=r28,r26,23,1 + ;; + ld8 r18=[r17] + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI + ld4 r23=[r22] + sub r27=-1,r26 + mov r24=b0 + ;; + mov r20=cr.ipsr + or r28=r27,r28 + and r19=r18,r27 + ;; + st8 [r17]=r19 + and r20=r20,r28 + /* Comment it out due to short of fp lazy alorgithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + */ + ;; + mov cr.ipsr=r20 + tbit.nz p6,p0=r23,0 + ;; + tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r26=VMM_VCPU_META_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + dep r23=-1,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + br.many kvm_resume_to_guest +END(kvm_asm_rsm) + + +//ssm +GLOBAL_ENTRY(kvm_asm_ssm) +#ifndef ACCE_SSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; //r26 is imm24 + add r27=VPD_VPSR_START_OFFSET,r16 + dep r26=r28,r26,23,1 + ;; //r19 vpsr + ld8 r29=[r27] + mov r24=b0 + ;; + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + mov r20=cr.ipsr + or r19=r29,r26 + ;; + ld4 r23=[r22] + st8 [r27]=r19 + or r20=r20,r26 + ;; + mov cr.ipsr=r20 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + and r19=r28,r19 + tbit.z p6,p0=r23,0 + ;; + cmp.ne.or p6,p0=r28,r19 + (p6) br.dptk kvm_asm_ssm_1 + ;; + add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_ssm_1: + tbit.nz p6,p0=r29,IA64_PSR_I_BIT + ;; + tbit.z.or p6,p0=r19,IA64_PSR_I_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_ssm) + + +//mov psr.l=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_psr) +#ifndef ACCE_MOV_TO_PSR + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,13,7 //r2 + ;; + ld8 r16=[r16] + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 + shladd r26=r26,4,r20 + mov r24=b0 + ;; + add r27=VPD_VPSR_START_OFFSET,r16 + mov b0=r26 + br.many b0 + ;; +kvm_asm_mov_to_psr_back: + ld8 r17=[r27] + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + dep r19=0,r19,32,32 + ;; + ld4 r23=[r22] + dep r18=0,r17,0,32 + ;; + add r30=r18,r19 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + st8 [r27]=r30 + and r27=r28,r30 + and r29=r28,r17 + ;; + cmp.eq p5,p0=r29,r27 + cmp.eq p6,p7=r28,r27 + (p5) br.many kvm_asm_mov_to_psr_1 + ;; + //virtual to physical + (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 + (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + (p7) dep r23=-1,r23,0,1 + ;; + //physical to virtual + (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + (p6) dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_mov_to_psr_1: + mov r20=cr.ipsr + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT + ;; + or r19=r19,r28 + dep r20=0,r20,0,32 + ;; + add r20=r19,r20 + mov b0=r24 + ;; + /* Comment it out due to short of fp lazy algorithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + ;; + */ + mov cr.ipsr=r20 + cmp.ne p6,p0=r0,r0 + ;; + tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT + tbit.z.or p6,p0=r30,IA64_PSR_I_BIT + (p6) br.dpnt.few kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_mov_to_psr) + + +ENTRY(kvm_asm_dispatch_vexirq) +//increment iip + mov r16=cr.ipsr + ;; + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + (p7) add r17=1,r17 + ;; + (p6) add r18=0x10,r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + (p6) mov cr.iip=r18 + mov cr.ipsr=r16 + mov r30 =1 + br.many kvm_dispatch_vexirq +END(kvm_asm_dispatch_vexirq) + +// thash +// TODO: add support when pta.vf = 1 +GLOBAL_ENTRY(kvm_asm_thash) +#ifndef ACCE_THASH + br.many kvm_virtualization_fault_back +#endif + extr.u r17=r25,20,7 // get r3 from opcode in r25 + extr.u r18=r25,6,7 // get r1 from opcode in r25 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 + shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) + adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs + ;; + mov r24=b0 + ;; + ld8 r16=[r16] // get VPD addr + mov b0=r17 + br.many b0 // r19 return value + ;; +kvm_asm_thash_back1: + shr.u r23=r19,61 // get RR number + adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr + adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta + ;; + shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr + ld8 r17=[r16] // get PTA + mov r26=1 + ;; + extr.u r29=r17,2,6 // get pta.size + ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value + ;; + extr.u r25=r25,2,6 // get rr.ps + shl r22=r26,r29 // 1UL << pta.size + ;; + shr.u r23=r19,r25 // vaddr >> rr.ps + adds r26=3,r29 // pta.size + 3 + shl r27=r17,3 // pta << 3 + ;; + shl r23=r23,3 // (vaddr >> rr.ps) << 3 + shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) + movl r16=7<<61 + ;; + adds r22=-1,r22 // (1UL << pta.size) - 1 + shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size + and r19=r19,r16 // vaddr & VRN_MASK + ;; + and r22=r22,r23 // vhpt_offset + or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size) + adds r26=asm_mov_to_reg-asm_mov_from_reg,r20 + ;; + or r19=r19,r22 // calc pval + shladd r17=r18,4,r26 + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + ;; + mov b0=r17 + br.many b0 +END(kvm_asm_thash) + +#define MOV_TO_REG0 \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + nop.b 0x0; \ + ;; \ +}; + + +#define MOV_TO_REG(n) \ +{; \ + mov r##n##=r19; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_FROM_REG(n) \ +{; \ + mov r19=r##n##; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_TO_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + mov r2=r19; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r##n##=r2; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r2=r26; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_to_bank0_reg##n##) + + +#define MOV_FROM_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + nop.b 0x0; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r2=r##n##; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r19=r2; \ + mov r2=r26; \ + mov b0=r30; \ +}; \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_from_bank0_reg##n##) + + +#define JMP_TO_MOV_TO_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_to_bank0_reg##n##; \ + ;; \ +} + + +#define JMP_TO_MOV_FROM_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_from_bank0_reg##n##; \ + ;; \ +} + + +MOV_FROM_BANK0_REG(16) +MOV_FROM_BANK0_REG(17) +MOV_FROM_BANK0_REG(18) +MOV_FROM_BANK0_REG(19) +MOV_FROM_BANK0_REG(20) +MOV_FROM_BANK0_REG(21) +MOV_FROM_BANK0_REG(22) +MOV_FROM_BANK0_REG(23) +MOV_FROM_BANK0_REG(24) +MOV_FROM_BANK0_REG(25) +MOV_FROM_BANK0_REG(26) +MOV_FROM_BANK0_REG(27) +MOV_FROM_BANK0_REG(28) +MOV_FROM_BANK0_REG(29) +MOV_FROM_BANK0_REG(30) +MOV_FROM_BANK0_REG(31) + + +// mov from reg table +ENTRY(asm_mov_from_reg) + MOV_FROM_REG(0) + MOV_FROM_REG(1) + MOV_FROM_REG(2) + MOV_FROM_REG(3) + MOV_FROM_REG(4) + MOV_FROM_REG(5) + MOV_FROM_REG(6) + MOV_FROM_REG(7) + MOV_FROM_REG(8) + MOV_FROM_REG(9) + MOV_FROM_REG(10) + MOV_FROM_REG(11) + MOV_FROM_REG(12) + MOV_FROM_REG(13) + MOV_FROM_REG(14) + MOV_FROM_REG(15) + JMP_TO_MOV_FROM_BANK0_REG(16) + JMP_TO_MOV_FROM_BANK0_REG(17) + JMP_TO_MOV_FROM_BANK0_REG(18) + JMP_TO_MOV_FROM_BANK0_REG(19) + JMP_TO_MOV_FROM_BANK0_REG(20) + JMP_TO_MOV_FROM_BANK0_REG(21) + JMP_TO_MOV_FROM_BANK0_REG(22) + JMP_TO_MOV_FROM_BANK0_REG(23) + JMP_TO_MOV_FROM_BANK0_REG(24) + JMP_TO_MOV_FROM_BANK0_REG(25) + JMP_TO_MOV_FROM_BANK0_REG(26) + JMP_TO_MOV_FROM_BANK0_REG(27) + JMP_TO_MOV_FROM_BANK0_REG(28) + JMP_TO_MOV_FROM_BANK0_REG(29) + JMP_TO_MOV_FROM_BANK0_REG(30) + JMP_TO_MOV_FROM_BANK0_REG(31) + MOV_FROM_REG(32) + MOV_FROM_REG(33) + MOV_FROM_REG(34) + MOV_FROM_REG(35) + MOV_FROM_REG(36) + MOV_FROM_REG(37) + MOV_FROM_REG(38) + MOV_FROM_REG(39) + MOV_FROM_REG(40) + MOV_FROM_REG(41) + MOV_FROM_REG(42) + MOV_FROM_REG(43) + MOV_FROM_REG(44) + MOV_FROM_REG(45) + MOV_FROM_REG(46) + MOV_FROM_REG(47) + MOV_FROM_REG(48) + MOV_FROM_REG(49) + MOV_FROM_REG(50) + MOV_FROM_REG(51) + MOV_FROM_REG(52) + MOV_FROM_REG(53) + MOV_FROM_REG(54) + MOV_FROM_REG(55) + MOV_FROM_REG(56) + MOV_FROM_REG(57) + MOV_FROM_REG(58) + MOV_FROM_REG(59) + MOV_FROM_REG(60) + MOV_FROM_REG(61) + MOV_FROM_REG(62) + MOV_FROM_REG(63) + MOV_FROM_REG(64) + MOV_FROM_REG(65) + MOV_FROM_REG(66) + MOV_FROM_REG(67) + MOV_FROM_REG(68) + MOV_FROM_REG(69) + MOV_FROM_REG(70) + MOV_FROM_REG(71) + MOV_FROM_REG(72) + MOV_FROM_REG(73) + MOV_FROM_REG(74) + MOV_FROM_REG(75) + MOV_FROM_REG(76) + MOV_FROM_REG(77) + MOV_FROM_REG(78) + MOV_FROM_REG(79) + MOV_FROM_REG(80) + MOV_FROM_REG(81) + MOV_FROM_REG(82) + MOV_FROM_REG(83) + MOV_FROM_REG(84) + MOV_FROM_REG(85) + MOV_FROM_REG(86) + MOV_FROM_REG(87) + MOV_FROM_REG(88) + MOV_FROM_REG(89) + MOV_FROM_REG(90) + MOV_FROM_REG(91) + MOV_FROM_REG(92) + MOV_FROM_REG(93) + MOV_FROM_REG(94) + MOV_FROM_REG(95) + MOV_FROM_REG(96) + MOV_FROM_REG(97) + MOV_FROM_REG(98) + MOV_FROM_REG(99) + MOV_FROM_REG(100) + MOV_FROM_REG(101) + MOV_FROM_REG(102) + MOV_FROM_REG(103) + MOV_FROM_REG(104) + MOV_FROM_REG(105) + MOV_FROM_REG(106) + MOV_FROM_REG(107) + MOV_FROM_REG(108) + MOV_FROM_REG(109) + MOV_FROM_REG(110) + MOV_FROM_REG(111) + MOV_FROM_REG(112) + MOV_FROM_REG(113) + MOV_FROM_REG(114) + MOV_FROM_REG(115) + MOV_FROM_REG(116) + MOV_FROM_REG(117) + MOV_FROM_REG(118) + MOV_FROM_REG(119) + MOV_FROM_REG(120) + MOV_FROM_REG(121) + MOV_FROM_REG(122) + MOV_FROM_REG(123) + MOV_FROM_REG(124) + MOV_FROM_REG(125) + MOV_FROM_REG(126) + MOV_FROM_REG(127) +END(asm_mov_from_reg) + + +/* must be in bank 0 + * parameter: + * r31: pr + * r24: b0 + */ +ENTRY(kvm_resume_to_guest) + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 =[r16] + adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + mov r16=cr.ipsr + ;; + ld8 r20 = [r20] + adds r19=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r25=[r19] + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + ;; + (p6) add r18=0x10,r18 + (p7) add r17=1,r17 + ;; + (p6) mov cr.iip=r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + mov cr.ipsr=r16 + adds r19= VPD_VPSR_START_OFFSET,r25 + add r28=PAL_VPS_RESUME_NORMAL,r20 + add r29=PAL_VPS_RESUME_HANDLER,r20 + ;; + ld8 r19=[r19] + mov b0=r29 + cmp.ne p6,p7 = r0,r0 + ;; + tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p6) ld8 r26=[r25] + (p7) mov b0=r28 + mov pr=r31,-2 + br.sptk.many b0 // call pal service + ;; +END(kvm_resume_to_guest) + + +MOV_TO_BANK0_REG(16) +MOV_TO_BANK0_REG(17) +MOV_TO_BANK0_REG(18) +MOV_TO_BANK0_REG(19) +MOV_TO_BANK0_REG(20) +MOV_TO_BANK0_REG(21) +MOV_TO_BANK0_REG(22) +MOV_TO_BANK0_REG(23) +MOV_TO_BANK0_REG(24) +MOV_TO_BANK0_REG(25) +MOV_TO_BANK0_REG(26) +MOV_TO_BANK0_REG(27) +MOV_TO_BANK0_REG(28) +MOV_TO_BANK0_REG(29) +MOV_TO_BANK0_REG(30) +MOV_TO_BANK0_REG(31) + + +// mov to reg table +ENTRY(asm_mov_to_reg) + MOV_TO_REG0 + MOV_TO_REG(1) + MOV_TO_REG(2) + MOV_TO_REG(3) + MOV_TO_REG(4) + MOV_TO_REG(5) + MOV_TO_REG(6) + MOV_TO_REG(7) + MOV_TO_REG(8) + MOV_TO_REG(9) + MOV_TO_REG(10) + MOV_TO_REG(11) + MOV_TO_REG(12) + MOV_TO_REG(13) + MOV_TO_REG(14) + MOV_TO_REG(15) + JMP_TO_MOV_TO_BANK0_REG(16) + JMP_TO_MOV_TO_BANK0_REG(17) + JMP_TO_MOV_TO_BANK0_REG(18) + JMP_TO_MOV_TO_BANK0_REG(19) + JMP_TO_MOV_TO_BANK0_REG(20) + JMP_TO_MOV_TO_BANK0_REG(21) + JMP_TO_MOV_TO_BANK0_REG(22) + JMP_TO_MOV_TO_BANK0_REG(23) + JMP_TO_MOV_TO_BANK0_REG(24) + JMP_TO_MOV_TO_BANK0_REG(25) + JMP_TO_MOV_TO_BANK0_REG(26) + JMP_TO_MOV_TO_BANK0_REG(27) + JMP_TO_MOV_TO_BANK0_REG(28) + JMP_TO_MOV_TO_BANK0_REG(29) + JMP_TO_MOV_TO_BANK0_REG(30) + JMP_TO_MOV_TO_BANK0_REG(31) + MOV_TO_REG(32) + MOV_TO_REG(33) + MOV_TO_REG(34) + MOV_TO_REG(35) + MOV_TO_REG(36) + MOV_TO_REG(37) + MOV_TO_REG(38) + MOV_TO_REG(39) + MOV_TO_REG(40) + MOV_TO_REG(41) + MOV_TO_REG(42) + MOV_TO_REG(43) + MOV_TO_REG(44) + MOV_TO_REG(45) + MOV_TO_REG(46) + MOV_TO_REG(47) + MOV_TO_REG(48) + MOV_TO_REG(49) + MOV_TO_REG(50) + MOV_TO_REG(51) + MOV_TO_REG(52) + MOV_TO_REG(53) + MOV_TO_REG(54) + MOV_TO_REG(55) + MOV_TO_REG(56) + MOV_TO_REG(57) + MOV_TO_REG(58) + MOV_TO_REG(59) + MOV_TO_REG(60) + MOV_TO_REG(61) + MOV_TO_REG(62) + MOV_TO_REG(63) + MOV_TO_REG(64) + MOV_TO_REG(65) + MOV_TO_REG(66) + MOV_TO_REG(67) + MOV_TO_REG(68) + MOV_TO_REG(69) + MOV_TO_REG(70) + MOV_TO_REG(71) + MOV_TO_REG(72) + MOV_TO_REG(73) + MOV_TO_REG(74) + MOV_TO_REG(75) + MOV_TO_REG(76) + MOV_TO_REG(77) + MOV_TO_REG(78) + MOV_TO_REG(79) + MOV_TO_REG(80) + MOV_TO_REG(81) + MOV_TO_REG(82) + MOV_TO_REG(83) + MOV_TO_REG(84) + MOV_TO_REG(85) + MOV_TO_REG(86) + MOV_TO_REG(87) + MOV_TO_REG(88) + MOV_TO_REG(89) + MOV_TO_REG(90) + MOV_TO_REG(91) + MOV_TO_REG(92) + MOV_TO_REG(93) + MOV_TO_REG(94) + MOV_TO_REG(95) + MOV_TO_REG(96) + MOV_TO_REG(97) + MOV_TO_REG(98) + MOV_TO_REG(99) + MOV_TO_REG(100) + MOV_TO_REG(101) + MOV_TO_REG(102) + MOV_TO_REG(103) + MOV_TO_REG(104) + MOV_TO_REG(105) + MOV_TO_REG(106) + MOV_TO_REG(107) + MOV_TO_REG(108) + MOV_TO_REG(109) + MOV_TO_REG(110) + MOV_TO_REG(111) + MOV_TO_REG(112) + MOV_TO_REG(113) + MOV_TO_REG(114) + MOV_TO_REG(115) + MOV_TO_REG(116) + MOV_TO_REG(117) + MOV_TO_REG(118) + MOV_TO_REG(119) + MOV_TO_REG(120) + MOV_TO_REG(121) + MOV_TO_REG(122) + MOV_TO_REG(123) + MOV_TO_REG(124) + MOV_TO_REG(125) + MOV_TO_REG(126) + MOV_TO_REG(127) +END(asm_mov_to_reg) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c new file mode 100644 index 00000000000..5a33f7ed29a --- /dev/null +++ b/arch/ia64/kvm/process.c @@ -0,0 +1,970 @@ +/* + * process.c: handle interruption inject for guests. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + */ +#include "vcpu.h" + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/fpswa.h> +#include <asm/kregs.h> +#include <asm/tlb.h> + +fpswa_interface_t *vmm_fpswa_interface; + +#define IA64_VHPT_TRANS_VECTOR 0x0000 +#define IA64_INST_TLB_VECTOR 0x0400 +#define IA64_DATA_TLB_VECTOR 0x0800 +#define IA64_ALT_INST_TLB_VECTOR 0x0c00 +#define IA64_ALT_DATA_TLB_VECTOR 0x1000 +#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 +#define IA64_INST_KEY_MISS_VECTOR 0x1800 +#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 +#define IA64_DIRTY_BIT_VECTOR 0x2000 +#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 +#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 +#define IA64_BREAK_VECTOR 0x2c00 +#define IA64_EXTINT_VECTOR 0x3000 +#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 +#define IA64_KEY_PERMISSION_VECTOR 0x5100 +#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 +#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 +#define IA64_GENEX_VECTOR 0x5400 +#define IA64_DISABLED_FPREG_VECTOR 0x5500 +#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 +#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ +#define IA64_DEBUG_VECTOR 0x5900 +#define IA64_UNALIGNED_REF_VECTOR 0x5a00 +#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 +#define IA64_FP_FAULT_VECTOR 0x5c00 +#define IA64_FP_TRAP_VECTOR 0x5d00 +#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 +#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 +#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 + +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ + IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ + IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) + +#define DOMN_PAL_REQUEST 0x110000 +#define DOMN_SAL_REQUEST 0x110001 + +static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, + 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, + 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, + 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, + 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, + 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, + 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, + 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 +}; + +static void collect_interruption(struct kvm_vcpu *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = vcpu_get_psr(vcpu); + vcpu_bsw0(vcpu); + if (vpsr & IA64_PSR_IC) { + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD | IA64_PSR_SS + | IA64_PSR_ED)); + vcpu_set_ipsr(vcpu, vpsr); + + /* Currently, for trap, we do not advance IIP to next + * instruction. That's because we assume caller already + * set up IIP correctly + */ + + vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VCPU(vcpu, ifs); + vifs &= ~IA64_IFS_V; + vcpu_set_ifs(vcpu, vifs); + + vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); + } + + vdcr = VCPU(vcpu, dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr |= (vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if (vdcr & IA64_DCR_PP) { + vpsr |= IA64_PSR_PP; + } else { + vpsr &= ~IA64_PSR_PP;; + } + + vcpu_set_psr(vcpu, vpsr); + +} + +void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) +{ + u64 viva; + struct kvm_pt_regs *regs; + union ia64_isr pt_isr; + + regs = vcpu_regs(vcpu); + + /* clear cr.isr.ir (incomplete register frame)*/ + pt_isr.val = VMX(vcpu, cr_isr); + pt_isr.ir = 0; + VMX(vcpu, cr_isr) = pt_isr.val; + + collect_interruption(vcpu); + + viva = vcpu_get_iva(vcpu); + regs->cr_iip = viva + vec; +} + +static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + long vpsr; + u64 value; + + vpsr = VCPU(vcpu, vpsr); + /* Vol2, Table 8-1 */ + if (vpsr & IA64_PSR_IC) { + if (set_ifa) + vcpu_set_ifa(vcpu, vadr); + if (set_itir) { + value = vcpu_get_itir_on_fault(vcpu, vadr); + vcpu_set_itir(vcpu, value); + } + + if (set_iha) { + value = vcpu_thash(vcpu, vadr); + vcpu_set_iha(vcpu, value); + } + } +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void nested_dtlb(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); +} + + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); +} + +/* Deal with: + * VHPT Translation Vector + */ +static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA*/ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); + + +} + +/* + * VHPT Instruction Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + +/* + * VHPT Data Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + + +/* + * Deal with: + * General Exception vector + */ +void _general_exception(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); +} + + +/* + * Illegal Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Illegal Dependency Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_dep(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Reserved Register/Field Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rsv_reg_field(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} +/* + * Privileged Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ + +void privilege_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Unimplement Data Address Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void unimpl_daddr(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Privileged Register Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void privilege_reg(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* Deal with + * Nat consumption vector + * Parameter: + * vaddr: Optional, if t == REGISTER + */ +static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, + enum tlb_miss_type t) +{ + /* If vPSR.ic && t == DATA/INST, IFA */ + if (t == DATA || t == INSTRUCTION) { + /* IFA */ + set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); + } + + inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); +} + +/* + * Instruction Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, INSTRUCTION); +} + +/* + * Register Nat Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rnat_consumption(struct kvm_vcpu *vcpu) +{ + _nat_consumption_fault(vcpu, 0, REGISTER); +} + +/* + * Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* Deal with + * Page not present vector + */ +static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + + +void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +/* Deal with + * Data access rights vector + */ +void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); +} + +fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, + unsigned long *fpsr, unsigned long *isr, unsigned long *pr, + unsigned long *ifs, struct kvm_pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + struct kvm_vcpu *vcpu = current_vcpu; + + uint64_t old_rr7 = ia64_get_rr(7UL<<61); + + if (!vmm_fpswa_interface) + return (fpswa_ret_t) {-1, 0, 0, 0}; + + /* + * Just let fpswa driver to use hardware fp registers. + * No fp register is valid in memory. + */ + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + /*Call host fpswa interface directly to virtualize + *guest fpswa request! + */ + ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); + ia64_srlz_d(); + + ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, + ipsr, fpsr, isr, pr, ifs, &fp_state); + ia64_set_rr(7UL << 61, old_rr7); + ia64_srlz_d(); + return ret; +} + +/* + * Handle floating-point assist faults and traps for domain. + */ +unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, + unsigned long isr) +{ + struct kvm_vcpu *v = current_vcpu; + IA64_BUNDLE bundle; + unsigned long fault_ip; + fpswa_ret_t ret; + + fault_ip = regs->cr_iip; + /* + * When the FP trap occurs, the trapping instruction is completed. + * If ipsr.ri == 0, there is the trapping instruction in previous + * bundle. + */ + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + + if (fetch_code(v, fault_ip, &bundle)) + return -EAGAIN; + + if (!bundle.i64[0] && !bundle.i64[1]) + return -EACCES; + + ret = vmm_fp_emulate(fp_fault, &bundle, ®s->cr_ipsr, ®s->ar_fpsr, + &isr, ®s->pr, ®s->cr_ifs, regs); + return ret.status; +} + +void reflect_interruption(u64 ifa, u64 isr, u64 iim, + u64 vec, struct kvm_pt_regs *regs) +{ + u64 vector; + int status ; + struct kvm_vcpu *vcpu = current_vcpu; + u64 vpsr = VCPU(vcpu, vpsr); + + vector = vec2off[vec]; + + if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { + panic_vm(vcpu); + return; + } + + switch (vec) { + case 32: /*IA64_FP_FAULT_VECTOR*/ + status = vmm_handle_fpu_swa(1, regs, isr); + if (!status) { + vcpu_increment_iip(vcpu); + return; + } else if (-EAGAIN == status) + return; + break; + case 33: /*IA64_FP_TRAP_VECTOR*/ + status = vmm_handle_fpu_swa(0, regs, isr); + if (!status) + return ; + else if (-EAGAIN == status) { + vcpu_decrement_iip(vcpu); + return ; + } + break; + } + + VCPU(vcpu, isr) = isr; + VCPU(vcpu, iipa) = regs->cr_iip; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VCPU(vcpu, iim) = iim; + else + set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); + + inject_guest_interruption(vcpu, vector); +} + +static void set_pal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + /*FIXME:For static and stacked convention, firmware + * has put the parameters in gr28-gr31 before + * break to vmm !!*/ + + p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); + p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); + p->exit_reason = EXIT_REASON_PAL_CALL; +} + +static void set_pal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); + vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); + vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); + vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); + } else + panic_vm(vcpu); +} + +static void set_sal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); + p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); + p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); + p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); + p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); + p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); + p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); + p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); + p->exit_reason = EXIT_REASON_SAL_CALL; +} + +static void set_sal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); + vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); + vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); + vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); + } else + panic_vm(vcpu); +} + +void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, + unsigned long isr, unsigned long iim) +{ + struct kvm_vcpu *v = current_vcpu; + + if (ia64_psr(regs)->cpl == 0) { + /* Allow hypercalls only when cpl = 0. */ + if (iim == DOMN_PAL_REQUEST) { + set_pal_call_data(v); + vmm_transition(v); + set_pal_call_result(v); + vcpu_increment_iip(v); + return; + } else if (iim == DOMN_SAL_REQUEST) { + set_sal_call_data(v); + vmm_transition(v); + set_sal_call_result(v); + vcpu_increment_iip(v); + return; + } + } + reflect_interruption(ifa, isr, iim, 11, regs); +} + +void check_pending_irq(struct kvm_vcpu *vcpu) +{ + int mask, h_pending, h_inservice; + u64 isr; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + h_pending = highest_pending_irq(vcpu); + if (h_pending == NULL_VECTOR) { + update_vhpi(vcpu, NULL_VECTOR); + return; + } + h_inservice = highest_inservice_irq(vcpu); + + vpsr = VCPU(vcpu, vpsr); + mask = irq_masked(vcpu, h_pending, h_inservice); + if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { + isr = vpsr & IA64_PSR_RI; + update_vhpi(vcpu, h_pending); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ + } else if (mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + } else { + /* masked by vpsr.i or vtpr.*/ + update_vhpi(vcpu, h_pending); + } +} + +static void generate_exirq(struct kvm_vcpu *vcpu) +{ + unsigned vpsr; + uint64_t isr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + if (!(vpsr & IA64_PSR_IC)) + panic_vm(vcpu); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ +} + +void vhpi_detection(struct kvm_vcpu *vcpu) +{ + uint64_t threshold, vhpi; + union ia64_tpr vtpr; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vtpr.val = VCPU(vcpu, tpr); + + threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; + vhpi = VCPU(vcpu, vhpi); + if (vhpi > threshold) { + /* interrupt actived*/ + generate_exirq(vcpu); + } +} + + +void leave_hypervisor_tail(void) +{ + struct kvm_vcpu *v = current_vcpu; + + if (VMX(v, timer_check)) { + VMX(v, timer_check) = 0; + if (VMX(v, itc_check)) { + if (vcpu_get_itc(v) > VCPU(v, itm)) { + if (!(VCPU(v, itv) & (1 << 16))) { + vcpu_pend_interrupt(v, VCPU(v, itv) + & 0xff); + VMX(v, itc_check) = 0; + } else { + v->arch.timer_pending = 1; + } + VMX(v, last_itc) = VCPU(v, itm) + 1; + } + } + } + + rmb(); + if (v->arch.irq_new_pending) { + v->arch.irq_new_pending = 0; + VMX(v, irq_check) = 0; + check_pending_irq(v); + return; + } + if (VMX(v, irq_check)) { + VMX(v, irq_check) = 0; + vhpi_detection(v); + } +} + + +static inline void handle_lds(struct kvm_pt_regs *regs) +{ + regs->cr_ipsr |= IA64_PSR_ED; +} + +void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) +{ + unsigned long pte; + union ia64_rr rr; + + rr.val = ia64_get_rr(vadr); + pte = vadr & _PAGE_PPN_MASK; + pte = pte | PHY_PAGE_WB; + thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); + return; +} + +void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) +{ + unsigned long vpsr; + int type; + + u64 vhpt_adr, gppa, pteval, rr, itir; + union ia64_isr misr; + union ia64_pta vpta; + struct thash_data *data; + struct kvm_vcpu *v = current_vcpu; + + vpsr = VCPU(v, vpsr); + misr.val = VMX(v, cr_isr); + + type = vec; + + if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { + if (vec == 2) { + if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { + emulate_io_inst(v, ((vadr << 1) >> 1), 4); + return; + } + } + physical_tlb_miss(v, vadr, type); + return; + } + data = vtlb_lookup(v, vadr, type); + if (data != 0) { + if (type == D_TLB) { + gppa = (vadr & ((1UL << data->ps) - 1)) + + (data->ppn >> (data->ps - 12) << data->ps); + if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { + if (data->pl >= ((regs->cr_ipsr >> + IA64_PSR_CPL0_BIT) & 3)) + emulate_io_inst(v, gppa, data->ma); + else { + vcpu_set_isr(v, misr.val); + data_access_rights(v, vadr); + } + return ; + } + } + thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); + + } else if (type == D_TLB) { + if (misr.sp) { + handle_lds(regs); + return; + } + + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + + if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + alt_dtlb(v, vadr); + } else { + nested_dtlb(v); + } + return ; + } + + vpta.val = vcpu_get_pta(v); + /* avoid recursively walking (short format) VHPT */ + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (!(pteval & _PAGE_P)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { + thash_purge_and_insert(v, pteval, itir, + vadr, D_TLB); + } else if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else { + /* Can't read VHPT. */ + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dvhpt_fault(v, vadr); + } else { + nested_dtlb(v); + } + } + } else if (type == I_TLB) { + if (!(vpsr & IA64_PSR_IC)) + misr.ni = 1; + if (!vhpt_enabled(v, vadr, INST_REF)) { + vcpu_set_isr(v, misr.val); + alt_itlb(v, vadr); + return; + } + + vpta.val = vcpu_get_pta(v); + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (pteval & _PAGE_P) { + if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { + vcpu_set_isr(v, misr.val); + itlb_fault(v, vadr); + return ; + } + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + thash_purge_and_insert(v, pteval, itir, + vadr, I_TLB); + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + } + } else { + vcpu_set_isr(v, misr.val); + ivhpt_fault(v, vadr); + } + } +} + +void kvm_vexirq(struct kvm_vcpu *vcpu) +{ + u64 vpsr, isr; + struct kvm_pt_regs *regs; + + regs = vcpu_regs(vcpu); + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ +} + +void kvm_ia64_handle_irq(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; + vmm_transition(v); + local_irq_restore(psr); + + VMX(v, timer_check) = 1; + +} + +static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) +{ + u64 oldrid, moldrid, oldpsbits, vaddr; + struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; + vaddr = p->vaddr; + + oldrid = VMX(v, vrr[0]); + VMX(v, vrr[0]) = p->rr; + oldpsbits = VMX(v, psbits[0]); + VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); + moldrid = ia64_get_rr(0x0); + ia64_set_rr(0x0, vrrtomrr(p->rr)); + ia64_srlz_d(); + + vaddr = PAGEALIGN(vaddr, p->ps); + thash_purge_entries_remote(v, vaddr, p->ps); + + VMX(v, vrr[0]) = oldrid; + VMX(v, psbits[0]) = oldpsbits; + ia64_set_rr(0x0, moldrid); + ia64_dv_serialize_data(); +} + +static void vcpu_do_resume(struct kvm_vcpu *vcpu) +{ + /*Re-init VHPT and VTLB once from resume*/ + vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); + vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); + + ia64_set_pta(vcpu->arch.vhpt.pta.val); +} + +static void kvm_do_resume_op(struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { + vcpu_do_resume(vcpu); + return; + } + + if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { + thash_purge_all(vcpu); + return; + } + + if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { + while (vcpu->arch.ptc_g_count > 0) + ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); + } +} + +void vmm_transition(struct kvm_vcpu *vcpu) +{ + ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_do_resume_op(vcpu); +} diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S new file mode 100644 index 00000000000..30897d44d61 --- /dev/null +++ b/arch/ia64/kvm/trampoline.S @@ -0,0 +1,1038 @@ +/* Save all processor states + * + * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com> + * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com> + */ + +#include <asm/asmmacro.h> +#include "asm-offsets.h" + + +#define CTX(name) VMM_CTX_##name##_OFFSET + + /* + * r32: context_t base address + */ +#define SAVE_BRANCH_REGS \ + add r2 = CTX(B0),r32; \ + add r3 = CTX(B1),r32; \ + mov r16 = b0; \ + mov r17 = b1; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b2; \ + mov r17 = b3; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b4; \ + mov r17 = b5; \ + ;; \ + st8 [r2]=r16; \ + st8 [r3]=r17; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_BRANCH_REGS \ + add r2 = CTX(B0),r33; \ + add r3 = CTX(B1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b0 = r16; \ + mov b1 = r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b2 = r16; \ + mov b3 = r17; \ + ;; \ + ld8 r16=[r2]; \ + ld8 r17=[r3]; \ + ;; \ + mov b4=r16; \ + mov b5=r17; \ + ;; + + + /* + * r32: context_t base address + * bsw == 1 + * Save all bank1 general registers, r4 ~ r7 + */ +#define SAVE_GENERAL_REGS \ + add r2=CTX(R4),r32; \ + add r3=CTX(R5),r32; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r4,16; \ +.mem.offset 8,0; \ + st8.spill [r3]=r5,16; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r6,48; \ +.mem.offset 8,0; \ + st8.spill [r3]=r7,48; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r12; \ +.mem.offset 8,0; \ + st8.spill [r3]=r13; \ + ;; + + /* + * r33: context_t base address + * bsw == 1 + */ +#define RESTORE_GENERAL_REGS \ + add r2=CTX(R4),r33; \ + add r3=CTX(R5),r33; \ + ;; \ + ld8.fill r4=[r2],16; \ + ld8.fill r5=[r3],16; \ + ;; \ + ld8.fill r6=[r2],48; \ + ld8.fill r7=[r3],48; \ + ;; \ + ld8.fill r12=[r2]; \ + ld8.fill r13 =[r3]; \ + ;; + + + + + /* + * r32: context_t base address + */ +#define SAVE_KERNEL_REGS \ + add r2 = CTX(KR0),r32; \ + add r3 = CTX(KR1),r32; \ + mov r16 = ar.k0; \ + mov r17 = ar.k1; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k2; \ + mov r17 = ar.k3; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k4; \ + mov r17 = ar.k5; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k6; \ + mov r17 = ar.k7; \ + ;; \ + st8 [r2] = r16; \ + st8 [r3] = r17; \ + ;; + + + + /* + * r33: context_t base address + */ +#define RESTORE_KERNEL_REGS \ + add r2 = CTX(KR0),r33; \ + add r3 = CTX(KR1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k0=r16; \ + mov ar.k1=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k2=r16; \ + mov ar.k3=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k4=r16; \ + mov ar.k5=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k6=r16; \ + mov ar.k7=r17; \ + ;; + + + + /* + * r32: context_t base address + */ +#define SAVE_APP_REGS \ + add r2 = CTX(BSPSTORE),r32; \ + mov r16 = ar.bspstore; \ + ;; \ + st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ + mov r16 = ar.rnat; \ + ;; \ + st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ + mov r16 = ar.fcr; \ + ;; \ + st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ + mov r16 = ar.eflag; \ + ;; \ + st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ + mov r16 = ar.cflg; \ + ;; \ + st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ + mov r16 = ar.fsr; \ + ;; \ + st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ + mov r16 = ar.fir; \ + ;; \ + st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ + mov r16 = ar.fdr; \ + ;; \ + st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ + mov r16 = ar.unat; \ + ;; \ + st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ + mov r16 = ar.fpsr; \ + ;; \ + st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ + mov r16 = ar.pfs; \ + ;; \ + st8 [r2] = r16,CTX(LC)-CTX(PFS); \ + mov r16 = ar.lc; \ + ;; \ + st8 [r2] = r16; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_APP_REGS \ + add r2=CTX(BSPSTORE),r33; \ + ;; \ + ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ + ;; \ + mov ar.bspstore=r16; \ + ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ + ;; \ + mov ar.rnat=r16; \ + ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ + ;; \ + mov ar.fcr=r16; \ + ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ + ;; \ + mov ar.eflag=r16; \ + ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ + ;; \ + mov ar.cflg=r16; \ + ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ + ;; \ + mov ar.fsr=r16; \ + ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ + ;; \ + mov ar.fir=r16; \ + ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ + ;; \ + mov ar.fdr=r16; \ + ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ + ;; \ + mov ar.unat=r16; \ + ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ + ;; \ + mov ar.fpsr=r16; \ + ld8 r16=[r2],CTX(LC)-CTX(PFS); \ + ;; \ + mov ar.pfs=r16; \ + ld8 r16=[r2]; \ + ;; \ + mov ar.lc=r16; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_CTL_REGS \ + add r2 = CTX(DCR),r32; \ + mov r16 = cr.dcr; \ + ;; \ + st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ + ;; \ + mov r16 = cr.iva; \ + ;; \ + st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ + ;; \ + mov r16 = cr.pta; \ + ;; \ + st8 [r2] = r16 ; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_CTL_REGS \ + add r2 = CTX(DCR),r33; \ + ;; \ + ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ + ;; \ + mov cr.dcr = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ + ;; \ + mov cr.iva = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2]; \ + ;; \ + mov cr.pta = r16; \ + dv_serialize_data; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_REGION_REGS \ + add r2=CTX(RR0),r32; \ + mov r16=rr[r0]; \ + dep.z r18=1,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=2,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=3,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=4,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=5,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=7,61,3; \ + ;; \ + st8 [r2]=r17,16; \ + mov r16=rr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + ;; + + /* + * r33:context_t base address + */ +#define RESTORE_REGION_REGS \ + add r2=CTX(RR0),r33;\ + mov r18=r0; \ + ;; \ + ld8 r20=[r2],8; \ + ;; /* rr0 */ \ + ld8 r21=[r2],8; \ + ;; /* rr1 */ \ + ld8 r22=[r2],8; \ + ;; /* rr2 */ \ + ld8 r23=[r2],8; \ + ;; /* rr3 */ \ + ld8 r24=[r2],8; \ + ;; /* rr4 */ \ + ld8 r25=[r2],16; \ + ;; /* rr5 */ \ + ld8 r27=[r2]; \ + ;; /* rr7 */ \ + mov rr[r18]=r20; \ + dep.z r18=1,61,3; \ + ;; /* rr1 */ \ + mov rr[r18]=r21; \ + dep.z r18=2,61,3; \ + ;; /* rr2 */ \ + mov rr[r18]=r22; \ + dep.z r18=3,61,3; \ + ;; /* rr3 */ \ + mov rr[r18]=r23; \ + dep.z r18=4,61,3; \ + ;; /* rr4 */ \ + mov rr[r18]=r24; \ + dep.z r18=5,61,3; \ + ;; /* rr5 */ \ + mov rr[r18]=r25; \ + dep.z r18=7,61,3; \ + ;; /* rr7 */ \ + mov rr[r18]=r27; \ + ;; \ + srlz.i; \ + ;; + + + + /* + * r32: context_t base address + * r36~r39:scratch registers + */ +#define SAVE_DEBUG_REGS \ + add r2=CTX(IBR0),r32; \ + add r3=CTX(DBR0),r32; \ + mov r16=ibr[r0]; \ + mov r17=dbr[r0]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=1,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=3,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=4,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=5,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=6,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=7,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + ;; + + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_DEBUG_REGS \ + add r2=CTX(IBR0),r33; \ + add r3=CTX(DBR0),r33; \ + mov r16=7; \ + mov r17=r0; \ + ;; \ + mov ar.lc = r16; \ + ;; \ +1: \ + ld8 r18=[r2],8; \ + ld8 r19=[r3],8; \ + ;; \ + mov ibr[r17]=r18; \ + mov dbr[r17]=r19; \ + ;; \ + srlz.i; \ + ;; \ + add r17=1,r17; \ + br.cloop.sptk 1b; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_FPU_LOW \ + add r2=CTX(F2),r32; \ + add r3=CTX(F3),r32; \ + ;; \ + stf.spill.nta [r2]=f2,32; \ + stf.spill.nta [r3]=f3,32; \ + ;; \ + stf.spill.nta [r2]=f4,32; \ + stf.spill.nta [r3]=f5,32; \ + ;; \ + stf.spill.nta [r2]=f6,32; \ + stf.spill.nta [r3]=f7,32; \ + ;; \ + stf.spill.nta [r2]=f8,32; \ + stf.spill.nta [r3]=f9,32; \ + ;; \ + stf.spill.nta [r2]=f10,32; \ + stf.spill.nta [r3]=f11,32; \ + ;; \ + stf.spill.nta [r2]=f12,32; \ + stf.spill.nta [r3]=f13,32; \ + ;; \ + stf.spill.nta [r2]=f14,32; \ + stf.spill.nta [r3]=f15,32; \ + ;; \ + stf.spill.nta [r2]=f16,32; \ + stf.spill.nta [r3]=f17,32; \ + ;; \ + stf.spill.nta [r2]=f18,32; \ + stf.spill.nta [r3]=f19,32; \ + ;; \ + stf.spill.nta [r2]=f20,32; \ + stf.spill.nta [r3]=f21,32; \ + ;; \ + stf.spill.nta [r2]=f22,32; \ + stf.spill.nta [r3]=f23,32; \ + ;; \ + stf.spill.nta [r2]=f24,32; \ + stf.spill.nta [r3]=f25,32; \ + ;; \ + stf.spill.nta [r2]=f26,32; \ + stf.spill.nta [r3]=f27,32; \ + ;; \ + stf.spill.nta [r2]=f28,32; \ + stf.spill.nta [r3]=f29,32; \ + ;; \ + stf.spill.nta [r2]=f30; \ + stf.spill.nta [r3]=f31; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_FPU_HIGH \ + add r2=CTX(F32),r32; \ + add r3=CTX(F33),r32; \ + ;; \ + stf.spill.nta [r2]=f32,32; \ + stf.spill.nta [r3]=f33,32; \ + ;; \ + stf.spill.nta [r2]=f34,32; \ + stf.spill.nta [r3]=f35,32; \ + ;; \ + stf.spill.nta [r2]=f36,32; \ + stf.spill.nta [r3]=f37,32; \ + ;; \ + stf.spill.nta [r2]=f38,32; \ + stf.spill.nta [r3]=f39,32; \ + ;; \ + stf.spill.nta [r2]=f40,32; \ + stf.spill.nta [r3]=f41,32; \ + ;; \ + stf.spill.nta [r2]=f42,32; \ + stf.spill.nta [r3]=f43,32; \ + ;; \ + stf.spill.nta [r2]=f44,32; \ + stf.spill.nta [r3]=f45,32; \ + ;; \ + stf.spill.nta [r2]=f46,32; \ + stf.spill.nta [r3]=f47,32; \ + ;; \ + stf.spill.nta [r2]=f48,32; \ + stf.spill.nta [r3]=f49,32; \ + ;; \ + stf.spill.nta [r2]=f50,32; \ + stf.spill.nta [r3]=f51,32; \ + ;; \ + stf.spill.nta [r2]=f52,32; \ + stf.spill.nta [r3]=f53,32; \ + ;; \ + stf.spill.nta [r2]=f54,32; \ + stf.spill.nta [r3]=f55,32; \ + ;; \ + stf.spill.nta [r2]=f56,32; \ + stf.spill.nta [r3]=f57,32; \ + ;; \ + stf.spill.nta [r2]=f58,32; \ + stf.spill.nta [r3]=f59,32; \ + ;; \ + stf.spill.nta [r2]=f60,32; \ + stf.spill.nta [r3]=f61,32; \ + ;; \ + stf.spill.nta [r2]=f62,32; \ + stf.spill.nta [r3]=f63,32; \ + ;; \ + stf.spill.nta [r2]=f64,32; \ + stf.spill.nta [r3]=f65,32; \ + ;; \ + stf.spill.nta [r2]=f66,32; \ + stf.spill.nta [r3]=f67,32; \ + ;; \ + stf.spill.nta [r2]=f68,32; \ + stf.spill.nta [r3]=f69,32; \ + ;; \ + stf.spill.nta [r2]=f70,32; \ + stf.spill.nta [r3]=f71,32; \ + ;; \ + stf.spill.nta [r2]=f72,32; \ + stf.spill.nta [r3]=f73,32; \ + ;; \ + stf.spill.nta [r2]=f74,32; \ + stf.spill.nta [r3]=f75,32; \ + ;; \ + stf.spill.nta [r2]=f76,32; \ + stf.spill.nta [r3]=f77,32; \ + ;; \ + stf.spill.nta [r2]=f78,32; \ + stf.spill.nta [r3]=f79,32; \ + ;; \ + stf.spill.nta [r2]=f80,32; \ + stf.spill.nta [r3]=f81,32; \ + ;; \ + stf.spill.nta [r2]=f82,32; \ + stf.spill.nta [r3]=f83,32; \ + ;; \ + stf.spill.nta [r2]=f84,32; \ + stf.spill.nta [r3]=f85,32; \ + ;; \ + stf.spill.nta [r2]=f86,32; \ + stf.spill.nta [r3]=f87,32; \ + ;; \ + stf.spill.nta [r2]=f88,32; \ + stf.spill.nta [r3]=f89,32; \ + ;; \ + stf.spill.nta [r2]=f90,32; \ + stf.spill.nta [r3]=f91,32; \ + ;; \ + stf.spill.nta [r2]=f92,32; \ + stf.spill.nta [r3]=f93,32; \ + ;; \ + stf.spill.nta [r2]=f94,32; \ + stf.spill.nta [r3]=f95,32; \ + ;; \ + stf.spill.nta [r2]=f96,32; \ + stf.spill.nta [r3]=f97,32; \ + ;; \ + stf.spill.nta [r2]=f98,32; \ + stf.spill.nta [r3]=f99,32; \ + ;; \ + stf.spill.nta [r2]=f100,32; \ + stf.spill.nta [r3]=f101,32; \ + ;; \ + stf.spill.nta [r2]=f102,32; \ + stf.spill.nta [r3]=f103,32; \ + ;; \ + stf.spill.nta [r2]=f104,32; \ + stf.spill.nta [r3]=f105,32; \ + ;; \ + stf.spill.nta [r2]=f106,32; \ + stf.spill.nta [r3]=f107,32; \ + ;; \ + stf.spill.nta [r2]=f108,32; \ + stf.spill.nta [r3]=f109,32; \ + ;; \ + stf.spill.nta [r2]=f110,32; \ + stf.spill.nta [r3]=f111,32; \ + ;; \ + stf.spill.nta [r2]=f112,32; \ + stf.spill.nta [r3]=f113,32; \ + ;; \ + stf.spill.nta [r2]=f114,32; \ + stf.spill.nta [r3]=f115,32; \ + ;; \ + stf.spill.nta [r2]=f116,32; \ + stf.spill.nta [r3]=f117,32; \ + ;; \ + stf.spill.nta [r2]=f118,32; \ + stf.spill.nta [r3]=f119,32; \ + ;; \ + stf.spill.nta [r2]=f120,32; \ + stf.spill.nta [r3]=f121,32; \ + ;; \ + stf.spill.nta [r2]=f122,32; \ + stf.spill.nta [r3]=f123,32; \ + ;; \ + stf.spill.nta [r2]=f124,32; \ + stf.spill.nta [r3]=f125,32; \ + ;; \ + stf.spill.nta [r2]=f126; \ + stf.spill.nta [r3]=f127; \ + ;; + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_LOW \ + add r2 = CTX(F2), r33; \ + add r3 = CTX(F3), r33; \ + ;; \ + ldf.fill.nta f2 = [r2], 32; \ + ldf.fill.nta f3 = [r3], 32; \ + ;; \ + ldf.fill.nta f4 = [r2], 32; \ + ldf.fill.nta f5 = [r3], 32; \ + ;; \ + ldf.fill.nta f6 = [r2], 32; \ + ldf.fill.nta f7 = [r3], 32; \ + ;; \ + ldf.fill.nta f8 = [r2], 32; \ + ldf.fill.nta f9 = [r3], 32; \ + ;; \ + ldf.fill.nta f10 = [r2], 32; \ + ldf.fill.nta f11 = [r3], 32; \ + ;; \ + ldf.fill.nta f12 = [r2], 32; \ + ldf.fill.nta f13 = [r3], 32; \ + ;; \ + ldf.fill.nta f14 = [r2], 32; \ + ldf.fill.nta f15 = [r3], 32; \ + ;; \ + ldf.fill.nta f16 = [r2], 32; \ + ldf.fill.nta f17 = [r3], 32; \ + ;; \ + ldf.fill.nta f18 = [r2], 32; \ + ldf.fill.nta f19 = [r3], 32; \ + ;; \ + ldf.fill.nta f20 = [r2], 32; \ + ldf.fill.nta f21 = [r3], 32; \ + ;; \ + ldf.fill.nta f22 = [r2], 32; \ + ldf.fill.nta f23 = [r3], 32; \ + ;; \ + ldf.fill.nta f24 = [r2], 32; \ + ldf.fill.nta f25 = [r3], 32; \ + ;; \ + ldf.fill.nta f26 = [r2], 32; \ + ldf.fill.nta f27 = [r3], 32; \ + ;; \ + ldf.fill.nta f28 = [r2], 32; \ + ldf.fill.nta f29 = [r3], 32; \ + ;; \ + ldf.fill.nta f30 = [r2], 32; \ + ldf.fill.nta f31 = [r3], 32; \ + ;; + + + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_HIGH \ + add r2 = CTX(F32), r33; \ + add r3 = CTX(F33), r33; \ + ;; \ + ldf.fill.nta f32 = [r2], 32; \ + ldf.fill.nta f33 = [r3], 32; \ + ;; \ + ldf.fill.nta f34 = [r2], 32; \ + ldf.fill.nta f35 = [r3], 32; \ + ;; \ + ldf.fill.nta f36 = [r2], 32; \ + ldf.fill.nta f37 = [r3], 32; \ + ;; \ + ldf.fill.nta f38 = [r2], 32; \ + ldf.fill.nta f39 = [r3], 32; \ + ;; \ + ldf.fill.nta f40 = [r2], 32; \ + ldf.fill.nta f41 = [r3], 32; \ + ;; \ + ldf.fill.nta f42 = [r2], 32; \ + ldf.fill.nta f43 = [r3], 32; \ + ;; \ + ldf.fill.nta f44 = [r2], 32; \ + ldf.fill.nta f45 = [r3], 32; \ + ;; \ + ldf.fill.nta f46 = [r2], 32; \ + ldf.fill.nta f47 = [r3], 32; \ + ;; \ + ldf.fill.nta f48 = [r2], 32; \ + ldf.fill.nta f49 = [r3], 32; \ + ;; \ + ldf.fill.nta f50 = [r2], 32; \ + ldf.fill.nta f51 = [r3], 32; \ + ;; \ + ldf.fill.nta f52 = [r2], 32; \ + ldf.fill.nta f53 = [r3], 32; \ + ;; \ + ldf.fill.nta f54 = [r2], 32; \ + ldf.fill.nta f55 = [r3], 32; \ + ;; \ + ldf.fill.nta f56 = [r2], 32; \ + ldf.fill.nta f57 = [r3], 32; \ + ;; \ + ldf.fill.nta f58 = [r2], 32; \ + ldf.fill.nta f59 = [r3], 32; \ + ;; \ + ldf.fill.nta f60 = [r2], 32; \ + ldf.fill.nta f61 = [r3], 32; \ + ;; \ + ldf.fill.nta f62 = [r2], 32; \ + ldf.fill.nta f63 = [r3], 32; \ + ;; \ + ldf.fill.nta f64 = [r2], 32; \ + ldf.fill.nta f65 = [r3], 32; \ + ;; \ + ldf.fill.nta f66 = [r2], 32; \ + ldf.fill.nta f67 = [r3], 32; \ + ;; \ + ldf.fill.nta f68 = [r2], 32; \ + ldf.fill.nta f69 = [r3], 32; \ + ;; \ + ldf.fill.nta f70 = [r2], 32; \ + ldf.fill.nta f71 = [r3], 32; \ + ;; \ + ldf.fill.nta f72 = [r2], 32; \ + ldf.fill.nta f73 = [r3], 32; \ + ;; \ + ldf.fill.nta f74 = [r2], 32; \ + ldf.fill.nta f75 = [r3], 32; \ + ;; \ + ldf.fill.nta f76 = [r2], 32; \ + ldf.fill.nta f77 = [r3], 32; \ + ;; \ + ldf.fill.nta f78 = [r2], 32; \ + ldf.fill.nta f79 = [r3], 32; \ + ;; \ + ldf.fill.nta f80 = [r2], 32; \ + ldf.fill.nta f81 = [r3], 32; \ + ;; \ + ldf.fill.nta f82 = [r2], 32; \ + ldf.fill.nta f83 = [r3], 32; \ + ;; \ + ldf.fill.nta f84 = [r2], 32; \ + ldf.fill.nta f85 = [r3], 32; \ + ;; \ + ldf.fill.nta f86 = [r2], 32; \ + ldf.fill.nta f87 = [r3], 32; \ + ;; \ + ldf.fill.nta f88 = [r2], 32; \ + ldf.fill.nta f89 = [r3], 32; \ + ;; \ + ldf.fill.nta f90 = [r2], 32; \ + ldf.fill.nta f91 = [r3], 32; \ + ;; \ + ldf.fill.nta f92 = [r2], 32; \ + ldf.fill.nta f93 = [r3], 32; \ + ;; \ + ldf.fill.nta f94 = [r2], 32; \ + ldf.fill.nta f95 = [r3], 32; \ + ;; \ + ldf.fill.nta f96 = [r2], 32; \ + ldf.fill.nta f97 = [r3], 32; \ + ;; \ + ldf.fill.nta f98 = [r2], 32; \ + ldf.fill.nta f99 = [r3], 32; \ + ;; \ + ldf.fill.nta f100 = [r2], 32; \ + ldf.fill.nta f101 = [r3], 32; \ + ;; \ + ldf.fill.nta f102 = [r2], 32; \ + ldf.fill.nta f103 = [r3], 32; \ + ;; \ + ldf.fill.nta f104 = [r2], 32; \ + ldf.fill.nta f105 = [r3], 32; \ + ;; \ + ldf.fill.nta f106 = [r2], 32; \ + ldf.fill.nta f107 = [r3], 32; \ + ;; \ + ldf.fill.nta f108 = [r2], 32; \ + ldf.fill.nta f109 = [r3], 32; \ + ;; \ + ldf.fill.nta f110 = [r2], 32; \ + ldf.fill.nta f111 = [r3], 32; \ + ;; \ + ldf.fill.nta f112 = [r2], 32; \ + ldf.fill.nta f113 = [r3], 32; \ + ;; \ + ldf.fill.nta f114 = [r2], 32; \ + ldf.fill.nta f115 = [r3], 32; \ + ;; \ + ldf.fill.nta f116 = [r2], 32; \ + ldf.fill.nta f117 = [r3], 32; \ + ;; \ + ldf.fill.nta f118 = [r2], 32; \ + ldf.fill.nta f119 = [r3], 32; \ + ;; \ + ldf.fill.nta f120 = [r2], 32; \ + ldf.fill.nta f121 = [r3], 32; \ + ;; \ + ldf.fill.nta f122 = [r2], 32; \ + ldf.fill.nta f123 = [r3], 32; \ + ;; \ + ldf.fill.nta f124 = [r2], 32; \ + ldf.fill.nta f125 = [r3], 32; \ + ;; \ + ldf.fill.nta f126 = [r2], 32; \ + ldf.fill.nta f127 = [r3], 32; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_PTK_REGS \ + add r2=CTX(PKR0), r32; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + mov r18=pkr[r17]; \ + ;; \ + srlz.i; \ + ;; \ + st8 [r2]=r18, 8; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_PTK_REGS \ + add r2=CTX(PKR0), r33; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + ld8 r18=[r2], 8; \ + ;; \ + mov pkr[r17]=r18; \ + ;; \ + srlz.i; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + + +/* + * void vmm_trampoline( context_t * from, + * context_t * to) + * + * from: r32 + * to: r33 + * note: interrupt disabled before call this function. + */ +GLOBAL_ENTRY(vmm_trampoline) + mov r16 = psr + adds r2 = CTX(PSR), r32 + ;; + st8 [r2] = r16, 8 // psr + mov r17 = pr + ;; + st8 [r2] = r17, 8 // pr + mov r18 = ar.unat + ;; + st8 [r2] = r18 + mov r17 = ar.rsc + ;; + adds r2 = CTX(RSC),r32 + ;; + st8 [r2]= r17 + mov ar.rsc =0 + flushrs + ;; + SAVE_GENERAL_REGS + ;; + SAVE_KERNEL_REGS + ;; + SAVE_APP_REGS + ;; + SAVE_BRANCH_REGS + ;; + SAVE_CTL_REGS + ;; + SAVE_REGION_REGS + ;; + //SAVE_DEBUG_REGS + ;; + rsm psr.dfl + ;; + srlz.d + ;; + SAVE_FPU_LOW + ;; + rsm psr.dfh + ;; + srlz.d + ;; + SAVE_FPU_HIGH + ;; + SAVE_PTK_REGS + ;; + RESTORE_PTK_REGS + ;; + RESTORE_FPU_HIGH + ;; + RESTORE_FPU_LOW + ;; + //RESTORE_DEBUG_REGS + ;; + RESTORE_REGION_REGS + ;; + RESTORE_CTL_REGS + ;; + RESTORE_BRANCH_REGS + ;; + RESTORE_APP_REGS + ;; + RESTORE_KERNEL_REGS + ;; + RESTORE_GENERAL_REGS + ;; + adds r2=CTX(PSR), r33 + ;; + ld8 r16=[r2], 8 // psr + ;; + mov psr.l=r16 + ;; + srlz.d + ;; + ld8 r16=[r2], 8 // pr + ;; + mov pr =r16,-1 + ld8 r16=[r2] // unat + ;; + mov ar.unat=r16 + ;; + adds r2=CTX(RSC),r33 + ;; + ld8 r16 =[r2] + ;; + mov ar.rsc = r16 + ;; + br.ret.sptk.few b0 +END(vmm_trampoline) diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c new file mode 100644 index 00000000000..e44027ce566 --- /dev/null +++ b/arch/ia64/kvm/vcpu.c @@ -0,0 +1,2163 @@ +/* + * kvm_vcpu.c: handling all virtual cpu related thing. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang <xiantao.zhang@intel.com> + */ + +#include <linux/kvm_host.h> +#include <linux/types.h> + +#include <asm/processor.h> +#include <asm/ia64regs.h> +#include <asm/gcc_intrin.h> +#include <asm/kregs.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> + +#include "asm-offsets.h" +#include "vcpu.h" + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, + /* (1,0,0)->(1,1,1) */ + {0, 0, 0, 0, 0, 0, 0, SW_P2V}, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. + * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, +}; + +void physical_mode_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mode_flags = GUEST_IN_PHY; +} + +void switch_to_physical_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + /* Save original virtual mode rr[0] and rr[4] */ + psr = ia64_clear_ic(); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); + ia64_srlz_d(); + + ia64_set_psr(psr); + return; +} + + +void switch_to_virtual_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); + ia64_srlz_d(); + ia64_set_psr(psr); + return; +} + +static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + int act; + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val);*/ + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse + *save/restor when guest physical mode. + */ + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which is saved when entering + * guest physical mode + */ + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + break; + case SW_NOP: + break; + default: + /* Sanity check */ + break; + } + return; +} + + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + + if ((old_psr.dt != new_psr.dt) + || (old_psr.it != new_psr.it) + || (old_psr.rt != new_psr.rt)) + switch_mm_mode(vcpu, old_psr, new_psr); + + return; +} + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void prepare_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags |= GUEST_PHY_EMUL; + switch_to_virtual_rid(vcpu); + } + return; +} + +/* Recover always follows prepare */ +void recover_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_physical_rid(vcpu); + vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; + return; +} + +#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) + +static u16 gr_info[32] = { + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + RPT(r1), RPT(r2), RPT(r3), + RPT(r4), RPT(r5), RPT(r6), RPT(r7), + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 + +static inline unsigned long +rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +/* + * Return the (rotated) index for floating point register + * be in the REGNUM (REGNUM must range from 32-127, + * result is in the range from 0-95. + */ +static inline unsigned long fph_index(struct kvm_pt_regs *regs, + long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. + */ +static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, + long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + int i = 0; + + if (num_regs < 0) + delta -= 0x3e; + if (delta < 0) { + while (delta <= -0x3f) { + i--; + delta += 0x3f; + } + } else { + while (delta >= 0x3f) { + i++; + delta -= 0x3f; + } + } + + return addr + num_regs + i; +} + +static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long *val, int *nat) +{ + unsigned long *bsp, *addr, *rnat_addr, *bspstore; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + new_rsc = old_rsc&(~(0x3)); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + bsp = kbs + (regs->loadrs >> 19); + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + if (addr >= bspstore) { + ia64_flushrs(); + ia64_mf(); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + } + *val = *addr; + if (nat) { + if (bspstore < rnat_addr) + *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) + & nat_mask); + else + *nat = (int)!!((*rnat_addr) & nat_mask); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); + } +} + +void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long val, unsigned long nat) +{ + unsigned long *bsp, *bspstore, *addr, *rnat_addr; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc, psr; + unsigned long rnat; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + if (addr >= bspstore) { + + ia64_flushrs(); + ia64_mf(); + *addr = val; + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + if (bspstore < rnat_addr) + rnat = rnat & (~nat_mask); + else + *rnat_addr = (*rnat_addr)&(~nat_mask); + + ia64_mf(); + ia64_loadrs(); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } else { + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + *addr = val; + if (bspstore < rnat_addr) + rnat = rnat&(~nat_mask); + else + *rnat_addr = (*rnat_addr) & (~nat_mask); + + ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } + local_irq_restore(psr); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); +} + +void getreg(unsigned long regnum, unsigned long *val, + int *nat, struct kvm_pt_regs *regs) +{ + unsigned long addr, *unat; + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat;; + + addr += gr_info[regnum]; + + *val = *(unsigned long *)addr; + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; +} + +void setreg(unsigned long regnum, unsigned long val, + int nat, struct kvm_pt_regs *regs) +{ + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat; + /* + * add offset from base of struct + * and do it ! + */ + addr += gr_info[regnum]; + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << ((addr >> 3) & 0x3f); + if (nat) + *unat |= bitmask; + else + *unat &= ~bitmask; + +} + +u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + u64 val; + + if (!reg) + return 0; + getreg(reg, &val, 0, regs); + return val; +} + +void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + + if (!reg) + return; + if (reg >= sof + 32) + return; + setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ +} + +void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_stf_spill(fpval, reg); \ + break + + switch (regnum) { + CASE_FIXED_FP(0); + CASE_FIXED_FP(1); + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +#undef CASE_FIXED_FP +} + +void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); + +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_ldf_fill(reg, fpval); \ + break + + switch (regnum) { + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +} + +void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + if (reg > 1) + setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +/************************************************************************ + * lsapic timer + ***********************************************************************/ +u64 vcpu_get_itc(struct kvm_vcpu *vcpu) +{ + unsigned long guest_itc; + guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); + + if (guest_itc >= VMX(vcpu, last_itc)) { + VMX(vcpu, last_itc) = guest_itc; + return guest_itc; + } else + return VMX(vcpu, last_itc); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); +static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_vcpu *v; + int i; + long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); + unsigned long vitv = VCPU(vcpu, itv); + + if (vcpu->vcpu_id == 0) { + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + VMX(v, itc_offset) = itc_offset; + VMX(v, last_itc) = 0; + } + } + VMX(vcpu, last_itc) = 0; + if (VCPU(vcpu, itm) <= val) { + VMX(vcpu, itc_check) = 0; + vcpu_unpend_interrupt(vcpu, vitv); + } else { + VMX(vcpu, itc_check) = 1; + vcpu_set_itm(vcpu, VCPU(vcpu, itm)); + } + +} + +static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itm)); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long vitv = VCPU(vcpu, itv); + VCPU(vcpu, itm) = val; + + if (val > vcpu_get_itc(vcpu)) { + VMX(vcpu, itc_check) = 1; + vcpu_unpend_interrupt(vcpu, vitv); + VMX(vcpu, timer_pending) = 0; + } else + VMX(vcpu, itc_check) = 0; +} + +#define ITV_VECTOR(itv) (itv&0xff) +#define ITV_IRQ_MASK(itv) (itv&(1<<16)) + +static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itv) = val; + if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { + vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); + vcpu->arch.timer_pending = 0; + } +} + +static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) +{ + int vec; + + vec = highest_inservice_irq(vcpu); + if (vec == NULL_VECTOR) + return; + VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); + VCPU(vcpu, eoi) = 0; + vcpu->arch.irq_new_pending = 1; + +} + +/* See Table 5-8 in SDM vol2 for the definition */ +int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) +{ + union ia64_tpr vtpr; + + vtpr.val = VCPU(vcpu, tpr); + + if (h_inservice == NMI_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == NMI_VECTOR) { + /* Non Maskable Interrupt */ + return IRQ_NO_MASKED; + } + + if (h_inservice == ExtINT_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == ExtINT_VECTOR) { + if (vtpr.mmi) { + /* mask all external IRQ */ + return IRQ_MASKED_BY_VTPR; + } else + return IRQ_NO_MASKED; + } + + if (is_higher_irq(h_pending, h_inservice)) { + if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) + return IRQ_NO_MASKED; + else + return IRQ_MASKED_BY_VTPR; + } else { + return IRQ_MASKED_BY_INSVC; + } +} + +void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + + vcpu->arch.irq_new_pending = 1; +} + +void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + if (ret) { + vcpu->arch.irq_new_pending = 1; + wmb(); + } +} + +void update_vhpi(struct kvm_vcpu *vcpu, int vec) +{ + u64 vhpi; + + if (vec == NULL_VECTOR) + vhpi = 0; + else if (vec == NMI_VECTOR) + vhpi = 32; + else if (vec == ExtINT_VECTOR) + vhpi = 16; + else + vhpi = vec >> 4; + + VCPU(vcpu, vhpi) = vhpi; + if (VCPU(vcpu, vac).a_int) + ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, + (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); +} + +u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) +{ + int vec, h_inservice, mask; + + vec = highest_pending_irq(vcpu); + h_inservice = highest_inservice_irq(vcpu); + mask = irq_masked(vcpu, vec, h_inservice); + if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + return IA64_SPURIOUS_INT_VECTOR; + } + if (mask == IRQ_MASKED_BY_VTPR) { + update_vhpi(vcpu, vec); + return IA64_SPURIOUS_INT_VECTOR; + } + VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); + vcpu_unpend_interrupt(vcpu, vec); + return (u64)vec; +} + +/************************************************************************** + Privileged operation emulation routines + **************************************************************************/ +u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_pta vpta; + union ia64_rr vrr; + u64 pval; + u64 vhpt_offset; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, + vpta.val, 0, 0, 0, 0); + } else { + pval = (vadr & VRN_MASK) | vhpt_offset | + (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); + } + return pval; +} + +u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_rr vrr; + union ia64_pta vpta; + u64 pval; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, + 0, 0, 0, 0, 0); + } else + pval = 1; + + return pval; +} + +u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) +{ + struct thash_data *data; + union ia64_pta vpta; + u64 key; + + vpta.val = vcpu_get_pta(vcpu); + if (vpta.vf == 0) { + key = 1; + return key; + } + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (!data || !data->p) + key = 1; + else + key = data->key; + + return key; +} + + + +void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long thash, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + thash = vcpu_thash(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); +} + + +void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tag, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + tag = vcpu_ttag(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); +} + +int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) +{ + struct thash_data *data; + union ia64_isr visr, pt_isr; + struct kvm_pt_regs *regs; + struct ia64_psr vpsr; + + regs = vcpu_regs(vcpu); + pt_isr.val = VMX(vcpu, cr_isr); + visr.val = 0; + visr.ei = pt_isr.ei; + visr.ir = pt_isr.ir; + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + visr.na = 1; + + data = vhpt_lookup(vadr); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else { + *padr = (data->gpaddr >> data->ps << data->ps) | + (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else{ + *padr = ((data->ppn >> (data->ps - 12)) << data->ps) + | (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + if (!vhpt_enabled(vcpu, vadr, NA_REF)) { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else { + nested_dtlb(vcpu); + return IA64_FAULT; + } + } else { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + + return IA64_NO_FAULT; +} + + +int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + + if (vcpu_tpa(vcpu, r3, &r1)) + return IA64_FAULT; + + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + r1 = vcpu_tak(vcpu, r3); + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); +} + + +/************************************ + * Insert/Purge translation register/cache + ************************************/ +void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); +} + +void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); +} + +void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 ps, va, rid; + struct thash_data *p_itr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + rid = vcpu_get_rr(vcpu, ifa); + rid = rid & RR_RID_MASK; + p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; + vcpu_set_tr(p_itr, pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, itr_regions), va); +} + + +void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 gpfn; + u64 ps, va, rid; + struct thash_data *p_dtr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); + gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; + if (__gpfn_is_io(gpfn)) + pte |= VTLB_PTE_IO; + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; + vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], + pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); +} + +void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) + vcpu->arch.dtrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) + vcpu->arch.itrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + va = PAGEALIGN(va, ps); + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) +{ + thash_purge_all(vcpu); +} + +void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + local_irq_save(psr); + p->exit_reason = EXIT_REASON_PTC_G; + + p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); + p->u.ptc_g_data.vaddr = va; + p->u.ptc_g_data.ps = ps; + vmm_transition(vcpu); + /* Do Local Purge Here*/ + vcpu_ptc_l(vcpu, va, ps); + local_irq_restore(psr); +} + + +void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + vcpu_ptc_ga(vcpu, va, ps); +} + +void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + vcpu_ptc_e(vcpu, ifa); +} + +void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); +} + +void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_d(vcpu, slot, pte, itir, ifa); +} + + + +void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_i(vcpu, slot, pte, itir, ifa); +} + +void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_d(vcpu, pte, itir, ifa); +} + +void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_i(vcpu, pte, itir, ifa); +} + +/************************************* + * Moves to semi-privileged registers + *************************************/ + +void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long imm; + + if (inst.M30.s) + imm = -inst.M30.imm; + else + imm = inst.M30.imm; + + vcpu_set_itc(vcpu, imm); +} + +void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M29.r2); + vcpu_set_itc(vcpu, r2); +} + + +void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1; + + r1 = vcpu_get_itc(vcpu); + vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); +} +/************************************************************************** + struct kvm_vcpu*protection key register access routines + **************************************************************************/ + +unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + return ((unsigned long)ia64_get_pkr(reg)); +} + +void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) +{ + ia64_set_pkr(reg, val); +} + + +unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + + +/******************************** + * Moves to privileged registers + ********************************/ +unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, + unsigned long val) +{ + union ia64_rr oldrr, newrr; + unsigned long rrval; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + oldrr.val = vcpu_get_rr(vcpu, reg); + newrr.val = val; + vcpu->arch.vrr[reg >> VRN_SHIFT] = val; + + switch ((unsigned long)(reg >> VRN_SHIFT)) { + case VRN6: + vcpu->arch.vmm_rr = vrrtomrr(val); + local_irq_save(psr); + p->exit_reason = EXIT_REASON_SWITCH_RR6; + vmm_transition(vcpu); + local_irq_restore(psr); + break; + case VRN4: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + case VRN0: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + default: + ia64_set_rr(reg, vrrtomrr(val)); + break; + } + + return (IA64_NO_FAULT); +} + + + +void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_rr(vcpu, r3, r2); +} + +void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmc(vcpu, r3, r2); +} + +void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmd(vcpu, r3, r2); +} + +void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + u64 r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pkr(vcpu, r3, r2); +} + + + +void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_rr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pkr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_dbr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_ibr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pmc(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + + +unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) +{ + /* FIXME: This could get called as a result of a rsvd-reg fault */ + if (reg > (ia64_get_cpuid(3) & 0xff)) + return 0; + else + return ia64_get_cpuid(reg); +} + +void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_cpuid(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) +{ + VCPU(vcpu, tpr) = val; + vcpu->arch.irq_check = 1; +} + +unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M32.r2); + VCPU(vcpu, vcr[inst.M32.cr3]) = r2; + + switch (inst.M32.cr3) { + case 0: + vcpu_set_dcr(vcpu, r2); + break; + case 1: + vcpu_set_itm(vcpu, r2); + break; + case 66: + vcpu_set_tpr(vcpu, r2); + break; + case 67: + vcpu_set_eoi(vcpu, r2); + break; + default: + break; + } + + return 0; +} + + +unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tgt = inst.M33.r1; + unsigned long val; + + switch (inst.M33.cr3) { + case 65: + val = vcpu_get_ivr(vcpu); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + + case 67: + vcpu_set_gr(vcpu, tgt, 0L, 0); + break; + default: + val = VCPU(vcpu, vcr[inst.M33.cr3]); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + } + + return 0; +} + + + +void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) +{ + + unsigned long mask; + struct kvm_pt_regs *regs; + struct ia64_psr old_psr, new_psr; + + old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + regs = vcpu_regs(vcpu); + /* We only support guest as: + * vpsr.pk = 0 + * vpsr.is = 0 + * Otherwise panic + */ + if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) + panic_vm(vcpu); + + /* + * For those IA64_PSR bits: id/da/dd/ss/ed/ia + * Since these bits will become 0, after success execution of each + * instruction, we will change set them to mIA64_PSR + */ + VCPU(vcpu, vpsr) = val + & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); + + if (!old_psr.i && (val & IA64_PSR_I)) { + /* vpsr.i 0->1 */ + vcpu->arch.irq_check = 1; + } + new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + /* + * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) + * , except for the following bits: + * ic/i/dt/si/rt/mc/it/bn/vm + */ + mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + + IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + + IA64_PSR_VM; + + regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); + + check_mm_mode_switch(vcpu, old_psr, new_psr); + + return ; +} + +unsigned long vcpu_cover(struct kvm_vcpu *vcpu) +{ + struct ia64_psr vpsr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + if (!vpsr.ic) + VCPU(vcpu, ifs) = regs->cr_ifs; + regs->cr_ifs = IA64_IFS_V; + return (IA64_NO_FAULT); +} + + + +/************************************************************************** + VCPU banked general register access routines + **************************************************************************/ +#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ ( \ + ";;extr.u %0 = %3,%6,16;;\n" \ + "dep %1 = %0, %1, 0, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 16, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ + "r"(*runat), "r"(b1unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw0(struct kvm_vcpu *vcpu) +{ + unsigned long i; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + + if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { + for (i = 0; i < 16; i++) { + *b1++ = *r; + *r++ = *b0++; + } + vcpu_bsw0_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; + } +} + +#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ + "dep %1 = %0, %1, 16, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 0, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ + "r"(*runat), "r"(b0unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw1(struct kvm_vcpu *vcpu) +{ + unsigned long i; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { + for (i = 0; i < 16; i++) { + *b0++ = *r; + *r++ = *b1++; + } + vcpu_bsw1_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) |= IA64_PSR_BN; + } +} + + + + +void vcpu_rfi(struct kvm_vcpu *vcpu) +{ + unsigned long ifs, psr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + psr = VCPU(vcpu, ipsr); + if (psr & IA64_PSR_BN) + vcpu_bsw1(vcpu); + else + vcpu_bsw0(vcpu); + vcpu_set_psr(vcpu, psr); + ifs = VCPU(vcpu, ifs); + if (ifs >> 63) + regs->cr_ifs = ifs; + regs->cr_iip = VCPU(vcpu, iip); +} + + +/* + VPSR can't keep track of below bits of guest PSR + This function gets guest PSR + */ + +unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) +{ + unsigned long mask; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | + IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; + return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); +} + +void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr &= (~imm24); + vcpu_set_psr(vcpu, vpsr); +} + +void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr |= imm24; + vcpu_set_psr(vcpu, vpsr); +} + +/* Generate Mask + * Parameter: + * bit -- starting bit + * len -- how many bits + */ +#define MASK(bit,len) \ +({ \ + __u64 ret; \ + \ + __asm __volatile("dep %0=-1, r0, %1, %2"\ + : "=r" (ret): \ + "M" (bit), \ + "M" (len)); \ + ret; \ +}) + +void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) +{ + val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); + vcpu_set_psr(vcpu, val); +} + +void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_gr(vcpu, inst.M35.r2); + vcpu_set_psr_l(vcpu, val); +} + +void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + vcpu_set_gr(vcpu, inst.M33.r1, val, 0); +} + +void vcpu_increment_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + if (ipsr->ri == 2) { + ipsr->ri = 0; + regs->cr_iip += 16; + } else + ipsr->ri++; +} + +void vcpu_decrement_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else + ipsr->ri--; +} + +/** Emulate a privileged operation. + * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) +{ + unsigned long status, cause, opcode ; + INST64 inst; + + status = IA64_NO_FAULT; + cause = VMX(vcpu, cause); + opcode = VMX(vcpu, opcode); + inst.inst = opcode; + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch (cause) { + case EVENT_RSM: + kvm_rsm(vcpu, inst); + break; + case EVENT_SSM: + kvm_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + kvm_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + kvm_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + kvm_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + kvm_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + vcpu_bsw0(vcpu); + break; + case EVENT_BSW_1: + vcpu_bsw1(vcpu); + break; + case EVENT_COVER: + vcpu_cover(vcpu); + break; + case EVENT_RFI: + vcpu_rfi(vcpu); + break; + case EVENT_ITR_D: + kvm_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + kvm_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + kvm_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + kvm_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + kvm_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + kvm_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + kvm_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + kvm_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + kvm_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + kvm_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + kvm_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + kvm_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + kvm_thash(vcpu, inst); + break; + case EVENT_TTAG: + kvm_ttag(vcpu, inst); + break; + case EVENT_TPA: + status = kvm_tpa(vcpu, inst); + break; + case EVENT_TAK: + kvm_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + kvm_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + kvm_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + kvm_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + kvm_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + kvm_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + kvm_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + kvm_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + kvm_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + kvm_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + kvm_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + kvm_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + kvm_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + kvm_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + status = IA64_FAULT; + break; + default: + break; + }; + /*Assume all status is NO_FAULT ?*/ + if (status == IA64_NO_FAULT && cause != EVENT_RFI) + vcpu_increment_iip(vcpu); + + recover_if_physical_mode(vcpu); +} + +void init_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + vcpu->arch.mode_flags = GUEST_IN_PHY; + VMX(vcpu, vrr[0]) = 0x38; + VMX(vcpu, vrr[1]) = 0x38; + VMX(vcpu, vrr[2]) = 0x38; + VMX(vcpu, vrr[3]) = 0x38; + VMX(vcpu, vrr[4]) = 0x38; + VMX(vcpu, vrr[5]) = 0x38; + VMX(vcpu, vrr[6]) = 0x38; + VMX(vcpu, vrr[7]) = 0x38; + VCPU(vcpu, vpsr) = IA64_PSR_BN; + VCPU(vcpu, dcr) = 0; + /* pta.size must not be 0. The minimum is 15 (32k) */ + VCPU(vcpu, pta) = 15 << 2; + VCPU(vcpu, itv) = 0x10000; + VCPU(vcpu, itm) = 0; + VMX(vcpu, last_itc) = 0; + + VCPU(vcpu, lid) = VCPU_LID(vcpu); + VCPU(vcpu, ivr) = 0; + VCPU(vcpu, tpr) = 0x10000; + VCPU(vcpu, eoi) = 0; + VCPU(vcpu, irr[0]) = 0; + VCPU(vcpu, irr[1]) = 0; + VCPU(vcpu, irr[2]) = 0; + VCPU(vcpu, irr[3]) = 0; + VCPU(vcpu, pmv) = 0x10000; + VCPU(vcpu, cmcv) = 0x10000; + VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ + VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */ + update_vhpi(vcpu, NULL_VECTOR); + VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ + + for (i = 0; i < 4; i++) + VLSAPIC_INSVC(vcpu, i) = 0; +} + +void kvm_init_all_rr(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + local_irq_save(psr); + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); + + if (is_physical_mode(vcpu)) { + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) + panic_vm(vcpu); + + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); + ia64_dv_serialize_data(); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr4); + ia64_dv_serialize_data(); + } + ia64_set_rr((VRN1 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN1]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN2 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN2]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN3 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN3]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN5 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN5]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN7 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN7]))); + ia64_dv_serialize_data(); + ia64_srlz_d(); + ia64_set_psr(psr); +} + +int vmm_entry(void) +{ + struct kvm_vcpu *v; + v = current_vcpu; + + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_init_vtlb(v); + kvm_init_vhpt(v); + init_vcpu(v); + kvm_init_all_rr(v); + vmm_reset_entry(); + + return 0; +} + +void panic_vm(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + + p->exit_reason = EXIT_REASON_VM_PANIC; + vmm_transition(v); + /*Never to return*/ + while (1); +} diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h new file mode 100644 index 00000000000..b0fcfb62c49 --- /dev/null +++ b/arch/ia64/kvm/vcpu.h @@ -0,0 +1,740 @@ +/* + * vcpu.h: vcpu routines + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#ifndef __KVM_VCPU_H__ +#define __KVM_VCPU_H__ + +#include <asm/types.h> +#include <asm/fpu.h> +#include <asm/processor.h> + +#ifndef __ASSEMBLY__ +#include "vti.h" + +#include <linux/kvm_host.h> +#include <linux/spinlock.h> + +typedef unsigned long IA64_INST; + +typedef union U_IA64_BUNDLE { + unsigned long i64[2]; + struct { unsigned long template:5, slot0:41, slot1a:18, + slot1b:23, slot2:41; }; + /* NOTE: following doesn't work because bitfields can't cross natural + size boundaries + struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ +} IA64_BUNDLE; + +typedef union U_INST64_A5 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, + imm9d:9, s:1, major:4; }; +} INST64_A5; + +typedef union U_INST64_B4 { + IA64_INST inst; + struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, + wh:2, d:1, un1:1, major:4; }; +} INST64_B4; + +typedef union U_INST64_B8 { + IA64_INST inst; + struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; +} INST64_B8; + +typedef union U_INST64_B9 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; +} INST64_B9; + +typedef union U_INST64_I19 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; +} INST64_I19; + +typedef union U_INST64_I26 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I26; + +typedef union U_INST64_I27 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; +} INST64_I27; + +typedef union U_INST64_I28 { /* not privileged (mov from AR) */ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I28; + +typedef union U_INST64_M28 { + IA64_INST inst; + struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M28; + +typedef union U_INST64_M29 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M29; + +typedef union U_INST64_M30 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, + x3:3, s:1, major:4; }; +} INST64_M30; + +typedef union U_INST64_M31 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M31; + +typedef union U_INST64_M32 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M32; + +typedef union U_INST64_M33 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M33; + +typedef union U_INST64_M35 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; + +} INST64_M35; + +typedef union U_INST64_M36 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; +} INST64_M36; + +typedef union U_INST64_M37 { + IA64_INST inst; + struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, + i:1, major:4; }; +} INST64_M37; + +typedef union U_INST64_M41 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; +} INST64_M41; + +typedef union U_INST64_M42 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M42; + +typedef union U_INST64_M43 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M43; + +typedef union U_INST64_M44 { + IA64_INST inst; + struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; +} INST64_M44; + +typedef union U_INST64_M45 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M45; + +typedef union U_INST64_M46 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, + x3:3, un1:1, major:4; }; +} INST64_M46; + +typedef union U_INST64_M47 { + IA64_INST inst; + struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; +} INST64_M47; + +typedef union U_INST64_M1{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M1; + +typedef union U_INST64_M2{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M2; + +typedef union U_INST64_M3{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M3; + +typedef union U_INST64_M4 { + IA64_INST inst; + struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M4; + +typedef union U_INST64_M5 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M5; + +typedef union U_INST64_M6 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M6; + +typedef union U_INST64_M9 { + IA64_INST inst; + struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M9; + +typedef union U_INST64_M10 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M10; + +typedef union U_INST64_M12 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M12; + +typedef union U_INST64_M15 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M15; + +typedef union U_INST64 { + IA64_INST inst; + struct { unsigned long :37, major:4; } generic; + INST64_A5 A5; /* used in build_hypercall_bundle only */ + INST64_B4 B4; /* used in build_hypercall_bundle only */ + INST64_B8 B8; /* rfi, bsw.[01] */ + INST64_B9 B9; /* break.b */ + INST64_I19 I19; /* used in build_hypercall_bundle only */ + INST64_I26 I26; /* mov register to ar (I unit) */ + INST64_I27 I27; /* mov immediate to ar (I unit) */ + INST64_I28 I28; /* mov from ar (I unit) */ + INST64_M1 M1; /* ld integer */ + INST64_M2 M2; + INST64_M3 M3; + INST64_M4 M4; /* st integer */ + INST64_M5 M5; + INST64_M6 M6; /* ldfd floating pointer */ + INST64_M9 M9; /* stfd floating pointer */ + INST64_M10 M10; /* stfd floating pointer */ + INST64_M12 M12; /* ldfd pair floating pointer */ + INST64_M15 M15; /* lfetch + imm update */ + INST64_M28 M28; /* purge translation cache entry */ + INST64_M29 M29; /* mov register to ar (M unit) */ + INST64_M30 M30; /* mov immediate to ar (M unit) */ + INST64_M31 M31; /* mov from ar (M unit) */ + INST64_M32 M32; /* mov reg to cr */ + INST64_M33 M33; /* mov from cr */ + INST64_M35 M35; /* mov to psr */ + INST64_M36 M36; /* mov from psr */ + INST64_M37 M37; /* break.m */ + INST64_M41 M41; /* translation cache insert */ + INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ + INST64_M43 M43; /* mov from indirect reg */ + INST64_M44 M44; /* set/reset system mask */ + INST64_M45 M45; /* translation purge */ + INST64_M46 M46; /* translation access (tpa,tak) */ + INST64_M47 M47; /* purge translation entry */ +} INST64; + +#define MASK_41 ((unsigned long)0x1ffffffffff) + +/* Virtual address memory attributes encoding */ +#define VA_MATTR_WB 0x0 +#define VA_MATTR_UC 0x4 +#define VA_MATTR_UCE 0x5 +#define VA_MATTR_WC 0x6 +#define VA_MATTR_NATPAGE 0x7 + +#define PMASK(size) (~((size) - 1)) +#define PSIZE(size) (1UL<<(size)) +#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) +#define PAGEALIGN(va, ps) CLEARLSB(va, ps) +#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) +#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ + +#define ARCH_PAGE_SHIFT 12 + +#define INVALID_TI_TAG (1UL << 63) + +#define VTLB_PTE_P_BIT 0 +#define VTLB_PTE_IO_BIT 60 +#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT) +#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT) + +#define vcpu_quick_region_check(_tr_regions,_ifa) \ + (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) + +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, + u64 va, u64 rid) +{ + trp->page_flags = pte; + trp->itir = itir; + trp->vadr = va; + trp->rid = rid; +} + +extern u64 kvm_lookup_mpa(u64 gpfn); +extern u64 kvm_gpa_to_mpa(u64 gpa); + +/* Return I/O type if trye */ +#define __gpfn_is_io(gpfn) \ + ({ \ + u64 pte, ret = 0; \ + pte = kvm_lookup_mpa(gpfn); \ + if (!(pte & GPFN_INV_MASK)) \ + ret = pte & GPFN_IO_MASK; \ + ret; \ + }) + +#endif + +#define IA64_NO_FAULT 0 +#define IA64_FAULT 1 + +#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) + +#define SW_BAD 0 /* Bad mode transitition */ +#define SW_V2P 1 /* Physical emulatino is activated */ +#define SW_P2V 2 /* Exit physical mode emulation */ +#define SW_SELF 3 /* No mode transition */ +#define SW_NOP 4 /* Mode transition, but without action required */ + +#define GUEST_IN_PHY 0x1 +#define GUEST_PHY_EMUL 0x2 + +#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) + +#define VRN_SHIFT 61 +#define VRN_MASK 0xe000000000000000 +#define VRN0 0x0UL +#define VRN1 0x1UL +#define VRN2 0x2UL +#define VRN3 0x3UL +#define VRN4 0x4UL +#define VRN5 0x5UL +#define VRN6 0x6UL +#define VRN7 0x7UL + +#define IRQ_NO_MASKED 0 +#define IRQ_MASKED_BY_VTPR 1 +#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ + +#define PTA_BASE_SHIFT 15 + +#define IA64_PSR_VM_BIT 46 +#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) + +/* Interruption Function State */ +#define IA64_IFS_V_BIT 63 +#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) + +#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) +#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) + +#ifndef __ASSEMBLY__ + +#include <asm/gcc_intrin.h> + +#define is_physical_mode(v) \ + ((v->arch.mode_flags) & GUEST_IN_PHY) + +#define is_virtual_mode(v) \ + (!is_physical_mode(v)) + +#define MODE_IND(psr) \ + (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) + +#define _vmm_raw_spin_lock(x) \ + do { \ + __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ + __u64 ia64_spinlock_val; \ + ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + if (unlikely(ia64_spinlock_val)) { \ + do { \ + while (*ia64_spinlock_ptr) \ + ia64_barrier(); \ + ia64_spinlock_val = \ + ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + } while (ia64_spinlock_val); \ + } \ + } while (0) + +#define _vmm_raw_spin_unlock(x) \ + do { barrier(); \ + ((spinlock_t *)x)->raw_lock.lock = 0; } \ +while (0) + +void vmm_spin_lock(spinlock_t *lock); +void vmm_spin_unlock(spinlock_t *lock); +enum { + I_TLB = 1, + D_TLB = 2 +}; + +union kvm_va { + struct { + unsigned long off : 60; /* intra-region offset */ + unsigned long reg : 4; /* region number */ + } f; + unsigned long l; + void *p; +}; + +#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = 0; _v.l; }) +#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = -1; _v.p; }) + +#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.rid; }) +#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ps; }) +#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ve; }) + +enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; +enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; + +#define VCPU(_v, _x) ((_v)->arch.vpd->_x) +#define VMX(_v, _x) ((_v)->arch._x) + +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) +#define VLSAPIC_XTP(_v) VMX(_v, xtp) + +static inline unsigned long itir_ps(unsigned long itir) +{ + return ((itir >> 2) & 0x3f); +} + + +/************************************************************************** + VCPU control register access routines + **************************************************************************/ + +static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itir)); +} + +static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itir) = val; +} + +static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, ifa)); +} + +static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifa) = val; +} + +static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, iva)); +} + +static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, pta)); +} + +static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, lid)); +} + +static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, tpr)); +} + +static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) +{ + return (0UL); /*reads of eoi always return 0 */ +} + +static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[0])); +} + +static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[1])); +} + +static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[2])); +} + +static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[3])); +} + +static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) +{ + ia64_setreg(_IA64_REG_CR_DCR, val); +} + +static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, isr) = val; +} + +static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, lid) = val; +} + +static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ipsr) = val; +} + +static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iip) = val; +} + +static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifs) = val; +} + +static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iipa) = val; +} + +static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iha) = val; +} + + +static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) +{ + return vcpu->arch.vrr[reg>>61]; +} + +/************************************************************************** + VCPU debug breakpoint register access routines + **************************************************************************/ + +static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + __ia64_set_dbr(reg, val); +} + +static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + ia64_set_ibr(reg, val); +} + +static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)__ia64_get_dbr(reg)); +} + +static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)ia64_get_ibr(reg)); +} + +/************************************************************************** + VCPU performance monitor register access routines + **************************************************************************/ +static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMC registers are discarded */ + ia64_set_pmc(reg, val); +} + +static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMD registers are discarded */ + ia64_set_pmd(reg, val); +} + +static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMC registers return zero */ + return ((u64)ia64_get_pmc(reg)); +} + +static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMD registers return zero */ + return ((u64)ia64_get_pmd(reg)); +} + +static inline unsigned long vrrtomrr(unsigned long val) +{ + union ia64_rr rr; + rr.val = val; + rr.rid = (rr.rid << 4) | 0xe; + if (rr.ps > PAGE_SHIFT) + rr.ps = PAGE_SHIFT; + rr.ve = 1; + return rr.val; +} + + +static inline int highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + return NULL_VECTOR; +} + +/* + * The pending irq is higher than the inservice one. + * + */ +static inline int is_higher_irq(int pending, int inservice) +{ + return ((pending > inservice) + || ((pending != NULL_VECTOR) + && (inservice == NULL_VECTOR))); +} + +static inline int is_higher_class(int pending, int mic) +{ + return ((pending >> 4) > mic); +} + +/* + * Return 0-255 for pending irq. + * NULL_VECTOR: when no pending. + */ +static inline int highest_pending_irq(struct kvm_vcpu *vcpu) +{ + if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&VCPU(vcpu, irr[0])); +} + +static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) +{ + if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR)) + return NMI_VECTOR; + if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return highest_bits((int *)&(VMX(vcpu, insvc[0]))); +} + +extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, + struct ia64_fpreg *val); +extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); +extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); +extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); +extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); +extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); +extern void vcpu_bsw0(struct kvm_vcpu *vcpu); +extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 va, int type); +extern struct thash_data *vhpt_lookup(u64 va); +extern u64 guest_vhpt_lookup(u64 iha, u64 *pte); +extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps); +extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps); +extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va); +extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, + u64 itir, u64 ifa, int type); +extern void thash_purge_all(struct kvm_vcpu *v); +extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v, + u64 va, int is_data); +extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, + u64 ps, int is_data); + +extern void vcpu_increment_iip(struct kvm_vcpu *v); +extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu); +extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec); +extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr); +extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr); +extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr); +extern void nested_dtlb(struct kvm_vcpu *vcpu); +extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr); +extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref); + +extern void update_vhpi(struct kvm_vcpu *vcpu, int vec); +extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice); + +extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle); +extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma); +extern void vmm_transition(struct kvm_vcpu *vcpu); +extern void vmm_trampoline(union context *from, union context *to); +extern int vmm_entry(void); +extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu); + +extern void vmm_reset_entry(void); +void kvm_init_vtlb(struct kvm_vcpu *v); +void kvm_init_vhpt(struct kvm_vcpu *v); +void thash_init(struct thash_cb *hcb, u64 sz); + +void panic_vm(struct kvm_vcpu *v); + +extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6, u64 arg7); +#endif +#endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c new file mode 100644 index 00000000000..2275bf4e681 --- /dev/null +++ b/arch/ia64/kvm/vmm.c @@ -0,0 +1,66 @@ +/* + * vmm.c: vmm module interface with kvm module + * + * Copyright (c) 2007, Intel Corporation. + * + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + + +#include<linux/module.h> +#include<asm/fpswa.h> + +#include "vcpu.h" + +MODULE_AUTHOR("Intel"); +MODULE_LICENSE("GPL"); + +extern char kvm_ia64_ivt; +extern fpswa_interface_t *vmm_fpswa_interface; + +struct kvm_vmm_info vmm_info = { + .module = THIS_MODULE, + .vmm_entry = vmm_entry, + .tramp_entry = vmm_trampoline, + .vmm_ivt = (unsigned long)&kvm_ia64_ivt, +}; + +static int __init kvm_vmm_init(void) +{ + + vmm_fpswa_interface = fpswa_interface; + + /*Register vmm data to kvm side*/ + return kvm_init(&vmm_info, 1024, THIS_MODULE); +} + +static void __exit kvm_vmm_exit(void) +{ + kvm_exit(); + return ; +} + +void vmm_spin_lock(spinlock_t *lock) +{ + _vmm_raw_spin_lock(lock); +} + +void vmm_spin_unlock(spinlock_t *lock) +{ + _vmm_raw_spin_unlock(lock); +} +module_init(kvm_vmm_init) +module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S new file mode 100644 index 00000000000..3ee5f481c06 --- /dev/null +++ b/arch/ia64/kvm/vmm_ivt.S @@ -0,0 +1,1424 @@ +/* + * /ia64/kvm_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Kenneth Chen <kenneth.w.chen@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * + * + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling + * for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB + * handler now uses virtual PT. + * + * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for + * critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss + * (12,51) + * entry offset ----/ / / / + * / + * entry number ---------/ / / + * / + * size of the entry -------------/ / + * / + * vector name -------------------------------------/ + * / + * interruptions triggering this vector + * ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB + * boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + + +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/pgtable.h> + +#include "asm-offsets.h" +#include "vcpu.h" +#include "kvm_minstate.h" +#include "vti.h" + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + + +#define KVM_FAULT(n) \ + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_fault_##n; \ + ;; \ + + +#define KVM_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7)br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_panic; \ + + +GLOBAL_ENTRY(kvm_panic) + br.sptk.many kvm_panic + ;; +END(kvm_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global kvm_ia64_ivt +kvm_ia64_ivt: +/////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(kvm_vhpt_miss) + KVM_FAULT(0) +END(kvm_vhpt_miss) + + + .org kvm_ia64_ivt+0x400 +//////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(kvm_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); +END(kvm_itlb_miss) + + .org kvm_ia64_ivt+0x0800 +////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(kvm_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch +END(kvm_dtlb_miss) + + .org kvm_ia64_ivt+0x0c00 +//////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(kvm_alt_itlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_itlb_miss) + + .org kvm_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(kvm_alt_dtlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_dtlb_miss) + + .org kvm_ia64_ivt+0x1400 +////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(kvm_nested_dtlb_miss) + KVM_FAULT(5) +END(kvm_nested_dtlb_miss) + + .org kvm_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(kvm_ikey_miss) + KVM_REFLECT(6) +END(kvm_ikey_miss) + + .org kvm_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(kvm_dkey_miss) + KVM_REFLECT(7) +END(kvm_dkey_miss) + + .org kvm_ia64_ivt+0x2000 +//////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(kvm_dirty_bit) + KVM_REFLECT(8) +END(kvm_dirty_bit) + + .org kvm_ia64_ivt+0x2400 +//////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(kvm_iaccess_bit) + KVM_REFLECT(9) +END(kvm_iaccess_bit) + + .org kvm_ia64_ivt+0x2800 +/////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(kvm_daccess_bit) + KVM_REFLECT(10) +END(kvm_daccess_bit) + + .org kvm_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(kvm_break_fault) + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; +END(kvm_break_fault) + + .org kvm_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(kvm_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill [r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; +END(kvm_interrupt) + + .global kvm_dispatch_vexirq + .org kvm_ia64_ivt+0x3400 +////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved +ENTRY(kvm_virtual_exirq) + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; +kvm_dispatch_vexirq: + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq +END(kvm_virtual_exirq) + + .org kvm_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + + + .org kvm_ia64_ivt+0x3c00 +/////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + KVM_FAULT(15) + + + .org kvm_ia64_ivt+0x4000 +/////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + KVM_FAULT(16) + + .org kvm_ia64_ivt+0x4400 +////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + KVM_FAULT(17) + + .org kvm_ia64_ivt+0x4800 +////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + KVM_FAULT(18) + + .org kvm_ia64_ivt+0x4c00 +////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + KVM_FAULT(19) + + .org kvm_ia64_ivt+0x5000 +////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present +ENTRY(kvm_page_not_present) + KVM_REFLECT(20) +END(kvm_page_not_present) + + .org kvm_ia64_ivt+0x5100 +/////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission vector +ENTRY(kvm_key_permission) + KVM_REFLECT(21) +END(kvm_key_permission) + + .org kvm_ia64_ivt+0x5200 +////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(kvm_iaccess_rights) + KVM_REFLECT(22) +END(kvm_iaccess_rights) + + .org kvm_ia64_ivt+0x5300 +////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(kvm_daccess_rights) + KVM_REFLECT(23) +END(kvm_daccess_rights) + + .org kvm_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(kvm_general_exception) + KVM_REFLECT(24) + KVM_FAULT(24) +END(kvm_general_exception) + + .org kvm_ia64_ivt+0x5500 +////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(kvm_disabled_fp_reg) + KVM_REFLECT(25) +END(kvm_disabled_fp_reg) + + .org kvm_ia64_ivt+0x5600 +//////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(kvm_nat_consumption) + KVM_REFLECT(26) +END(kvm_nat_consumption) + + .org kvm_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(kvm_speculation_vector) + KVM_REFLECT(27) +END(kvm_speculation_vector) + + .org kvm_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + KVM_FAULT(28) + + .org kvm_ia64_ivt+0x5900 +/////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(kvm_debug_vector) + KVM_FAULT(29) +END(kvm_debug_vector) + + .org kvm_ia64_ivt+0x5a00 +/////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(kvm_unaligned_access) + KVM_REFLECT(30) +END(kvm_unaligned_access) + + .org kvm_ia64_ivt+0x5b00 +////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(kvm_unsupported_data_reference) + KVM_REFLECT(31) +END(kvm_unsupported_data_reference) + + .org kvm_ia64_ivt+0x5c00 +//////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) +ENTRY(kvm_floating_point_fault) + KVM_REFLECT(32) +END(kvm_floating_point_fault) + + .org kvm_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(kvm_floating_point_trap) + KVM_REFLECT(33) +END(kvm_floating_point_trap) + + .org kvm_ia64_ivt+0x5e00 +////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(kvm_lower_privilege_trap) + KVM_REFLECT(34) +END(kvm_lower_privilege_trap) + + .org kvm_ia64_ivt+0x5f00 +////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(kvm_taken_branch_trap) + KVM_REFLECT(35) +END(kvm_taken_branch_trap) + + .org kvm_ia64_ivt+0x6000 +//////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(kvm_single_step_trap) + KVM_REFLECT(36) +END(kvm_single_step_trap) + .global kvm_virtualization_fault_back + .org kvm_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(kvm_virtualization_fault) + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; +kvm_virtualization_fault_back: + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault +END(kvm_virtualization_fault) + + .org kvm_ia64_ivt+0x6200 +////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + KVM_FAULT(38) + + .org kvm_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + KVM_FAULT(39) + + .org kvm_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + KVM_FAULT(40) + + .org kvm_ia64_ivt+0x6500 +////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + KVM_FAULT(41) + + .org kvm_ia64_ivt+0x6600 +////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + KVM_FAULT(42) + + .org kvm_ia64_ivt+0x6700 +////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + KVM_FAULT(43) + + .org kvm_ia64_ivt+0x6800 +////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + KVM_FAULT(44) + + .org kvm_ia64_ivt+0x6900 +/////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception +//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(kvm_ia32_exception) + KVM_FAULT(45) +END(kvm_ia32_exception) + + .org kvm_ia64_ivt+0x6a00 +//////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(kvm_ia32_intercept) + KVM_FAULT(47) +END(kvm_ia32_intercept) + + .org kvm_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + KVM_FAULT(48) + + .org kvm_ia64_ivt+0x6d00 +////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + KVM_FAULT(49) + + .org kvm_ia64_ivt+0x6e00 +////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + KVM_FAULT(50) + + .org kvm_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + KVM_FAULT(52) + + .org kvm_ia64_ivt+0x7100 +//////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + KVM_FAULT(53) + + .org kvm_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + KVM_FAULT(54) + + .org kvm_ia64_ivt+0x7300 +//////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + KVM_FAULT(55) + + .org kvm_ia64_ivt+0x7400 +//////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + KVM_FAULT(56) + + .org kvm_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + KVM_FAULT(57) + + .org kvm_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + KVM_FAULT(58) + + .org kvm_ia64_ivt+0x7700 +//////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + KVM_FAULT(59) + + .org kvm_ia64_ivt+0x7800 +//////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + KVM_FAULT(60) + + .org kvm_ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + KVM_FAULT(61) + + .org kvm_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + KVM_FAULT(62) + + .org kvm_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + KVM_FAULT(63) + + .org kvm_ia64_ivt+0x7c00 +//////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + KVM_FAULT(64) + + .org kvm_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + KVM_FAULT(65) + + .org kvm_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + KVM_FAULT(66) + + .org kvm_ia64_ivt+0x7f00 +//////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + KVM_FAULT(67) + + .org kvm_ia64_ivt+0x8000 +// There is no particular reason for this code to be here, other than that +// there happens to be space here that would go unused otherwise. If this +// fault ever gets "unreserved", simply moved the following code to a more +// suitable spot... + + +ENTRY(kvm_dtlb_miss_dispatch) + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_dtlb_miss_dispatch) + +ENTRY(kvm_itlb_miss_dispatch) + + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_itlb_miss_dispatch) + +ENTRY(kvm_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption +END(kvm_dispatch_reflection) + +ENTRY(kvm_dispatch_virtualization_fault) + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate +END(kvm_dispatch_virtualization_fault) + + +ENTRY(kvm_dispatch_interrupt) + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq +END(kvm_dispatch_interrupt) + + + + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 + // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) + /* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; +END(ia64_leave_hypervisor_prepare) +//fall through +GLOBAL_ENTRY(ia64_leave_hypervisor) + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; +kvm_rbs_switch: + mov r19=96 + +kvm_dont_preserve_current_frame: +/* + * To prevent leaking bits between the hypervisor and guest domain, + * we must clear the stacked registers in the "invalid" partition here. + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +# define Nregs 14 + + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) +kvm_rse_clear_invalid: + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 + +# undef pRecurse +# undef pReturn + +// loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + ld8 r20=[r20] + ;; +//vsa_sync_write_start + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +END(ia64_leave_hypervisor) +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r17:cr.isr + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p1) br.sptk.many ia64_vmm_entry_out + ;; + tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + (p2) ld8 r26=[r25] + ;; +ia64_vmm_entry_out: + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_vmm_entry) + + + +/* + * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, + * u64 arg3, u64 arg4, u64 arg5, + * u64 arg6, u64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. + */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.i + ;; +//(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + +#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) + +GLOBAL_ENTRY(vmm_reset_entry) + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + bsw.0 + ;; + mov r21 =r13 + ;; + bsw.1 + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + ;; + adds r16=VMM_VPD_BASE_OFFSET,r13 + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ld8 r20=[r20] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 +END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h new file mode 100644 index 00000000000..f6c5617e16a --- /dev/null +++ b/arch/ia64/kvm/vti.h @@ -0,0 +1,290 @@ +/* + * vti.h: prototype for generial vt related interface + * Copyright (c) 2004, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Fred Yang (fred.yang@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Zhang xiantao <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ +#ifndef _KVM_VT_I_H +#define _KVM_VT_I_H + +#ifndef __ASSEMBLY__ +#include <asm/page.h> + +#include <linux/kvm_host.h> + +/* define itr.i and itr.d in ia64_itr function */ +#define ITR 0x01 +#define DTR 0x02 +#define IaDTR 0x03 + +#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ +#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ + +#define RR6 (6UL<<61) +#define RR7 (7UL<<61) + + +/* config_options in pal_vp_init_env */ +#define VP_INITIALIZE 1UL +#define VP_FR_PMC 1UL<<1 +#define VP_OPCODE 1UL<<8 +#define VP_CAUSE 1UL<<9 +#define VP_FW_ACC 1UL<<63 + +/* init vp env with initializing vm_buffer */ +#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ + VP_OPCODE | VP_CAUSE | VP_FW_ACC) +/* init vp env without initializing vm_buffer */ +#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC + +#define PAL_VP_CREATE 265 +/* Stacked Virt. Initializes a new VPD for the operation of + * a new virtual processor in the virtual environment. + */ +#define PAL_VP_ENV_INFO 266 +/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ +#define PAL_VP_EXIT_ENV 267 +/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ +#define PAL_VP_INIT_ENV 268 +/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ +#define PAL_VP_REGISTER 269 +/*Stacked Virt. Register a different host IVT for the virtual processor.*/ +#define PAL_VP_RESUME 270 +/* Renamed from PAL_VP_RESUME */ +#define PAL_VP_RESTORE 270 +/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ +#define PAL_VP_SUSPEND 271 +/* Renamed from PAL_VP_SUSPEND */ +#define PAL_VP_SAVE 271 +/* Stacked Virt. Suspends operation for the specified virtual processor on + * the logical processor. + */ +#define PAL_VP_TERMINATE 272 +/* Stacked Virt. Terminates operation for the specified virtual processor.*/ + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; + +struct vpd { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; + unsigned long rsv2[7]; + unsigned long ipsr; + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + unsigned long reserved5[128]; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; + +#define PAL_PROC_VM_BIT (1UL << 40) +#define PAL_PROC_VMSW_BIT (1UL << 54) + +static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, + u64 *vp_env_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); + *buffer_size = iprv.v0; + *vp_env_info = iprv.v1; + return iprv.status; +} + +static inline s64 ia64_pal_vp_exit_env(u64 iva) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); + return iprv.status; +} + +static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, + u64 vbase_addr, u64 *vsa_base) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, + vbase_addr); + *vsa_base = iprv.v0; + + return iprv.status; +} + +static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +#endif + +/*VPD field offset*/ +#define VPD_VAC_START_OFFSET 0 +#define VPD_VDC_START_OFFSET 8 +#define VPD_VHPI_START_OFFSET 256 +#define VPD_VGR_START_OFFSET 1024 +#define VPD_VBGR_START_OFFSET 1152 +#define VPD_VNAT_START_OFFSET 1280 +#define VPD_VBNAT_START_OFFSET 1288 +#define VPD_VCPUID_START_OFFSET 1296 +#define VPD_VPSR_START_OFFSET 1424 +#define VPD_VPR_START_OFFSET 1432 +#define VPD_VRSE_CFLE_START_OFFSET 1440 +#define VPD_VCR_START_OFFSET 2048 +#define VPD_VTPR_START_OFFSET 2576 +#define VPD_VRR_START_OFFSET 3072 +#define VPD_VMM_VAIL_START_OFFSET 31744 + +/*Virtualization faults*/ + +#define EVENT_MOV_TO_AR 1 +#define EVENT_MOV_TO_AR_IMM 2 +#define EVENT_MOV_FROM_AR 3 +#define EVENT_MOV_TO_CR 4 +#define EVENT_MOV_FROM_CR 5 +#define EVENT_MOV_TO_PSR 6 +#define EVENT_MOV_FROM_PSR 7 +#define EVENT_ITC_D 8 +#define EVENT_ITC_I 9 +#define EVENT_MOV_TO_RR 10 +#define EVENT_MOV_TO_DBR 11 +#define EVENT_MOV_TO_IBR 12 +#define EVENT_MOV_TO_PKR 13 +#define EVENT_MOV_TO_PMC 14 +#define EVENT_MOV_TO_PMD 15 +#define EVENT_ITR_D 16 +#define EVENT_ITR_I 17 +#define EVENT_MOV_FROM_RR 18 +#define EVENT_MOV_FROM_DBR 19 +#define EVENT_MOV_FROM_IBR 20 +#define EVENT_MOV_FROM_PKR 21 +#define EVENT_MOV_FROM_PMC 22 +#define EVENT_MOV_FROM_CPUID 23 +#define EVENT_SSM 24 +#define EVENT_RSM 25 +#define EVENT_PTC_L 26 +#define EVENT_PTC_G 27 +#define EVENT_PTC_GA 28 +#define EVENT_PTR_D 29 +#define EVENT_PTR_I 30 +#define EVENT_THASH 31 +#define EVENT_TTAG 32 +#define EVENT_TPA 33 +#define EVENT_TAK 34 +#define EVENT_PTC_E 35 +#define EVENT_COVER 36 +#define EVENT_RFI 37 +#define EVENT_BSW_0 38 +#define EVENT_BSW_1 39 +#define EVENT_VMSW 40 + +/**PAL virtual services offsets */ +#define PAL_VPS_RESUME_NORMAL 0x0000 +#define PAL_VPS_RESUME_HANDLER 0x0400 +#define PAL_VPS_SYNC_READ 0x0800 +#define PAL_VPS_SYNC_WRITE 0x0c00 +#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 +#define PAL_VPS_THASH 0x1400 +#define PAL_VPS_TTAG 0x1800 +#define PAL_VPS_RESTORE 0x1c00 +#define PAL_VPS_SAVE 0x2000 + +#endif/* _VT_I_H*/ diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c new file mode 100644 index 00000000000..def4576d22b --- /dev/null +++ b/arch/ia64/kvm/vtlb.c @@ -0,0 +1,636 @@ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> + * Xiantao Zhang <xiantao.zhang@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include "vcpu.h" + +#include <linux/rwsem.h> + +#include <asm/tlb.h> + +/* + * Check to see if the address rid:va is translated by the TLB + */ + +static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) +{ + return ((trp->p) && (trp->rid == rid) + && ((va-trp->vadr) < PSIZE(trp->ps))); +} + +/* + * Only for GUEST TR format. + */ +static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) +{ + u64 sa1, ea1; + + if (!trp->p || trp->rid != rid) + return 0; + + sa1 = trp->vadr; + ea1 = sa1 + PSIZE(trp->ps) - 1; + eva -= 1; + if ((sva > ea1) || (sa1 > eva)) + return 0; + else + return 1; + +} + +void machine_tlb_purge(u64 va, u64 ps) +{ + ia64_ptcl(va, ps << 2); +} + +void local_flush_tlb_all(void) +{ + int i, j; + unsigned long flags, count0, count1; + unsigned long stride0, stride1, addr; + + addr = current_vcpu->arch.ptce_base; + count0 = current_vcpu->arch.ptce_count[0]; + count1 = current_vcpu->arch.ptce_count[1]; + stride0 = current_vcpu->arch.ptce_stride[0]; + stride1 = current_vcpu->arch.ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) +{ + union ia64_rr vrr; + union ia64_pta vpta; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vrr.val = vcpu_get_rr(vcpu, vadr); + vpta.val = vcpu_get_pta(vcpu); + + if (vrr.ve & vpta.ve) { + switch (ref) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + +struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) +{ + u64 index, pfn, rid, pfn_bits; + + pfn_bits = vpta.size - 5 - 8; + pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); + rid = _REGION_ID(vrr); + index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); + *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); + + return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + + (index << 5)); +} + +struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) +{ + + struct thash_data *trp; + int i; + u64 rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK;; + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } + + return NULL; +} + +static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) +{ + union ia64_rr rr; + struct thash_data *head; + unsigned long ps, gpaddr; + + ps = itir_ps(itir); + + gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | + (ifa & ((1UL << ps) - 1)); + + rr.val = ia64_get_rr(ifa); + head = (struct thash_data *)ia64_thash(ifa); + head->etag = INVALID_TI_TAG; + ia64_mf(); + head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; + head->itir = rr.ps << 2; + head->etag = ia64_ttag(ifa); + head->gpaddr = gpaddr; +} + +void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) +{ + u64 i, dirty_pages = 1; + u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; + spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; + + vmm_spin_lock(lock); + for (i = 0; i < dirty_pages; i++) { + /* avoid RMW */ + if (!test_bit(base_gfn + i, dirty_bitmap)) + set_bit(base_gfn + i , dirty_bitmap); + } + vmm_spin_unlock(lock); +} + +void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) +{ + u64 phy_pte, psr; + union ia64_rr mrr; + + mrr.val = ia64_get_rr(va); + phy_pte = translate_phy_pte(&pte, itir, va); + + if (itir_ps(itir) >= mrr.ps) { + vhpt_insert(phy_pte, itir, va, pte); + } else { + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, va, phy_pte, itir_ps(itir)); + ia64_set_psr(psr); + } + + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, itir_ps(itir)); +} + +/* + * vhpt lookup + */ +struct thash_data *vhpt_lookup(u64 va) +{ + struct thash_data *head; + u64 tag; + + head = (struct thash_data *)ia64_thash(va); + tag = ia64_ttag(va); + if (head->etag == tag) + return head; + return NULL; +} + +u64 guest_vhpt_lookup(u64 iha, u64 *pte) +{ + u64 ret; + struct thash_data *data; + + data = __vtr_lookup(current_vcpu, iha, D_TLB); + if (data != NULL) + thash_vhpt_insert(current_vcpu, data->page_flags, + data->itir, iha, D_TLB); + + asm volatile ("rsm psr.ic|psr.i;;" + "srlz.d;;" + "ld8.s r9=[%1];;" + "tnat.nz p6,p7=r9;;" + "(p6) mov %0=1;" + "(p6) mov r9=r0;" + "(p7) extr.u r9=r9,0,53;;" + "(p7) mov %0=r0;" + "(p7) st8 [%2]=r9;;" + "ssm psr.ic;;" + "srlz.d;;" + /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ + : "=r"(ret) : "r"(iha), "r"(pte):"memory"); + + return ret; +} + +/* + * purge software guest tlb + */ + +static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, curadr, size, psbits, tag, rr_ps, num; + union ia64_rr vrr; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + psbits = VMX(v, psbits[(va >> 61)]); + start = va & ~((1UL << ps) - 1); + while (psbits) { + curadr = start; + rr_ps = __ffs(psbits); + psbits &= ~(1UL << rr_ps); + num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); + size = PSIZE(rr_ps); + vrr.ps = rr_ps; + while (num) { + cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); + if (cur->etag == tag && cur->ps == rr_ps) + cur->etag = INVALID_TI_TAG; + curadr += size; + num--; + } + } +} + + +/* + * purge VHPT and machine TLB + */ +static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, size, tag, num; + union ia64_rr rr; + + start = va & ~((1UL << ps) - 1); + rr.val = ia64_get_rr(va); + size = PSIZE(rr.ps); + num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); + while (num) { + cur = (struct thash_data *)ia64_thash(start); + tag = ia64_ttag(start); + if (cur->etag == tag) + cur->etag = INVALID_TI_TAG; + start += size; + num--; + } + machine_tlb_purge(va, ps); +} + +/* + * Insert an entry into hash TLB or VHPT. + * NOTES: + * 1: When inserting VHPT to thash, "va" is a must covered + * address by the inserted machine VHPT entry. + * 2: The format of entry is always in TLB. + * 3: The caller need to make sure the new entry will not overlap + * with any existed entry. + */ +void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) +{ + struct thash_data *head; + union ia64_rr vrr; + u64 tag; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + vrr.ps = itir_ps(itir); + VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); + head = vsa_thash(hcb->pta, va, vrr.val, &tag); + head->page_flags = pte; + head->itir = itir; + head->etag = tag; +} + +int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) +{ + struct thash_data *trp; + int i; + u64 end, rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + end = va + PSIZE(ps); + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } + return -1; +} + +/* + * Purge entries in VTLB and VHPT + */ +void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) +{ + if (vcpu_quick_region_check(v->arch.tc_regions, va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) +{ + u64 old_va = va; + va = REGION_OFFSET(va); + if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) +{ + u64 ps, ps_mask, paddr, maddr; + union pte_flags phy_pte; + + ps = itir_ps(itir); + ps_mask = ~((1UL << ps) - 1); + phy_pte.val = *pte; + paddr = *pte; + paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); + maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); + if (maddr & GPFN_IO_MASK) { + *pte |= VTLB_PTE_IO; + return -1; + } + maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | + (paddr & ~PAGE_MASK); + phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; + return phy_pte.val; +} + +/* + * Purge overlap TCs and then insert the new entry to emulate itc ops. + * Notes: Only TC entry can purge and insert. + * 1 indicates this is MMIO + */ +int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, + u64 ifa, int type) +{ + u64 ps; + u64 phy_pte; + union ia64_rr vrr, mrr; + int ret = 0; + + ps = itir_ps(itir); + vrr.val = vcpu_get_rr(v, ifa); + mrr.val = ia64_get_rr(ifa); + + phy_pte = translate_phy_pte(&pte, itir, ifa); + + /* Ensure WB attribute if pte is related to a normal mem page, + * which is required by vga acceleration since qemu maps shared + * vram buffer with WB. + */ + if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { + pte &= ~_PAGE_MA_MASK; + phy_pte &= ~_PAGE_MA_MASK; + } + + if (pte & VTLB_PTE_IO) + ret = 1; + + vtlb_purge(v, ifa, ps); + vhpt_purge(v, ifa, ps); + + if (ps == mrr.ps) { + if (!(pte&VTLB_PTE_IO)) { + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + } + } else if (ps > mrr.ps) { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + if (!(pte&VTLB_PTE_IO)) + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + u64 psr; + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, ifa, phy_pte, ps); + ia64_set_psr(psr); + } + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, ps); + + return ret; +} + +/* + * Purge all TCs or VHPT entries including those in Hash table. + * + */ + +void thash_purge_all(struct kvm_vcpu *v) +{ + int i; + struct thash_data *head; + struct thash_cb *vtlb, *vhpt; + vtlb = &v->arch.vtlb; + vhpt = &v->arch.vhpt; + + for (i = 0; i < 8; i++) + VMX(v, psbits[i]) = 0; + + head = vtlb->hash; + for (i = 0; i < vtlb->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + head = vhpt->hash; + for (i = 0; i < vhpt->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + local_flush_tlb_all(); +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va or the entry. + * + * INPUT: + * in: TLB format for both VHPT & TLB. + */ + +struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) +{ + struct thash_data *cch; + u64 psbits, ps, tag; + union ia64_rr vrr; + + struct thash_cb *hcb = &v->arch.vtlb; + + cch = __vtr_lookup(v, va, is_data);; + if (cch) + return cch; + + if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) + return NULL; + + psbits = VMX(v, psbits[(va >> 61)]); + vrr.val = vcpu_get_rr(v, va); + while (psbits) { + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + vrr.ps = ps; + cch = vsa_thash(hcb->pta, va, vrr.val, &tag); + if (cch->etag == tag && cch->ps == ps) + return cch; + } + + return NULL; +} + + +/* + * Initialize internal control data before service. + */ +void thash_init(struct thash_cb *hcb, u64 sz) +{ + int i; + struct thash_data *head; + + hcb->pta.val = (unsigned long)hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + head = hcb->hash; + for (i = 0; i < hcb->num; i++) { + head->page_flags = 0; + head->itir = 0; + head->etag = INVALID_TI_TAG; + head->next = 0; + head++; + } +} + +u64 kvm_lookup_mpa(u64 gpfn) +{ + u64 *base = (u64 *) KVM_P2M_BASE; + return *(base + gpfn); +} + +u64 kvm_gpa_to_mpa(u64 gpa) +{ + u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); + return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); +} + + +/* + * Fetch guest bundle code. + * INPUT: + * gip: guest ip + * pbundle: used to return fetched bundle. + */ +int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) +{ + u64 gpip = 0; /* guest physical IP*/ + u64 *vpa; + struct thash_data *tlb; + u64 maddr; + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { + /* I-side physical mode */ + gpip = gip; + } else { + tlb = vtlb_lookup(vcpu, gip, I_TLB); + if (tlb) + gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | + (gip & (PSIZE(tlb->ps) - 1)); + } + if (gpip) { + maddr = kvm_gpa_to_mpa(gpip); + } else { + tlb = vhpt_lookup(gip); + if (tlb == NULL) { + ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); + return IA64_FAULT; + } + maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) + | (gip & (PSIZE(tlb->ps) - 1)); + } + vpa = (u64 *)__kvm_va(maddr); + + pbundle->i64[0] = *vpa++; + pbundle->i64[1] = *vpa; + + return IA64_NO_FAULT; +} + + +void kvm_init_vhpt(struct kvm_vcpu *v) +{ + v->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&v->arch.vhpt, VHPT_SHIFT); + ia64_set_pta(v->arch.vhpt.pta.val); + /*Enable VHPT here?*/ +} + +void kvm_init_vtlb(struct kvm_vcpu *v) +{ + v->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&v->arch.vtlb, VTLB_SHIFT); +} diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5c1de53c8c1..fc6c6636ffd 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -682,15 +682,6 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -void online_page(struct page *page) -{ - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalram_pages++; - num_physpages++; -} - int arch_add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat; diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 53d0a8ee35d..77b15f80f10 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -504,54 +504,12 @@ pcibios_update_irq (struct pci_dev *dev, int irq) /* ??? FIXME -- record old value for shutdown. */ } -static inline int -pcibios_enable_resources (struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - - if (!dev) - return -EINVAL; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx<PCI_NUM_RESOURCES; idx++) { - /* Only set up the desired resources. */ - if (!(mask & (1 << idx))) - continue; - - r = &dev->resource[idx]; - if (!(r->flags & type_mask)) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR - "PCI: Device %s not available because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - int pcibios_enable_device (struct pci_dev *dev, int mask) { int ret; - ret = pcibios_enable_resources(dev, mask); + ret = pci_enable_resources(dev, mask); if (ret < 0) return ret; diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 688a3c27e0f..0591038735a 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -4,7 +4,7 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc. All Rights Reserved. # EXTRA_CFLAGS += -Iarch/ia64/sn/include @@ -15,9 +15,4 @@ obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o -xp-y := xp_main.o xp_nofault.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o -xpc-y := xpc_main.o xpc_channel.o xpc_partition.o -obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o obj-$(CONFIG_PCI_MSI) += msi_sn.o diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 0101c7924a4..08b0d9bb62e 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -187,8 +187,8 @@ void hub_error_init(struct hubdev_info *hubdev_info) { if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, - "SN_hub_error", (void *)hubdev_info)) { - printk("hub_error_init: Failed to request_irq for 0x%p\n", + "SN_hub_error", hubdev_info)) { + printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n", hubdev_info); return; } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 4b0d1538e7e..8cc0c4753d8 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -37,7 +37,6 @@ #include <asm/processor.h> #include <asm/topology.h> -#include <asm/semaphore.h> #include <asm/uaccess.h> #include <asm/sal.h> #include <asm/sn/io.h> diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c deleted file mode 100644 index b7ea46645e1..00000000000 --- a/arch/ia64/sn/kernel/xp_main.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition (XP) base. - * - * XP provides a base from which its users can interact - * with XPC, yet not be dependent on XPC. - * - */ - - -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/xp.h> - - -/* - * Target of nofault PIO read. - */ -u64 xp_nofault_PIOR_target; - - -/* - * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level - * users of XPC. - */ -struct xpc_registration xpc_registrations[XPC_NCHANNELS]; - - -/* - * Initialize the XPC interface to indicate that XPC isn't loaded. - */ -static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; } - -struct xpc_interface xpc_interface = { - (void (*)(int)) xpc_notloaded, - (void (*)(int)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *)) - xpc_notloaded, - (void (*)(partid_t, int, void *)) xpc_notloaded, - (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded -}; - - -/* - * XPC calls this when it (the XPC module) has been loaded. - */ -void -xpc_set_interface(void (*connect)(int), - void (*disconnect)(int), - enum xpc_retval (*allocate)(partid_t, int, u32, void **), - enum xpc_retval (*send)(partid_t, int, void *), - enum xpc_retval (*send_notify)(partid_t, int, void *, - xpc_notify_func, void *), - void (*received)(partid_t, int, void *), - enum xpc_retval (*partid_to_nasids)(partid_t, void *)) -{ - xpc_interface.connect = connect; - xpc_interface.disconnect = disconnect; - xpc_interface.allocate = allocate; - xpc_interface.send = send; - xpc_interface.send_notify = send_notify; - xpc_interface.received = received; - xpc_interface.partid_to_nasids = partid_to_nasids; -} - - -/* - * XPC calls this when it (the XPC module) is being unloaded. - */ -void -xpc_clear_interface(void) -{ - xpc_interface.connect = (void (*)(int)) xpc_notloaded; - xpc_interface.disconnect = (void (*)(int)) xpc_notloaded; - xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32, - void **)) xpc_notloaded; - xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *)) - xpc_notloaded; - xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *, - xpc_notify_func, void *)) xpc_notloaded; - xpc_interface.received = (void (*)(partid_t, int, void *)) - xpc_notloaded; - xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *)) - xpc_notloaded; -} - - -/* - * Register for automatic establishment of a channel connection whenever - * a partition comes up. - * - * Arguments: - * - * ch_number - channel # to register for connection. - * func - function to call for asynchronous notification of channel - * state changes (i.e., connection, disconnection, error) and - * the arrival of incoming messages. - * key - pointer to optional user-defined value that gets passed back - * to the user on any callouts made to func. - * payload_size - size in bytes of the XPC message's payload area which - * contains a user-defined message. The user should make - * this large enough to hold their largest message. - * nentries - max #of XPC message entries a message queue can contain. - * The actual number, which is determined when a connection - * is established and may be less then requested, will be - * passed to the user via the xpcConnected callout. - * assigned_limit - max number of kthreads allowed to be processing - * messages (per connection) at any given instant. - * idle_limit - max number of kthreads allowed to be idle at any given - * instant. - */ -enum xpc_retval -xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, - u16 nentries, u32 assigned_limit, u32 idle_limit) -{ - struct xpc_registration *registration; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - DBUG_ON(payload_size == 0 || nentries == 0); - DBUG_ON(func == NULL); - DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); - - registration = &xpc_registrations[ch_number]; - - if (mutex_lock_interruptible(®istration->mutex) != 0) { - return xpcInterrupted; - } - - /* if XPC_CHANNEL_REGISTERED(ch_number) */ - if (registration->func != NULL) { - mutex_unlock(®istration->mutex); - return xpcAlreadyRegistered; - } - - /* register the channel for connection */ - registration->msg_size = XPC_MSG_SIZE(payload_size); - registration->nentries = nentries; - registration->assigned_limit = assigned_limit; - registration->idle_limit = idle_limit; - registration->key = key; - registration->func = func; - - mutex_unlock(®istration->mutex); - - xpc_interface.connect(ch_number); - - return xpcSuccess; -} - - -/* - * Remove the registration for automatic connection of the specified channel - * when a partition comes up. - * - * Before returning this xpc_disconnect() will wait for all connections on the - * specified channel have been closed/torndown. So the caller can be assured - * that they will not be receiving any more callouts from XPC to their - * function registered via xpc_connect(). - * - * Arguments: - * - * ch_number - channel # to unregister. - */ -void -xpc_disconnect(int ch_number) -{ - struct xpc_registration *registration; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - registration = &xpc_registrations[ch_number]; - - /* - * We've decided not to make this a down_interruptible(), since we - * figured XPC's users will just turn around and call xpc_disconnect() - * again anyways, so we might as well wait, if need be. - */ - mutex_lock(®istration->mutex); - - /* if !XPC_CHANNEL_REGISTERED(ch_number) */ - if (registration->func == NULL) { - mutex_unlock(®istration->mutex); - return; - } - - /* remove the connection registration for the specified channel */ - registration->func = NULL; - registration->key = NULL; - registration->nentries = 0; - registration->msg_size = 0; - registration->assigned_limit = 0; - registration->idle_limit = 0; - - xpc_interface.disconnect(ch_number); - - mutex_unlock(®istration->mutex); - - return; -} - - -int __init -xp_init(void) -{ - int ret, ch_number; - u64 func_addr = *(u64 *) xp_nofault_PIOR; - u64 err_func_addr = *(u64 *) xp_error_PIOR; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - /* - * Register a nofault code region which performs a cross-partition - * PIO read. If the PIO read times out, the MCA handler will consume - * the error and return to a kernel-provided instruction to indicate - * an error. This PIO read exists because it is guaranteed to timeout - * if the destination is down (AMO operations do not timeout on at - * least some CPUs on Shubs <= v1.2, which unfortunately we have to - * work around). - */ - if ((ret = sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 1)) != 0) { - printk(KERN_ERR "XP: can't register nofault code, error=%d\n", - ret); - } - /* - * Setup the nofault PIO read target. (There is no special reason why - * SH_IPI_ACCESS was selected.) - */ - if (is_shub2()) { - xp_nofault_PIOR_target = SH2_IPI_ACCESS0; - } else { - xp_nofault_PIOR_target = SH1_IPI_ACCESS; - } - - /* initialize the connection registration mutex */ - for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { - mutex_init(&xpc_registrations[ch_number].mutex); - } - - return 0; -} -module_init(xp_init); - - -void __exit -xp_exit(void) -{ - u64 func_addr = *(u64 *) xp_nofault_PIOR; - u64 err_func_addr = *(u64 *) xp_error_PIOR; - - - /* unregister the PIO read nofault code region */ - (void) sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 0); -} -module_exit(xp_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition (XP) base"); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(xp_nofault_PIOR); -EXPORT_SYMBOL(xp_nofault_PIOR_target); -EXPORT_SYMBOL(xpc_registrations); -EXPORT_SYMBOL(xpc_interface); -EXPORT_SYMBOL(xpc_clear_interface); -EXPORT_SYMBOL(xpc_set_interface); -EXPORT_SYMBOL(xpc_connect); -EXPORT_SYMBOL(xpc_disconnect); - diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S deleted file mode 100644 index 98e7c7dbfdd..00000000000 --- a/arch/ia64/sn/kernel/xp_nofault.S +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * The xp_nofault_PIOR function takes a pointer to a remote PIO register - * and attempts to load and consume a value from it. This function - * will be registered as a nofault code block. In the event that the - * PIO read fails, the MCA handler will force the error to look - * corrected and vector to the xp_error_PIOR which will return an error. - * - * The definition of "consumption" and the time it takes for an MCA - * to surface is processor implementation specific. This code - * is sufficient on Itanium through the Montvale processor family. - * It may need to be adjusted for future processor implementations. - * - * extern int xp_nofault_PIOR(void *remote_register); - */ - - .global xp_nofault_PIOR -xp_nofault_PIOR: - mov r8=r0 // Stage a success return value - ld8.acq r9=[r32];; // PIO Read the specified register - adds r9=1,r9;; // Add to force consumption - srlz.i;; // Allow time for MCA to surface - br.ret.sptk.many b0;; // Return success - - .global xp_error_PIOR -xp_error_PIOR: - mov r8=1 // Return value of 1 - br.ret.sptk.many b0;; // Return failure diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c deleted file mode 100644 index 44ccc0d789c..00000000000 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ /dev/null @@ -1,2379 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) channel support. - * - * This is the part of XPC that manages the channels and - * sends/receives messages across them to/from other partitions. - * - */ - - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> -#include <linux/completion.h> -#include <asm/sn/bte.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/xpc.h> - - -/* - * Guarantee that the kzalloc'd memory is cacheline aligned. - */ -static void * -xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) -{ - /* see if kzalloc will give us cachline aligned memory by default */ - *base = kzalloc(size, flags); - if (*base == NULL) { - return NULL; - } - if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { - return *base; - } - kfree(*base); - - /* nope, we'll have to do it ourselves */ - *base = kzalloc(size + L1_CACHE_BYTES, flags); - if (*base == NULL) { - return NULL; - } - return (void *) L1_CACHE_ALIGN((u64) *base); -} - - -/* - * Set up the initial values for the XPartition Communication channels. - */ -static void -xpc_initialize_channels(struct xpc_partition *part, partid_t partid) -{ - int ch_number; - struct xpc_channel *ch; - - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - ch->partid = partid; - ch->number = ch_number; - ch->flags = XPC_C_DISCONNECTED; - - ch->local_GP = &part->local_GPs[ch_number]; - ch->local_openclose_args = - &part->local_openclose_args[ch_number]; - - atomic_set(&ch->kthreads_assigned, 0); - atomic_set(&ch->kthreads_idle, 0); - atomic_set(&ch->kthreads_active, 0); - - atomic_set(&ch->references, 0); - atomic_set(&ch->n_to_notify, 0); - - spin_lock_init(&ch->lock); - mutex_init(&ch->msg_to_pull_mutex); - init_completion(&ch->wdisconnect_wait); - - atomic_set(&ch->n_on_msg_allocate_wq, 0); - init_waitqueue_head(&ch->msg_allocate_wq); - init_waitqueue_head(&ch->idle_wq); - } -} - - -/* - * Setup the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -enum xpc_retval -xpc_setup_infrastructure(struct xpc_partition *part) -{ - int ret, cpuid; - struct timer_list *timer; - partid_t partid = XPC_PARTID(part); - - - /* - * Zero out MOST of the entry for this partition. Only the fields - * starting with `nchannels' will be zeroed. The preceding fields must - * remain `viable' across partition ups and downs, since they may be - * referenced during this memset() operation. - */ - memset(&part->nchannels, 0, sizeof(struct xpc_partition) - - offsetof(struct xpc_partition, nchannels)); - - /* - * Allocate all of the channel structures as a contiguous chunk of - * memory. - */ - part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, - GFP_KERNEL); - if (part->channels == NULL) { - dev_err(xpc_chan, "can't get memory for channels\n"); - return xpcNoMemory; - } - - part->nchannels = XPC_NCHANNELS; - - - /* allocate all the required GET/PUT values */ - - part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, &part->local_GPs_base); - if (part->local_GPs == NULL) { - kfree(part->channels); - part->channels = NULL; - dev_err(xpc_chan, "can't get memory for local get/put " - "values\n"); - return xpcNoMemory; - } - - part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, - GFP_KERNEL, &part->remote_GPs_base); - if (part->remote_GPs == NULL) { - dev_err(xpc_chan, "can't get memory for remote get/put " - "values\n"); - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - - /* allocate all the required open and close args */ - - part->local_openclose_args = xpc_kzalloc_cacheline_aligned( - XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->local_openclose_args_base); - if (part->local_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for local connect args\n"); - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - part->remote_openclose_args = xpc_kzalloc_cacheline_aligned( - XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, - &part->remote_openclose_args_base); - if (part->remote_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for remote connect args\n"); - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcNoMemory; - } - - - xpc_initialize_channels(part, partid); - - atomic_set(&part->nchannels_active, 0); - atomic_set(&part->nchannels_engaged, 0); - - - /* local_IPI_amo were set to 0 by an earlier memset() */ - - /* Initialize this partitions AMO_t structure */ - part->local_IPI_amo_va = xpc_IPI_init(partid); - - spin_lock_init(&part->IPI_lock); - - atomic_set(&part->channel_mgr_requests, 1); - init_waitqueue_head(&part->channel_mgr_wq); - - sprintf(part->IPI_owner, "xpc%02d", partid); - ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED, - part->IPI_owner, (void *) (u64) partid); - if (ret != 0) { - dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " - "errno=%d\n", -ret); - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - return xpcLackOfResources; - } - - /* Setup a timer to check for dropped IPIs */ - timer = &part->dropped_IPI_timer; - init_timer(timer); - timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check; - timer->data = (unsigned long) part; - timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT; - add_timer(timer); - - /* - * With the setting of the partition setup_state to XPC_P_SETUP, we're - * declaring that this partition is ready to go. - */ - part->setup_state = XPC_P_SETUP; - - - /* - * Setup the per partition specific variables required by the - * remote partition to establish channel connections with us. - * - * The setting of the magic # indicates that these per partition - * specific variables are ready to be used. - */ - xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs); - xpc_vars_part[partid].openclose_args_pa = - __pa(part->local_openclose_args); - xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va); - cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ - xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid); - xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid); - xpc_vars_part[partid].nchannels = part->nchannels; - xpc_vars_part[partid].magic = XPC_VP_MAGIC1; - - return xpcSuccess; -} - - -/* - * Create a wrapper that hides the underlying mechanism for pulling a cacheline - * (or multiple cachelines) from a remote partition. - * - * src must be a cacheline aligned physical address on the remote partition. - * dst must be a cacheline aligned virtual address on this partition. - * cnt must be an cacheline sized - */ -static enum xpc_retval -xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, - const void *src, size_t cnt) -{ - bte_result_t bte_ret; - - - DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src)); - DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst)); - DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); - - if (part->act_state == XPC_P_DEACTIVATING) { - return part->reason; - } - - bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt, - (BTE_NORMAL | BTE_WACQUIRE), NULL); - if (bte_ret == BTE_SUCCESS) { - return xpcSuccess; - } - - dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n", - XPC_PARTID(part), bte_ret); - - return xpc_map_bte_errors(bte_ret); -} - - -/* - * Pull the remote per partition specific variables from the specified - * partition. - */ -enum xpc_retval -xpc_pull_remote_vars_part(struct xpc_partition *part) -{ - u8 buffer[L1_CACHE_BYTES * 2]; - struct xpc_vars_part *pulled_entry_cacheline = - (struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer); - struct xpc_vars_part *pulled_entry; - u64 remote_entry_cacheline_pa, remote_entry_pa; - partid_t partid = XPC_PARTID(part); - enum xpc_retval ret; - - - /* pull the cacheline that contains the variables we're interested in */ - - DBUG_ON(part->remote_vars_part_pa != - L1_CACHE_ALIGN(part->remote_vars_part_pa)); - DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2); - - remote_entry_pa = part->remote_vars_part_pa + - sn_partition_id * sizeof(struct xpc_vars_part); - - remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); - - pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline + - (remote_entry_pa & (L1_CACHE_BYTES - 1))); - - ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline, - (void *) remote_entry_cacheline_pa, - L1_CACHE_BYTES); - if (ret != xpcSuccess) { - dev_dbg(xpc_chan, "failed to pull XPC vars_part from " - "partition %d, ret=%d\n", partid, ret); - return ret; - } - - - /* see if they've been set up yet */ - - if (pulled_entry->magic != XPC_VP_MAGIC1 && - pulled_entry->magic != XPC_VP_MAGIC2) { - - if (pulled_entry->magic != 0) { - dev_dbg(xpc_chan, "partition %d's XPC vars_part for " - "partition %d has bad magic value (=0x%lx)\n", - partid, sn_partition_id, pulled_entry->magic); - return xpcBadMagic; - } - - /* they've not been initialized yet */ - return xpcRetry; - } - - if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) { - - /* validate the variables */ - - if (pulled_entry->GPs_pa == 0 || - pulled_entry->openclose_args_pa == 0 || - pulled_entry->IPI_amo_pa == 0) { - - dev_err(xpc_chan, "partition %d's XPC vars_part for " - "partition %d are not valid\n", partid, - sn_partition_id); - return xpcInvalidAddress; - } - - /* the variables we imported look to be valid */ - - part->remote_GPs_pa = pulled_entry->GPs_pa; - part->remote_openclose_args_pa = - pulled_entry->openclose_args_pa; - part->remote_IPI_amo_va = - (AMO_t *) __va(pulled_entry->IPI_amo_pa); - part->remote_IPI_nasid = pulled_entry->IPI_nasid; - part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid; - - if (part->nchannels > pulled_entry->nchannels) { - part->nchannels = pulled_entry->nchannels; - } - - /* let the other side know that we've pulled their variables */ - - xpc_vars_part[partid].magic = XPC_VP_MAGIC2; - } - - if (pulled_entry->magic == XPC_VP_MAGIC1) { - return xpcRetry; - } - - return xpcSuccess; -} - - -/* - * Get the IPI flags and pull the openclose args and/or remote GPs as needed. - */ -static u64 -xpc_get_IPI_flags(struct xpc_partition *part) -{ - unsigned long irq_flags; - u64 IPI_amo; - enum xpc_retval ret; - - - /* - * See if there are any IPI flags to be handled. - */ - - spin_lock_irqsave(&part->IPI_lock, irq_flags); - if ((IPI_amo = part->local_IPI_amo) != 0) { - part->local_IPI_amo = 0; - } - spin_unlock_irqrestore(&part->IPI_lock, irq_flags); - - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, - part->remote_openclose_args, - (void *) part->remote_openclose_args_pa, - XPC_OPENCLOSE_ARGS_SIZE); - if (ret != xpcSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull openclose args from " - "partition %d, ret=%d\n", XPC_PARTID(part), - ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) { - ret = xpc_pull_remote_cachelines(part, part->remote_GPs, - (void *) part->remote_GPs_pa, - XPC_GP_SIZE); - if (ret != xpcSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull GPs from partition " - "%d, ret=%d\n", XPC_PARTID(part), ret); - - /* don't bother processing IPIs anymore */ - IPI_amo = 0; - } - } - - return IPI_amo; -} - - -/* - * Allocate the local message queue and the notify queue. - */ -static enum xpc_retval -xpc_allocate_local_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - - // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between - // >>> iterations of the for-loop, bail if set? - - // >>> should we impose a minimum #of entries? like 4 or 8? - for (nentries = ch->local_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->local_msgqueue_base); - if (ch->local_msgqueue == NULL) { - continue; - } - - nbytes = nentries * sizeof(struct xpc_notify); - ch->notify_queue = kzalloc(nbytes, GFP_KERNEL); - if (ch->notify_queue == NULL) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->local_nentries, ch->partid, ch->number); - - ch->local_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for local message queue and notify " - "queue, partid=%d, channel=%d\n", ch->partid, ch->number); - return xpcNoMemory; -} - - -/* - * Allocate the cached remote message queue. - */ -static enum xpc_retval -xpc_allocate_remote_msgqueue(struct xpc_channel *ch) -{ - unsigned long irq_flags; - int nentries; - size_t nbytes; - - - DBUG_ON(ch->remote_nentries <= 0); - - // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between - // >>> iterations of the for-loop, bail if set? - - // >>> should we impose a minimum #of entries? like 4 or 8? - for (nentries = ch->remote_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->msg_size; - ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes, - GFP_KERNEL, - &ch->remote_msgqueue_base); - if (ch->remote_msgqueue == NULL) { - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->remote_nentries, ch->partid, ch->number); - - ch->remote_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - return xpcNoMemory; -} - - -/* - * Allocate message queues and other stuff associated with a channel. - * - * Note: Assumes all of the channel sizes are filled in. - */ -static enum xpc_retval -xpc_allocate_msgqueues(struct xpc_channel *ch) -{ - unsigned long irq_flags; - enum xpc_retval ret; - - - DBUG_ON(ch->flags & XPC_C_SETUP); - - if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) { - return ret; - } - - if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) { - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - return ret; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_SETUP; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - return xpcSuccess; -} - - -/* - * Process a connect message from a remote partition. - * - * Note: xpc_process_connect() is expecting to be called with the - * spin_lock_irqsave held and will leave it locked upon return. - */ -static void -xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) -{ - enum xpc_retval ret; - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (!(ch->flags & XPC_C_OPENREQUEST) || - !(ch->flags & XPC_C_ROPENREQUEST)) { - /* nothing more to do for now */ - return; - } - DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); - - if (!(ch->flags & XPC_C_SETUP)) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_allocate_msgqueues(ch); - spin_lock_irqsave(&ch->lock, *irq_flags); - - if (ret != xpcSuccess) { - XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); - } - if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) { - return; - } - - DBUG_ON(!(ch->flags & XPC_C_SETUP)); - DBUG_ON(ch->local_msgqueue == NULL); - DBUG_ON(ch->remote_msgqueue == NULL); - } - - if (!(ch->flags & XPC_C_OPENREPLY)) { - ch->flags |= XPC_C_OPENREPLY; - xpc_IPI_send_openreply(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_ROPENREPLY)) { - return; - } - - DBUG_ON(ch->remote_msgqueue_pa == 0); - - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ - - dev_info(xpc_chan, "channel %d to partition %d connected\n", - ch->number, ch->partid); - - spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_create_kthreads(ch, 1, 0); - spin_lock_irqsave(&ch->lock, *irq_flags); -} - - -/* - * Notify those who wanted to be notified upon delivery of their message. - */ -static void -xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put) -{ - struct xpc_notify *notify; - u8 notify_type; - s64 get = ch->w_remote_GP.get - 1; - - - while (++get < put && atomic_read(&ch->n_to_notify) > 0) { - - notify = &ch->notify_queue[get % ch->local_nentries]; - - /* - * See if the notify entry indicates it was associated with - * a message who's sender wants to be notified. It is possible - * that it is, but someone else is doing or has done the - * notification. - */ - notify_type = notify->type; - if (notify_type == 0 || - cmpxchg(¬ify->type, notify_type, 0) != - notify_type) { - continue; - } - - DBUG_ON(notify_type != XPC_N_CALL); - - atomic_dec(&ch->n_to_notify); - - if (notify->func != NULL) { - dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) notify, get, ch->partid, ch->number); - - notify->func(reason, ch->partid, ch->number, - notify->key); - - dev_dbg(xpc_chan, "notify->func() returned, " - "notify=0x%p, msg_number=%ld, partid=%d, " - "channel=%d\n", (void *) notify, get, - ch->partid, ch->number); - } - } -} - - -/* - * Free up message queues and other stuff that were allocated for the specified - * channel. - * - * Note: ch->reason and ch->reason_line are left set for debugging purposes, - * they're cleared when XPC_C_DISCONNECTED is cleared. - */ -static void -xpc_free_msgqueues(struct xpc_channel *ch) -{ - DBUG_ON(!spin_is_locked(&ch->lock)); - DBUG_ON(atomic_read(&ch->n_to_notify) != 0); - - ch->remote_msgqueue_pa = 0; - ch->func = NULL; - ch->key = NULL; - ch->msg_size = 0; - ch->local_nentries = 0; - ch->remote_nentries = 0; - ch->kthreads_assigned_limit = 0; - ch->kthreads_idle_limit = 0; - - ch->local_GP->get = 0; - ch->local_GP->put = 0; - ch->remote_GP.get = 0; - ch->remote_GP.put = 0; - ch->w_local_GP.get = 0; - ch->w_local_GP.put = 0; - ch->w_remote_GP.get = 0; - ch->w_remote_GP.put = 0; - ch->next_msg_to_pull = 0; - - if (ch->flags & XPC_C_SETUP) { - ch->flags &= ~XPC_C_SETUP; - - dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", - ch->flags, ch->partid, ch->number); - - kfree(ch->local_msgqueue_base); - ch->local_msgqueue = NULL; - kfree(ch->remote_msgqueue_base); - ch->remote_msgqueue = NULL; - kfree(ch->notify_queue); - ch->notify_queue = NULL; - } -} - - -/* - * spin_lock_irqsave() is expected to be held on entry. - */ -static void -xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - return; - } - - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - - /* make sure all activity has settled down first */ - - if (atomic_read(&ch->kthreads_assigned) > 0 || - atomic_read(&ch->references) > 0) { - return; - } - DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); - - if (part->act_state == XPC_P_DEACTIVATING) { - /* can't proceed until the other side disengages from us */ - if (xpc_partition_engaged(1UL << ch->partid)) { - return; - } - - } else { - - /* as long as the other side is up do the full protocol */ - - if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { - return; - } - - if (!(ch->flags & XPC_C_CLOSEREPLY)) { - ch->flags |= XPC_C_CLOSEREPLY; - xpc_IPI_send_closereply(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_RCLOSEREPLY)) { - return; - } - } - - /* wake those waiting for notify completion */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* >>> we do callout while holding ch->lock */ - xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put); - } - - /* both sides are disconnected now */ - - if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { - spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_disconnect_callout(ch, xpcDisconnected); - spin_lock_irqsave(&ch->lock, *irq_flags); - } - - /* it's now safe to free the channel's message queues */ - xpc_free_msgqueues(ch); - - /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */ - ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); - - atomic_dec(&part->nchannels_active); - - if (channel_was_connected) { - dev_info(xpc_chan, "channel %d to partition %d disconnected, " - "reason=%d\n", ch->number, ch->partid, ch->reason); - } - - if (ch->flags & XPC_C_WDISCONNECT) { - /* we won't lose the CPU since we're holding ch->lock */ - complete(&ch->wdisconnect_wait); - } else if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - /* time to take action on any delayed IPI flags */ - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number, - ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); - } - ch->delayed_IPI_flags = 0; - } -} - - -/* - * Process a change in the channel's remote connection state. - */ -static void -xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, - u8 IPI_flags) -{ - unsigned long irq_flags; - struct xpc_openclose_args *args = - &part->remote_openclose_args[ch_number]; - struct xpc_channel *ch = &part->channels[ch_number]; - enum xpc_retval reason; - - - - spin_lock_irqsave(&ch->lock, irq_flags); - -again: - - if ((ch->flags & XPC_C_DISCONNECTED) && - (ch->flags & XPC_C_WDISCONNECT)) { - /* - * Delay processing IPI flags until thread waiting disconnect - * has had a chance to see that the channel is disconnected. - */ - ch->delayed_IPI_flags |= IPI_flags; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - - if (IPI_flags & XPC_IPI_CLOSEREQUEST) { - - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received " - "from partid=%d, channel=%d\n", args->reason, - ch->partid, ch->number); - - /* - * If RCLOSEREQUEST is set, we're probably waiting for - * RCLOSEREPLY. We should find it and a ROPENREQUEST packed - * with this RCLOSEREQUEST in the IPI_flags. - */ - - if (ch->flags & XPC_C_RCLOSEREQUEST) { - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); - DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); - - DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY)); - IPI_flags &= ~XPC_IPI_CLOSEREPLY; - ch->flags |= XPC_C_RCLOSEREPLY; - - /* both sides have finished disconnecting */ - xpc_process_disconnect(ch, &irq_flags); - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); - goto again; - } - - if (ch->flags & XPC_C_DISCONNECTED) { - if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, - ch_number) & XPC_IPI_OPENREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, - XPC_IPI_CLOSEREQUEST); - spin_unlock(&part->IPI_lock); - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&part->nchannels_active); - ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); - } - - IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY); - - /* - * The meaningful CLOSEREQUEST connection state fields are: - * reason = reason connection is to be closed - */ - - ch->flags |= XPC_C_RCLOSEREQUEST; - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - reason = args->reason; - if (reason <= xpcSuccess || reason > xpcUnknownReason) { - reason = xpcUnknownReason; - } else if (reason == xpcUnregistering) { - reason = xpcOtherUnregistering; - } - - XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - - DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - xpc_process_disconnect(ch, &irq_flags); - } - - - if (IPI_flags & XPC_IPI_CLOSEREPLY) { - - dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d," - " channel=%d\n", ch->partid, ch->number); - - if (ch->flags & XPC_C_DISCONNECTED) { - DBUG_ON(part->act_state != XPC_P_DEACTIVATING); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); - - if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { - if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number) - & XPC_IPI_CLOSEREQUEST)) { - - DBUG_ON(ch->delayed_IPI_flags != 0); - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch_number, XPC_IPI_CLOSEREPLY); - spin_unlock(&part->IPI_lock); - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - ch->flags |= XPC_C_RCLOSEREPLY; - - if (ch->flags & XPC_C_CLOSEREPLY) { - /* both sides have finished disconnecting */ - xpc_process_disconnect(ch, &irq_flags); - } - } - - - if (IPI_flags & XPC_IPI_OPENREQUEST) { - - dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, " - "local_nentries=%d) received from partid=%d, " - "channel=%d\n", args->msg_size, args->local_nentries, - ch->partid, ch->number); - - if (part->act_state == XPC_P_DEACTIVATING || - (ch->flags & XPC_C_ROPENREQUEST)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { - ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | - XPC_C_OPENREQUEST))); - DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | - XPC_C_OPENREPLY | XPC_C_CONNECTED)); - - /* - * The meaningful OPENREQUEST connection state fields are: - * msg_size = size of channel's messages in bytes - * local_nentries = remote partition's local_nentries - */ - if (args->msg_size == 0 || args->local_nentries == 0) { - /* assume OPENREQUEST was delayed by mistake */ - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); - ch->remote_nentries = args->local_nentries; - - - if (ch->flags & XPC_C_OPENREQUEST) { - if (args->msg_size != ch->msg_size) { - XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - } else { - ch->msg_size = args->msg_size; - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&part->nchannels_active); - } - - xpc_process_connect(ch, &irq_flags); - } - - - if (IPI_flags & XPC_IPI_OPENREPLY) { - - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, " - "local_nentries=%d, remote_nentries=%d) received from " - "partid=%d, channel=%d\n", args->local_msgqueue_pa, - args->local_nentries, args->remote_nentries, - ch->partid, ch->number); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - if (!(ch->flags & XPC_C_OPENREQUEST)) { - XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } - - DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); - DBUG_ON(ch->flags & XPC_C_CONNECTED); - - /* - * The meaningful OPENREPLY connection state fields are: - * local_msgqueue_pa = physical address of remote - * partition's local_msgqueue - * local_nentries = remote partition's local_nentries - * remote_nentries = remote partition's remote_nentries - */ - DBUG_ON(args->local_msgqueue_pa == 0); - DBUG_ON(args->local_nentries == 0); - DBUG_ON(args->remote_nentries == 0); - - ch->flags |= XPC_C_ROPENREPLY; - ch->remote_msgqueue_pa = args->local_msgqueue_pa; - - if (args->local_nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " - "remote_nentries=%d, old remote_nentries=%d, " - "partid=%d, channel=%d\n", - args->local_nentries, ch->remote_nentries, - ch->partid, ch->number); - - ch->remote_nentries = args->local_nentries; - } - if (args->remote_nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " - "local_nentries=%d, old local_nentries=%d, " - "partid=%d, channel=%d\n", - args->remote_nentries, ch->local_nentries, - ch->partid, ch->number); - - ch->local_nentries = args->remote_nentries; - } - - xpc_process_connect(ch, &irq_flags); - } - - spin_unlock_irqrestore(&ch->lock, irq_flags); -} - - -/* - * Attempt to establish a channel connection to a remote partition. - */ -static enum xpc_retval -xpc_connect_channel(struct xpc_channel *ch) -{ - unsigned long irq_flags; - struct xpc_registration *registration = &xpc_registrations[ch->number]; - - - if (mutex_trylock(®istration->mutex) == 0) { - return xpcRetry; - } - - if (!XPC_CHANNEL_REGISTERED(ch->number)) { - mutex_unlock(®istration->mutex); - return xpcUnregistered; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - - DBUG_ON(ch->flags & XPC_C_CONNECTED); - DBUG_ON(ch->flags & XPC_C_OPENREQUEST); - - if (ch->flags & XPC_C_DISCONNECTING) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - mutex_unlock(®istration->mutex); - return ch->reason; - } - - - /* add info from the channel connect registration to the channel */ - - ch->kthreads_assigned_limit = registration->assigned_limit; - ch->kthreads_idle_limit = registration->idle_limit; - DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); - DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); - DBUG_ON(atomic_read(&ch->kthreads_active) != 0); - - ch->func = registration->func; - DBUG_ON(registration->func == NULL); - ch->key = registration->key; - - ch->local_nentries = registration->nentries; - - if (ch->flags & XPC_C_ROPENREQUEST) { - if (registration->msg_size != ch->msg_size) { - /* the local and remote sides aren't the same */ - - /* - * Because XPC_DISCONNECT_CHANNEL() can block we're - * forced to up the registration sema before we unlock - * the channel lock. But that's okay here because we're - * done with the part that required the registration - * sema. XPC_DISCONNECT_CHANNEL() requires that the - * channel lock be locked and will unlock and relock - * the channel lock as needed. - */ - mutex_unlock(®istration->mutex); - XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpcUnequalMsgSizes; - } - } else { - ch->msg_size = registration->msg_size; - - XPC_SET_REASON(ch, 0, 0); - ch->flags &= ~XPC_C_DISCONNECTED; - - atomic_inc(&xpc_partitions[ch->partid].nchannels_active); - } - - mutex_unlock(®istration->mutex); - - - /* initiate the connection */ - - ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_IPI_send_openrequest(ch, &irq_flags); - - xpc_process_connect(ch, &irq_flags); - - spin_unlock_irqrestore(&ch->lock, irq_flags); - - return xpcSuccess; -} - - -/* - * Clear some of the msg flags in the local message queue. - */ -static inline void -xpc_clear_local_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 get; - - - get = ch->w_remote_GP.get; - do { - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (get % ch->local_nentries) * ch->msg_size); - msg->flags = 0; - } while (++get < (volatile s64) ch->remote_GP.get); -} - - -/* - * Clear some of the msg flags in the remote message queue. - */ -static inline void -xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - s64 put; - - - put = ch->w_remote_GP.put; - do { - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - (put % ch->remote_nentries) * ch->msg_size); - msg->flags = 0; - } while (++put < (volatile s64) ch->remote_GP.put); -} - - -static void -xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) -{ - struct xpc_channel *ch = &part->channels[ch_number]; - int nmsgs_sent; - - - ch->remote_GP = part->remote_GPs[ch_number]; - - - /* See what, if anything, has changed for each connected channel */ - - xpc_msgqueue_ref(ch); - - if (ch->w_remote_GP.get == ch->remote_GP.get && - ch->w_remote_GP.put == ch->remote_GP.put) { - /* nothing changed since GPs were last pulled */ - xpc_msgqueue_deref(ch); - return; - } - - if (!(ch->flags & XPC_C_CONNECTED)){ - xpc_msgqueue_deref(ch); - return; - } - - - /* - * First check to see if messages recently sent by us have been - * received by the other side. (The remote GET value will have - * changed since we last looked at it.) - */ - - if (ch->w_remote_GP.get != ch->remote_GP.get) { - - /* - * We need to notify any senders that want to be notified - * that their sent messages have been received by their - * intended recipients. We need to do this before updating - * w_remote_GP.get so that we don't allocate the same message - * queue entries prematurely (see xpc_allocate_msg()). - */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* - * Notify senders that messages sent have been - * received and delivered by the other side. - */ - xpc_notify_senders(ch, xpcMsgDelivered, - ch->remote_GP.get); - } - - /* - * Clear msg->flags in previously sent messages, so that - * they're ready for xpc_allocate_msg(). - */ - xpc_clear_local_msgqueue_flags(ch); - - ch->w_remote_GP.get = ch->remote_GP.get; - - dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.get, ch->partid, - ch->number); - - /* - * If anyone was waiting for message queue entries to become - * available, wake them up. - */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { - wake_up(&ch->msg_allocate_wq); - } - } - - - /* - * Now check for newly sent messages by the other side. (The remote - * PUT value will have changed since we last looked at it.) - */ - - if (ch->w_remote_GP.put != ch->remote_GP.put) { - /* - * Clear msg->flags in previously received messages, so that - * they're ready for xpc_get_deliverable_msg(). - */ - xpc_clear_remote_msgqueue_flags(ch); - - ch->w_remote_GP.put = ch->remote_GP.put; - - dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " - "channel=%d\n", ch->w_remote_GP.put, ch->partid, - ch->number); - - nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get; - if (nmsgs_sent > 0) { - dev_dbg(xpc_chan, "msgs waiting to be copied and " - "delivered=%d, partid=%d, channel=%d\n", - nmsgs_sent, ch->partid, ch->number); - - if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { - xpc_activate_kthreads(ch, nmsgs_sent); - } - } - } - - xpc_msgqueue_deref(ch); -} - - -void -xpc_process_channel_activity(struct xpc_partition *part) -{ - unsigned long irq_flags; - u64 IPI_amo, IPI_flags; - struct xpc_channel *ch; - int ch_number; - u32 ch_flags; - - - IPI_amo = xpc_get_IPI_flags(part); - - /* - * Initiate channel connections for registered channels. - * - * For each connected channel that has pending messages activate idle - * kthreads and/or create new kthreads as needed. - */ - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - - /* - * Process any open or close related IPI flags, and then deal - * with connecting or disconnecting the channel as required. - */ - - IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number); - - if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) { - xpc_process_openclose_IPI(part, ch_number, IPI_flags); - } - - ch_flags = ch->flags; /* need an atomic snapshot of flags */ - - if (ch_flags & XPC_C_DISCONNECTING) { - spin_lock_irqsave(&ch->lock, irq_flags); - xpc_process_disconnect(ch, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - continue; - } - - if (part->act_state == XPC_P_DEACTIVATING) { - continue; - } - - if (!(ch_flags & XPC_C_CONNECTED)) { - if (!(ch_flags & XPC_C_OPENREQUEST)) { - DBUG_ON(ch_flags & XPC_C_SETUP); - (void) xpc_connect_channel(ch); - } else { - spin_lock_irqsave(&ch->lock, irq_flags); - xpc_process_connect(ch, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - continue; - } - - - /* - * Process any message related IPI flags, this may involve the - * activation of kthreads to deliver any pending messages sent - * from the other partition. - */ - - if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) { - xpc_process_msg_IPI(part, ch_number); - } - } -} - - -/* - * XPC's heartbeat code calls this function to inform XPC that a partition is - * going down. XPC responds by tearing down the XPartition Communication - * infrastructure used for the just downed partition. - * - * XPC's heartbeat code will never call this function and xpc_partition_up() - * at the same time. Nor will it ever make multiple calls to either function - * at the same time. - */ -void -xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason) -{ - unsigned long irq_flags; - int ch_number; - struct xpc_channel *ch; - - - dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", - XPC_PARTID(part), reason); - - if (!xpc_part_ref(part)) { - /* infrastructure for this partition isn't currently set up */ - return; - } - - - /* disconnect channels associated with the partition going down */ - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch = &part->channels[ch_number]; - - xpc_msgqueue_ref(ch); - spin_lock_irqsave(&ch->lock, irq_flags); - - XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); - - spin_unlock_irqrestore(&ch->lock, irq_flags); - xpc_msgqueue_deref(ch); - } - - xpc_wakeup_channel_mgr(part); - - xpc_part_deref(part); -} - - -/* - * Teardown the infrastructure necessary to support XPartition Communication - * between the specified remote partition and the local one. - */ -void -xpc_teardown_infrastructure(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - - - /* - * We start off by making this partition inaccessible to local - * processes by marking it as no longer setup. Then we make it - * inaccessible to remote processes by clearing the XPC per partition - * specific variable's magic # (which indicates that these variables - * are no longer valid) and by ignoring all XPC notify IPIs sent to - * this partition. - */ - - DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); - DBUG_ON(atomic_read(&part->nchannels_active) != 0); - DBUG_ON(part->setup_state != XPC_P_SETUP); - part->setup_state = XPC_P_WTEARDOWN; - - xpc_vars_part[partid].magic = 0; - - - free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid); - - - /* - * Before proceeding with the teardown we have to wait until all - * existing references cease. - */ - wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); - - - /* now we can begin tearing down the infrastructure */ - - part->setup_state = XPC_P_TORNDOWN; - - /* in case we've still got outstanding timers registered... */ - del_timer_sync(&part->dropped_IPI_timer); - - kfree(part->remote_openclose_args_base); - part->remote_openclose_args = NULL; - kfree(part->local_openclose_args_base); - part->local_openclose_args = NULL; - kfree(part->remote_GPs_base); - part->remote_GPs = NULL; - kfree(part->local_GPs_base); - part->local_GPs = NULL; - kfree(part->channels); - part->channels = NULL; - part->local_IPI_amo_va = NULL; -} - - -/* - * Called by XP at the time of channel connection registration to cause - * XPC to establish connections to all currently active partitions. - */ -void -xpc_initiate_connect(int ch_number) -{ - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_part_ref(part)) { - ch = &part->channels[ch_number]; - - /* - * Initiate the establishment of a connection on the - * newly registered channel to the remote partition. - */ - xpc_wakeup_channel_mgr(part); - xpc_part_deref(part); - } - } -} - - -void -xpc_connected_callout(struct xpc_channel *ch) -{ - /* let the registerer know that a connection has been established */ - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - - ch->func(xpcConnected, ch->partid, ch->number, - (void *) (u64) ch->local_nentries, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - } -} - - -/* - * Called by XP at the time of channel connection unregistration to cause - * XPC to teardown all current connections for the specified channel. - * - * Before returning xpc_initiate_disconnect() will wait until all connections - * on the specified channel have been closed/torndown. So the caller can be - * assured that they will not be receiving any more callouts from XPC to the - * function they registered via xpc_connect(). - * - * Arguments: - * - * ch_number - channel # to unregister. - */ -void -xpc_initiate_disconnect(int ch_number) -{ - unsigned long irq_flags; - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - - - DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); - - /* initiate the channel disconnect for every active partition */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_part_ref(part)) { - ch = &part->channels[ch_number]; - xpc_msgqueue_ref(ch); - - spin_lock_irqsave(&ch->lock, irq_flags); - - if (!(ch->flags & XPC_C_DISCONNECTED)) { - ch->flags |= XPC_C_WDISCONNECT; - - XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, - &irq_flags); - } - - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_msgqueue_deref(ch); - xpc_part_deref(part); - } - } - - xpc_disconnect_wait(ch_number); -} - - -/* - * To disconnect a channel, and reflect it back to all who may be waiting. - * - * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by - * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by - * xpc_disconnect_wait(). - * - * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. - */ -void -xpc_disconnect_channel(const int line, struct xpc_channel *ch, - enum xpc_retval reason, unsigned long *irq_flags) -{ - u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED); - - - DBUG_ON(!spin_is_locked(&ch->lock)); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { - return; - } - DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); - - dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", - reason, line, ch->partid, ch->number); - - XPC_SET_REASON(ch, reason, line); - - ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); - /* some of these may not have been set */ - ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | - XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | - XPC_C_CONNECTING | XPC_C_CONNECTED); - - xpc_IPI_send_closerequest(ch, irq_flags); - - if (channel_was_connected) { - ch->flags |= XPC_C_WASCONNECTED; - } - - spin_unlock_irqrestore(&ch->lock, *irq_flags); - - /* wake all idle kthreads so they can exit */ - if (atomic_read(&ch->kthreads_idle) > 0) { - wake_up_all(&ch->idle_wq); - - } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { - /* start a kthread that will do the xpcDisconnecting callout */ - xpc_create_kthreads(ch, 1, 1); - } - - /* wake those waiting to allocate an entry from the local msg queue */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { - wake_up(&ch->msg_allocate_wq); - } - - spin_lock_irqsave(&ch->lock, *irq_flags); -} - - -void -xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason) -{ - /* - * Let the channel's registerer know that the channel is being - * disconnected. We don't want to do this if the registerer was never - * informed of a connection being made. - */ - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " - "channel=%d\n", reason, ch->partid, ch->number); - - ch->func(reason, ch->partid, ch->number, NULL, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " - "channel=%d\n", reason, ch->partid, ch->number); - } -} - - -/* - * Wait for a message entry to become available for the specified channel, - * but don't wait any longer than 1 jiffy. - */ -static enum xpc_retval -xpc_allocate_msg_wait(struct xpc_channel *ch) -{ - enum xpc_retval ret; - - - if (ch->flags & XPC_C_DISCONNECTING) { - DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? - return ch->reason; - } - - atomic_inc(&ch->n_on_msg_allocate_wq); - ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1); - atomic_dec(&ch->n_on_msg_allocate_wq); - - if (ch->flags & XPC_C_DISCONNECTING) { - ret = ch->reason; - DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? - } else if (ret == 0) { - ret = xpcTimeout; - } else { - ret = xpcInterrupted; - } - - return ret; -} - - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. - */ -static enum xpc_retval -xpc_allocate_msg(struct xpc_channel *ch, u32 flags, - struct xpc_msg **address_of_msg) -{ - struct xpc_msg *msg; - enum xpc_retval ret; - s64 put; - - - /* this reference will be dropped in xpc_send_msg() */ - xpc_msgqueue_ref(ch); - - if (ch->flags & XPC_C_DISCONNECTING) { - xpc_msgqueue_deref(ch); - return ch->reason; - } - if (!(ch->flags & XPC_C_CONNECTED)) { - xpc_msgqueue_deref(ch); - return xpcNotConnected; - } - - - /* - * Get the next available message entry from the local message queue. - * If none are available, we'll make sure that we grab the latest - * GP values. - */ - ret = xpcTimeout; - - while (1) { - - put = (volatile s64) ch->w_local_GP.put; - if (put - (volatile s64) ch->w_remote_GP.get < - ch->local_nentries) { - - /* There are available message entries. We need to try - * to secure one for ourselves. We'll do this by trying - * to increment w_local_GP.put as long as someone else - * doesn't beat us to it. If they do, we'll have to - * try again. - */ - if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == - put) { - /* we got the entry referenced by put */ - break; - } - continue; /* try again */ - } - - - /* - * There aren't any available msg entries at this time. - * - * In waiting for a message entry to become available, - * we set a timeout in case the other side is not - * sending completion IPIs. This lets us fake an IPI - * that will cause the IPI handler to fetch the latest - * GP values as if an IPI was sent by the other side. - */ - if (ret == xpcTimeout) { - xpc_IPI_send_local_msgrequest(ch); - } - - if (flags & XPC_NOWAIT) { - xpc_msgqueue_deref(ch); - return xpcNoWait; - } - - ret = xpc_allocate_msg_wait(ch); - if (ret != xpcInterrupted && ret != xpcTimeout) { - xpc_msgqueue_deref(ch); - return ret; - } - } - - - /* get the message's address and initialize it */ - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (put % ch->local_nentries) * ch->msg_size); - - - DBUG_ON(msg->flags != 0); - msg->number = put; - - dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", put + 1, - (void *) msg, msg->number, ch->partid, ch->number); - - *address_of_msg = msg; - - return xpcSuccess; -} - - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. NOTE that this routine can sleep waiting for a message - * entry to become available. To not sleep, pass in the XPC_NOWAIT flag. - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel #. - * flags - see xpc.h for valid flags. - * payload - address of the allocated payload area pointer (filled in on - * return) in which the user-defined message is constructed. - */ -enum xpc_retval -xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - enum xpc_retval ret = xpcUnknownReason; - struct xpc_msg *msg = NULL; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - - *payload = NULL; - - if (xpc_part_ref(part)) { - ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg); - xpc_part_deref(part); - - if (msg != NULL) { - *payload = &msg->payload; - } - } - - return ret; -} - - -/* - * Now we actually send the messages that are ready to be sent by advancing - * the local message queue's Put value and then send an IPI to the recipient - * partition. - */ -static void -xpc_send_msgs(struct xpc_channel *ch, s64 initial_put) -{ - struct xpc_msg *msg; - s64 put = initial_put + 1; - int send_IPI = 0; - - - while (1) { - - while (1) { - if (put == (volatile s64) ch->w_local_GP.put) { - break; - } - - msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + - (put % ch->local_nentries) * ch->msg_size); - - if (!(msg->flags & XPC_M_READY)) { - break; - } - - put++; - } - - if (put == initial_put) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) != - initial_put) { - /* someone else beat us to it */ - DBUG_ON((volatile s64) ch->local_GP->put < initial_put); - break; - } - - /* we just set the new value of local_GP->put */ - - dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " - "channel=%d\n", put, ch->partid, ch->number); - - send_IPI = 1; - - /* - * We need to ensure that the message referenced by - * local_GP->put is not XPC_M_READY or that local_GP->put - * equals w_local_GP.put, so we'll go have a look. - */ - initial_put = put; - } - - if (send_IPI) { - xpc_IPI_send_msgrequest(ch); - } -} - - -/* - * Common code that does the actual sending of the message by advancing the - * local message queue's Put value and sends an IPI to the partition the - * message is being sent to. - */ -static enum xpc_retval -xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, - xpc_notify_func func, void *key) -{ - enum xpc_retval ret = xpcSuccess; - struct xpc_notify *notify = notify; - s64 put, msg_number = msg->number; - - - DBUG_ON(notify_type == XPC_N_CALL && func == NULL); - DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) != - msg_number % ch->local_nentries); - DBUG_ON(msg->flags & XPC_M_READY); - - if (ch->flags & XPC_C_DISCONNECTING) { - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ch->reason; - } - - if (notify_type != 0) { - /* - * Tell the remote side to send an ACK interrupt when the - * message has been delivered. - */ - msg->flags |= XPC_M_INTERRUPT; - - atomic_inc(&ch->n_to_notify); - - notify = &ch->notify_queue[msg_number % ch->local_nentries]; - notify->func = func; - notify->key = key; - notify->type = notify_type; - - // >>> is a mb() needed here? - - if (ch->flags & XPC_C_DISCONNECTING) { - /* - * An error occurred between our last error check and - * this one. We will try to clear the type field from - * the notify entry. If we succeed then - * xpc_disconnect_channel() didn't already process - * the notify entry. - */ - if (cmpxchg(¬ify->type, notify_type, 0) == - notify_type) { - atomic_dec(&ch->n_to_notify); - ret = ch->reason; - } - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; - } - } - - msg->flags |= XPC_M_READY; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->put. - */ - mb(); - - /* see if the message is next in line to be sent, if so send it */ - - put = ch->local_GP->put; - if (put == msg_number) { - xpc_send_msgs(ch, put); - } - - /* drop the reference grabbed in xpc_allocate_msg() */ - xpc_msgqueue_deref(ch); - return ret; -} - - -/* - * Send a message previously allocated using xpc_initiate_allocate() on the - * specified channel connected to the specified partition. - * - * This routine will not wait for the message to be received, nor will - * notification be given when it does happen. Once this routine has returned - * the message entry allocated via xpc_initiate_allocate() is no longer - * accessable to the caller. - * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - */ -enum xpc_retval -xpc_initiate_send(partid_t partid, int ch_number, void *payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xpc_retval ret; - - - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, - partid, ch_number); - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); - - ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); - - return ret; -} - - -/* - * Send a message previously allocated using xpc_initiate_allocate on the - * specified channel connected to the specified partition. - * - * This routine will not wait for the message to be sent. Once this routine - * has returned the message entry allocated via xpc_initiate_allocate() is no - * longer accessable to the caller. - * - * Once the remote end of the channel has received the message, the function - * passed as an argument to xpc_initiate_send_notify() will be called. This - * allows the sender to free up or re-use any buffers referenced by the - * message, but does NOT mean the message has been processed at the remote - * end by a receiver. - * - * If this routine returns an error, the caller's function will NOT be called. - * - * This routine, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # to send message on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - * func - function to call with asynchronous notification of message - * receipt. THIS FUNCTION MUST BE NON-BLOCKING. - * key - user-defined key to be passed to the function when it's called. - */ -enum xpc_retval -xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload, - xpc_notify_func func, void *key) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - enum xpc_retval ret; - - - dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, - partid, ch_number); - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - DBUG_ON(msg == NULL); - DBUG_ON(func == NULL); - - ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, - func, key); - return ret; -} - - -static struct xpc_msg * -xpc_pull_remote_msg(struct xpc_channel *ch, s64 get) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - struct xpc_msg *remote_msg, *msg; - u32 msg_index, nmsgs; - u64 msg_offset; - enum xpc_retval ret; - - - if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) { - /* we were interrupted by a signal */ - return NULL; - } - - while (get >= ch->next_msg_to_pull) { - - /* pull as many messages as are ready and able to be pulled */ - - msg_index = ch->next_msg_to_pull % ch->remote_nentries; - - DBUG_ON(ch->next_msg_to_pull >= - (volatile s64) ch->w_remote_GP.put); - nmsgs = (volatile s64) ch->w_remote_GP.put - - ch->next_msg_to_pull; - if (msg_index + nmsgs > ch->remote_nentries) { - /* ignore the ones that wrap the msg queue for now */ - nmsgs = ch->remote_nentries - msg_index; - } - - msg_offset = msg_index * ch->msg_size; - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - msg_offset); - remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa + - msg_offset); - - if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg, - nmsgs * ch->msg_size)) != xpcSuccess) { - - dev_dbg(xpc_chan, "failed to pull %d msgs starting with" - " msg %ld from partition %d, channel=%d, " - "ret=%d\n", nmsgs, ch->next_msg_to_pull, - ch->partid, ch->number, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - - mutex_unlock(&ch->msg_to_pull_mutex); - return NULL; - } - - mb(); /* >>> this may not be needed, we're not sure */ - - ch->next_msg_to_pull += nmsgs; - } - - mutex_unlock(&ch->msg_to_pull_mutex); - - /* return the message we were looking for */ - msg_offset = (get % ch->remote_nentries) * ch->msg_size; - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset); - - return msg; -} - - -/* - * Get a message to be delivered. - */ -static struct xpc_msg * -xpc_get_deliverable_msg(struct xpc_channel *ch) -{ - struct xpc_msg *msg = NULL; - s64 get; - - - do { - if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) { - break; - } - - get = (volatile s64) ch->w_local_GP.get; - if (get == (volatile s64) ch->w_remote_GP.put) { - break; - } - - /* There are messages waiting to be pulled and delivered. - * We need to try to secure one for ourselves. We'll do this - * by trying to increment w_local_GP.get and hope that no one - * else beats us to it. If they do, we'll we'll simply have - * to try again for the next one. - */ - - if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) { - /* we got the entry referenced by get */ - - dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " - "partid=%d, channel=%d\n", get + 1, - ch->partid, ch->number); - - /* pull the message from the remote partition */ - - msg = xpc_pull_remote_msg(ch, get); - - DBUG_ON(msg != NULL && msg->number != get); - DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE)); - DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY)); - - break; - } - - } while (1); - - return msg; -} - - -/* - * Deliver a message to its intended recipient. - */ -void -xpc_deliver_msg(struct xpc_channel *ch) -{ - struct xpc_msg *msg; - - - if ((msg = xpc_get_deliverable_msg(ch)) != NULL) { - - /* - * This ref is taken to protect the payload itself from being - * freed before the user is finished with it, which the user - * indicates by calling xpc_initiate_received(). - */ - xpc_msgqueue_ref(ch); - - atomic_inc(&ch->kthreads_active); - - if (ch->func != NULL) { - dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg->number, ch->partid, - ch->number); - - /* deliver the message to its intended recipient */ - ch->func(xpcMsgReceived, ch->partid, ch->number, - &msg->payload, ch->key); - - dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " - "msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg->number, ch->partid, - ch->number); - } - - atomic_dec(&ch->kthreads_active); - } -} - - -/* - * Now we actually acknowledge the messages that have been delivered and ack'd - * by advancing the cached remote message queue's Get value and if requested - * send an IPI to the message sender's partition. - */ -static void -xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) -{ - struct xpc_msg *msg; - s64 get = initial_get + 1; - int send_IPI = 0; - - - while (1) { - - while (1) { - if (get == (volatile s64) ch->w_local_GP.get) { - break; - } - - msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + - (get % ch->remote_nentries) * ch->msg_size); - - if (!(msg->flags & XPC_M_DONE)) { - break; - } - - msg_flags |= msg->flags; - get++; - } - - if (get == initial_get) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) != - initial_get) { - /* someone else beat us to it */ - DBUG_ON((volatile s64) ch->local_GP->get <= - initial_get); - break; - } - - /* we just set the new value of local_GP->get */ - - dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " - "channel=%d\n", get, ch->partid, ch->number); - - send_IPI = (msg_flags & XPC_M_INTERRUPT); - - /* - * We need to ensure that the message referenced by - * local_GP->get is not XPC_M_DONE or that local_GP->get - * equals w_local_GP.get, so we'll go have a look. - */ - initial_get = get; - } - - if (send_IPI) { - xpc_IPI_send_msgrequest(ch); - } -} - - -/* - * Acknowledge receipt of a delivered message. - * - * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition - * that sent the message. - * - * This function, although called by users, does not call xpc_part_ref() to - * ensure that the partition infrastructure is in place. It relies on the - * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). - * - * Arguments: - * - * partid - ID of partition to which the channel is connected. - * ch_number - channel # message received on. - * payload - pointer to the payload area allocated via - * xpc_initiate_allocate(). - */ -void -xpc_initiate_received(partid_t partid, int ch_number, void *payload) -{ - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_channel *ch; - struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); - s64 get, msg_number = msg->number; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); - - ch = &part->channels[ch_number]; - - dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", - (void *) msg, msg_number, ch->partid, ch->number); - - DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) != - msg_number % ch->remote_nentries); - DBUG_ON(msg->flags & XPC_M_DONE); - - msg->flags |= XPC_M_DONE; - - /* - * The preceding store of msg->flags must occur before the following - * load of ch->local_GP->get. - */ - mb(); - - /* - * See if this message is next in line to be acknowledged as having - * been delivered. - */ - get = ch->local_GP->get; - if (get == msg_number) { - xpc_acknowledge_msgs(ch, get, msg->flags); - } - - /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */ - xpc_msgqueue_deref(ch); -} - diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c deleted file mode 100644 index 9e0b164da9c..00000000000 --- a/arch/ia64/sn/kernel/xpc_main.c +++ /dev/null @@ -1,1431 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) support - standard version. - * - * XPC provides a message passing capability that crosses partition - * boundaries. This module is made up of two parts: - * - * partition This part detects the presence/absence of other - * partitions. It provides a heartbeat and monitors - * the heartbeats of other partitions. - * - * channel This part manages the channels and sends/receives - * messages across them to/from other partitions. - * - * There are a couple of additional functions residing in XP, which - * provide an interface to XPC for its users. - * - * - * Caveats: - * - * . We currently have no way to determine which nasid an IPI came - * from. Thus, xpc_IPI_send() does a remote AMO write followed by - * an IPI. The AMO indicates where data is to be pulled from, so - * after the IPI arrives, the remote partition checks the AMO word. - * The IPI can actually arrive before the AMO however, so other code - * must periodically check for this case. Also, remote AMO operations - * do not reliably time out. Thus we do a remote PIO read solely to - * know whether the remote partition is down and whether we should - * stop sending IPIs to it. This remote PIO read operation is set up - * in a special nofault region so SAL knows to ignore (and cleanup) - * any errors due to the remote AMO write, PIO read, and/or PIO - * write operations. - * - * If/when new hardware solves this IPI problem, we should abandon - * the current approach. - * - */ - - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/syscalls.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/completion.h> -#include <linux/kdebug.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/uaccess.h> -#include <asm/sn/xpc.h> - - -/* define two XPC debug device structures to be used with dev_dbg() et al */ - -struct device_driver xpc_dbg_name = { - .name = "xpc" -}; - -struct device xpc_part_dbg_subname = { - .bus_id = {0}, /* set to "part" at xpc_init() time */ - .driver = &xpc_dbg_name -}; - -struct device xpc_chan_dbg_subname = { - .bus_id = {0}, /* set to "chan" at xpc_init() time */ - .driver = &xpc_dbg_name -}; - -struct device *xpc_part = &xpc_part_dbg_subname; -struct device *xpc_chan = &xpc_chan_dbg_subname; - - -static int xpc_kdebug_ignore; - - -/* systune related variables for /proc/sys directories */ - -static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; -static int xpc_hb_min_interval = 1; -static int xpc_hb_max_interval = 10; - -static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; -static int xpc_hb_check_min_interval = 10; -static int xpc_hb_check_max_interval = 120; - -int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT; -static int xpc_disengage_request_min_timelimit = 0; -static int xpc_disengage_request_max_timelimit = 120; - -static ctl_table xpc_sys_xpc_hb_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb_interval", - .data = &xpc_hb_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_hb_min_interval, - .extra2 = &xpc_hb_max_interval - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb_check_interval", - .data = &xpc_hb_check_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_hb_check_min_interval, - .extra2 = &xpc_hb_check_max_interval - }, - {} -}; -static ctl_table xpc_sys_xpc_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "hb", - .mode = 0555, - .child = xpc_sys_xpc_hb_dir - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "disengage_request_timelimit", - .data = &xpc_disengage_request_timelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xpc_disengage_request_min_timelimit, - .extra2 = &xpc_disengage_request_max_timelimit - }, - {} -}; -static ctl_table xpc_sys_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xpc", - .mode = 0555, - .child = xpc_sys_xpc_dir - }, - {} -}; -static struct ctl_table_header *xpc_sysctl; - -/* non-zero if any remote partition disengage request was timed out */ -int xpc_disengage_request_timedout; - -/* #of IRQs received */ -static atomic_t xpc_act_IRQ_rcvd; - -/* IRQ handler notifies this wait queue on receipt of an IRQ */ -static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); - -static unsigned long xpc_hb_check_timeout; - -/* notification that the xpc_hb_checker thread has exited */ -static DECLARE_COMPLETION(xpc_hb_checker_exited); - -/* notification that the xpc_discovery thread has exited */ -static DECLARE_COMPLETION(xpc_discovery_exited); - - -static struct timer_list xpc_hb_timer; - - -static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); - - -static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); -static struct notifier_block xpc_reboot_notifier = { - .notifier_call = xpc_system_reboot, -}; - -static int xpc_system_die(struct notifier_block *, unsigned long, void *); -static struct notifier_block xpc_die_notifier = { - .notifier_call = xpc_system_die, -}; - - -/* - * Timer function to enforce the timelimit on the partition disengage request. - */ -static void -xpc_timeout_partition_disengage_request(unsigned long data) -{ - struct xpc_partition *part = (struct xpc_partition *) data; - - - DBUG_ON(time_before(jiffies, part->disengage_request_timeout)); - - (void) xpc_partition_disengaged(part); - - DBUG_ON(part->disengage_request_timeout != 0); - DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0); -} - - -/* - * Notify the heartbeat check thread that an IRQ has been received. - */ -static irqreturn_t -xpc_act_IRQ_handler(int irq, void *dev_id) -{ - atomic_inc(&xpc_act_IRQ_rcvd); - wake_up_interruptible(&xpc_act_IRQ_wq); - return IRQ_HANDLED; -} - - -/* - * Timer to produce the heartbeat. The timer structures function is - * already set when this is initially called. A tunable is used to - * specify when the next timeout should occur. - */ -static void -xpc_hb_beater(unsigned long dummy) -{ - xpc_vars->heartbeat++; - - if (time_after_eq(jiffies, xpc_hb_check_timeout)) { - wake_up_interruptible(&xpc_act_IRQ_wq); - } - - xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); - add_timer(&xpc_hb_timer); -} - - -/* - * This thread is responsible for nearly all of the partition - * activation/deactivation. - */ -static int -xpc_hb_checker(void *ignore) -{ - int last_IRQ_count = 0; - int new_IRQ_count; - int force_IRQ=0; - - - /* this thread was marked active by xpc_hb_init() */ - - daemonize(XPC_HB_CHECK_THREAD_NAME); - - set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); - - /* set our heartbeating to other partitions into motion */ - xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); - xpc_hb_beater(0); - - while (!(volatile int) xpc_exiting) { - - dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " - "been received\n", - (int) (xpc_hb_check_timeout - jiffies), - atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); - - - /* checking of remote heartbeats is skewed by IRQ handling */ - if (time_after_eq(jiffies, xpc_hb_check_timeout)) { - dev_dbg(xpc_part, "checking remote heartbeats\n"); - xpc_check_remote_hb(); - - /* - * We need to periodically recheck to ensure no - * IPI/AMO pairs have been missed. That check - * must always reset xpc_hb_check_timeout. - */ - force_IRQ = 1; - } - - - /* check for outstanding IRQs */ - new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); - if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { - force_IRQ = 0; - - dev_dbg(xpc_part, "found an IRQ to process; will be " - "resetting xpc_hb_check_timeout\n"); - - last_IRQ_count += xpc_identify_act_IRQ_sender(); - if (last_IRQ_count < new_IRQ_count) { - /* retry once to help avoid missing AMO */ - (void) xpc_identify_act_IRQ_sender(); - } - last_IRQ_count = new_IRQ_count; - - xpc_hb_check_timeout = jiffies + - (xpc_hb_check_interval * HZ); - } - - /* wait for IRQ or timeout */ - (void) wait_event_interruptible(xpc_act_IRQ_wq, - (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) || - time_after_eq(jiffies, xpc_hb_check_timeout) || - (volatile int) xpc_exiting)); - } - - dev_dbg(xpc_part, "heartbeat checker is exiting\n"); - - - /* mark this thread as having exited */ - complete(&xpc_hb_checker_exited); - return 0; -} - - -/* - * This thread will attempt to discover other partitions to activate - * based on info provided by SAL. This new thread is short lived and - * will exit once discovery is complete. - */ -static int -xpc_initiate_discovery(void *ignore) -{ - daemonize(XPC_DISCOVERY_THREAD_NAME); - - xpc_discovery(); - - dev_dbg(xpc_part, "discovery thread is exiting\n"); - - /* mark this thread as having exited */ - complete(&xpc_discovery_exited); - return 0; -} - - -/* - * Establish first contact with the remote partititon. This involves pulling - * the XPC per partition variables from the remote partition and waiting for - * the remote partition to pull ours. - */ -static enum xpc_retval -xpc_make_first_contact(struct xpc_partition *part) -{ - enum xpc_retval ret; - - - while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) { - if (ret != xpcRetry) { - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - dev_dbg(xpc_chan, "waiting to make first contact with " - "partition %d\n", XPC_PARTID(part)); - - /* wait a 1/4 of a second or so */ - (void) msleep_interruptible(250); - - if (part->act_state == XPC_P_DEACTIVATING) { - return part->reason; - } - } - - return xpc_mark_partition_active(part); -} - - -/* - * The first kthread assigned to a newly activated partition is the one - * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to - * that kthread until the partition is brought down, at which time that kthread - * returns back to XPC HB. (The return of that kthread will signify to XPC HB - * that XPC has dismantled all communication infrastructure for the associated - * partition.) This kthread becomes the channel manager for that partition. - * - * Each active partition has a channel manager, who, besides connecting and - * disconnecting channels, will ensure that each of the partition's connected - * channels has the required number of assigned kthreads to get the work done. - */ -static void -xpc_channel_mgr(struct xpc_partition *part) -{ - while (part->act_state != XPC_P_DEACTIVATING || - atomic_read(&part->nchannels_active) > 0 || - !xpc_partition_disengaged(part)) { - - xpc_process_channel_activity(part); - - - /* - * Wait until we've been requested to activate kthreads or - * all of the channel's message queues have been torn down or - * a signal is pending. - * - * The channel_mgr_requests is set to 1 after being awakened, - * This is done to prevent the channel mgr from making one pass - * through the loop for each request, since he will - * be servicing all the requests in one pass. The reason it's - * set to 1 instead of 0 is so that other kthreads will know - * that the channel mgr is running and won't bother trying to - * wake him up. - */ - atomic_dec(&part->channel_mgr_requests); - (void) wait_event_interruptible(part->channel_mgr_wq, - (atomic_read(&part->channel_mgr_requests) > 0 || - (volatile u64) part->local_IPI_amo != 0 || - ((volatile u8) part->act_state == - XPC_P_DEACTIVATING && - atomic_read(&part->nchannels_active) == 0 && - xpc_partition_disengaged(part)))); - atomic_set(&part->channel_mgr_requests, 1); - - // >>> Does it need to wakeup periodically as well? In case we - // >>> miscalculated the #of kthreads to wakeup or create? - } -} - - -/* - * When XPC HB determines that a partition has come up, it will create a new - * kthread and that kthread will call this function to attempt to set up the - * basic infrastructure used for Cross Partition Communication with the newly - * upped partition. - * - * The kthread that was created by XPC HB and which setup the XPC - * infrastructure will remain assigned to the partition until the partition - * goes down. At which time the kthread will teardown the XPC infrastructure - * and then exit. - * - * XPC HB will put the remote partition's XPC per partition specific variables - * physical address into xpc_partitions[partid].remote_vars_part_pa prior to - * calling xpc_partition_up(). - */ -static void -xpc_partition_up(struct xpc_partition *part) -{ - DBUG_ON(part->channels != NULL); - - dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); - - if (xpc_setup_infrastructure(part) != xpcSuccess) { - return; - } - - /* - * The kthread that XPC HB called us with will become the - * channel manager for this partition. It will not return - * back to XPC HB until the partition's XPC infrastructure - * has been dismantled. - */ - - (void) xpc_part_ref(part); /* this will always succeed */ - - if (xpc_make_first_contact(part) == xpcSuccess) { - xpc_channel_mgr(part); - } - - xpc_part_deref(part); - - xpc_teardown_infrastructure(part); -} - - -static int -xpc_activating(void *__partid) -{ - partid_t partid = (u64) __partid; - struct xpc_partition *part = &xpc_partitions[partid]; - unsigned long irq_flags; - struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; - int ret; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - spin_lock_irqsave(&part->act_lock, irq_flags); - - if (part->act_state == XPC_P_DEACTIVATING) { - part->act_state = XPC_P_INACTIVE; - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } - - /* indicate the thread is activating */ - DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); - part->act_state = XPC_P_ACTIVATING; - - XPC_SET_REASON(part, 0, 0); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - dev_dbg(xpc_part, "bringing partition %d up\n", partid); - - daemonize("xpc%02d", partid); - - /* - * This thread needs to run at a realtime priority to prevent a - * significant performance degradation. - */ - ret = sched_setscheduler(current, SCHED_FIFO, ¶m); - if (ret != 0) { - dev_warn(xpc_part, "unable to set pid %d to a realtime " - "priority, ret=%d\n", current->pid, ret); - } - - /* allow this thread and its children to run on any CPU */ - set_cpus_allowed(current, CPU_MASK_ALL); - - /* - * Register the remote partition's AMOs with SAL so it can handle - * and cleanup errors within that address range should the remote - * partition go down. We don't unregister this range because it is - * difficult to tell when outstanding writes to the remote partition - * are finished and thus when it is safe to unregister. This should - * not result in wasted space in the SAL xp_addr_region table because - * we should get the same page for remote_amos_page_pa after module - * reloads and system reboots. - */ - if (sn_register_xp_addr_region(part->remote_amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " - "xp_addr region\n", partid); - - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; - return 0; - } - - xpc_allow_hb(partid, xpc_vars); - xpc_IPI_send_activated(part); - - - /* - * xpc_partition_up() holds this thread and marks this partition as - * XPC_P_ACTIVE by calling xpc_hb_mark_active(). - */ - (void) xpc_partition_up(part); - - xpc_disallow_hb(partid, xpc_vars); - xpc_mark_partition_inactive(part); - - if (part->reason == xpcReactivating) { - /* interrupting ourselves results in activating partition */ - xpc_IPI_send_reactivate(part); - } - - return 0; -} - - -void -xpc_activate_partition(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - unsigned long irq_flags; - pid_t pid; - - - spin_lock_irqsave(&part->act_lock, irq_flags); - - DBUG_ON(part->act_state != XPC_P_INACTIVE); - - part->act_state = XPC_P_ACTIVATION_REQ; - XPC_SET_REASON(part, xpcCloneKThread, __LINE__); - - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); - - if (unlikely(pid <= 0)) { - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - } -} - - -/* - * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified - * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more - * than one partition, we use an AMO_t structure per partition to indicate - * whether a partition has sent an IPI or not. >>> If it has, then wake up the - * associated kthread to handle it. - * - * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC - * running on other partitions. - * - * Noteworthy Arguments: - * - * irq - Interrupt ReQuest number. NOT USED. - * - * dev_id - partid of IPI's potential sender. - */ -irqreturn_t -xpc_notify_IRQ_handler(int irq, void *dev_id) -{ - partid_t partid = (partid_t) (u64) dev_id; - struct xpc_partition *part = &xpc_partitions[partid]; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - xpc_part_deref(part); - } - return IRQ_HANDLED; -} - - -/* - * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor - * because the write to their associated IPI amo completed after the IRQ/IPI - * was received. - */ -void -xpc_dropped_IPI_check(struct xpc_partition *part) -{ - if (xpc_part_ref(part)) { - xpc_check_for_channel_activity(part); - - part->dropped_IPI_timer.expires = jiffies + - XPC_P_DROPPED_IPI_WAIT; - add_timer(&part->dropped_IPI_timer); - xpc_part_deref(part); - } -} - - -void -xpc_activate_kthreads(struct xpc_channel *ch, int needed) -{ - int idle = atomic_read(&ch->kthreads_idle); - int assigned = atomic_read(&ch->kthreads_assigned); - int wakeup; - - - DBUG_ON(needed <= 0); - - if (idle > 0) { - wakeup = (needed > idle) ? idle : needed; - needed -= wakeup; - - dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " - "channel=%d\n", wakeup, ch->partid, ch->number); - - /* only wakeup the requested number of kthreads */ - wake_up_nr(&ch->idle_wq, wakeup); - } - - if (needed <= 0) { - return; - } - - if (needed + assigned > ch->kthreads_assigned_limit) { - needed = ch->kthreads_assigned_limit - assigned; - // >>>should never be less than 0 - if (needed <= 0) { - return; - } - } - - dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", - needed, ch->partid, ch->number); - - xpc_create_kthreads(ch, needed, 0); -} - - -/* - * This function is where XPC's kthreads wait for messages to deliver. - */ -static void -xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) -{ - do { - /* deliver messages to their intended recipients */ - - while ((volatile s64) ch->w_local_GP.get < - (volatile s64) ch->w_remote_GP.put && - !((volatile u32) ch->flags & - XPC_C_DISCONNECTING)) { - xpc_deliver_msg(ch); - } - - if (atomic_inc_return(&ch->kthreads_idle) > - ch->kthreads_idle_limit) { - /* too many idle kthreads on this channel */ - atomic_dec(&ch->kthreads_idle); - break; - } - - dev_dbg(xpc_chan, "idle kthread calling " - "wait_event_interruptible_exclusive()\n"); - - (void) wait_event_interruptible_exclusive(ch->idle_wq, - ((volatile s64) ch->w_local_GP.get < - (volatile s64) ch->w_remote_GP.put || - ((volatile u32) ch->flags & - XPC_C_DISCONNECTING))); - - atomic_dec(&ch->kthreads_idle); - - } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING)); -} - - -static int -xpc_daemonize_kthread(void *args) -{ - partid_t partid = XPC_UNPACK_ARG1(args); - u16 ch_number = XPC_UNPACK_ARG2(args); - struct xpc_partition *part = &xpc_partitions[partid]; - struct xpc_channel *ch; - int n_needed; - unsigned long irq_flags; - - - daemonize("xpc%02dc%d", partid, ch_number); - - dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", - partid, ch_number); - - ch = &part->channels[ch_number]; - - if (!(ch->flags & XPC_C_DISCONNECTING)) { - - /* let registerer know that connection has been established */ - - spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { - ch->flags |= XPC_C_CONNECTEDCALLOUT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_connected_callout(ch); - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - /* - * It is possible that while the callout was being - * made that the remote partition sent some messages. - * If that is the case, we may need to activate - * additional kthreads to help deliver them. We only - * need one less than total #of messages to deliver. - */ - n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; - if (n_needed > 0 && - !(ch->flags & XPC_C_DISCONNECTING)) { - xpc_activate_kthreads(ch, n_needed); - } - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - - xpc_kthread_waitmsgs(part, ch); - } - - /* let registerer know that connection is disconnecting */ - - spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTINGCALLOUT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - xpc_disconnect_callout(ch, xpcDisconnecting); - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - if (atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - } - - xpc_msgqueue_deref(ch); - - dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", - partid, ch_number); - - xpc_part_deref(part); - return 0; -} - - -/* - * For each partition that XPC has established communications with, there is - * a minimum of one kernel thread assigned to perform any operation that - * may potentially sleep or block (basically the callouts to the asynchronous - * functions registered via xpc_connect()). - * - * Additional kthreads are created and destroyed by XPC as the workload - * demands. - * - * A kthread is assigned to one of the active channels that exists for a given - * partition. - */ -void -xpc_create_kthreads(struct xpc_channel *ch, int needed, - int ignore_disconnecting) -{ - unsigned long irq_flags; - pid_t pid; - u64 args = XPC_PACK_ARGS(ch->partid, ch->number); - struct xpc_partition *part = &xpc_partitions[ch->partid]; - - - while (needed-- > 0) { - - /* - * The following is done on behalf of the newly created - * kthread. That kthread is responsible for doing the - * counterpart to the following before it exits. - */ - if (ignore_disconnecting) { - if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { - /* kthreads assigned had gone to zero */ - BUG_ON(!(ch->flags & - XPC_C_DISCONNECTINGCALLOUT_MADE)); - break; - } - - } else if (ch->flags & XPC_C_DISCONNECTING) { - break; - - } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { - if (atomic_inc_return(&part->nchannels_engaged) == 1) - xpc_mark_partition_engaged(part); - } - (void) xpc_part_ref(part); - xpc_msgqueue_ref(ch); - - pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); - if (pid < 0) { - /* the fork failed */ - - /* - * NOTE: if (ignore_disconnecting && - * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, - * then we'll deadlock if all other kthreads assigned - * to this channel are blocked in the channel's - * registerer, because the only thing that will unblock - * them is the xpcDisconnecting callout that this - * failed kernel_thread would have made. - */ - - if (atomic_dec_return(&ch->kthreads_assigned) == 0 && - atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - xpc_msgqueue_deref(ch); - xpc_part_deref(part); - - if (atomic_read(&ch->kthreads_assigned) < - ch->kthreads_idle_limit) { - /* - * Flag this as an error only if we have an - * insufficient #of kthreads for the channel - * to function. - */ - spin_lock_irqsave(&ch->lock, irq_flags); - XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources, - &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - } - break; - } - - ch->kthreads_created++; // >>> temporary debug only!!! - } -} - - -void -xpc_disconnect_wait(int ch_number) -{ - unsigned long irq_flags; - partid_t partid; - struct xpc_partition *part; - struct xpc_channel *ch; - int wakeup_channel_mgr; - - - /* now wait for all callouts to the caller's function to cease */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (!xpc_part_ref(part)) { - continue; - } - - ch = &part->channels[ch_number]; - - if (!(ch->flags & XPC_C_WDISCONNECT)) { - xpc_part_deref(part); - continue; - } - - wait_for_completion(&ch->wdisconnect_wait); - - spin_lock_irqsave(&ch->lock, irq_flags); - DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); - wakeup_channel_mgr = 0; - - if (ch->delayed_IPI_flags) { - if (part->act_state != XPC_P_DEACTIVATING) { - spin_lock(&part->IPI_lock); - XPC_SET_IPI_FLAGS(part->local_IPI_amo, - ch->number, ch->delayed_IPI_flags); - spin_unlock(&part->IPI_lock); - wakeup_channel_mgr = 1; - } - ch->delayed_IPI_flags = 0; - } - - ch->flags &= ~XPC_C_WDISCONNECT; - spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (wakeup_channel_mgr) { - xpc_wakeup_channel_mgr(part); - } - - xpc_part_deref(part); - } -} - - -static void -xpc_do_exit(enum xpc_retval reason) -{ - partid_t partid; - int active_part_count, printed_waiting_msg = 0; - struct xpc_partition *part; - unsigned long printmsg_time, disengage_request_timeout = 0; - - - /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ - DBUG_ON(xpc_exiting == 1); - - /* - * Let the heartbeat checker thread and the discovery thread - * (if one is running) know that they should exit. Also wake up - * the heartbeat checker thread in case it's sleeping. - */ - xpc_exiting = 1; - wake_up_interruptible(&xpc_act_IRQ_wq); - - /* ignore all incoming interrupts */ - free_irq(SGI_XPC_ACTIVATE, NULL); - - /* wait for the discovery thread to exit */ - wait_for_completion(&xpc_discovery_exited); - - /* wait for the heartbeat checker thread to exit */ - wait_for_completion(&xpc_hb_checker_exited); - - - /* sleep for a 1/3 of a second or so */ - (void) msleep_interruptible(300); - - - /* wait for all partitions to become inactive */ - - printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - xpc_disengage_request_timedout = 0; - - do { - active_part_count = 0; - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { - continue; - } - - active_part_count++; - - XPC_DEACTIVATE_PARTITION(part, reason); - - if (part->disengage_request_timeout > - disengage_request_timeout) { - disengage_request_timeout = - part->disengage_request_timeout; - } - } - - if (xpc_partition_engaged(-1UL)) { - if (time_after(jiffies, printmsg_time)) { - dev_info(xpc_part, "waiting for remote " - "partitions to disengage, timeout in " - "%ld seconds\n", - (disengage_request_timeout - jiffies) - / HZ); - printmsg_time = jiffies + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); - printed_waiting_msg = 1; - } - - } else if (active_part_count > 0) { - if (printed_waiting_msg) { - dev_info(xpc_part, "waiting for local partition" - " to disengage\n"); - printed_waiting_msg = 0; - } - - } else { - if (!xpc_disengage_request_timedout) { - dev_info(xpc_part, "all partitions have " - "disengaged\n"); - } - break; - } - - /* sleep for a 1/3 of a second or so */ - (void) msleep_interruptible(300); - - } while (1); - - DBUG_ON(xpc_partition_engaged(-1UL)); - - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* now it's time to eliminate our heartbeat */ - del_timer_sync(&xpc_hb_timer); - DBUG_ON(xpc_vars->heartbeating_to_mask != 0); - - if (reason == xpcUnloading) { - /* take ourselves off of the reboot_notifier_list */ - (void) unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); - } - - /* close down protections for IPI operations */ - xpc_restrict_IPI_ops(); - - - /* clear the interface to XPC's functions */ - xpc_clear_interface(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); -} - - -/* - * This function is called when the system is being rebooted. - */ -static int -xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) -{ - enum xpc_retval reason; - - - switch (event) { - case SYS_RESTART: - reason = xpcSystemReboot; - break; - case SYS_HALT: - reason = xpcSystemHalt; - break; - case SYS_POWER_OFF: - reason = xpcSystemPoweroff; - break; - default: - reason = xpcSystemGoingDown; - } - - xpc_do_exit(reason); - return NOTIFY_DONE; -} - - -/* - * Notify other partitions to disengage from all references to our memory. - */ -static void -xpc_die_disengage(void) -{ - struct xpc_partition *part; - partid_t partid; - unsigned long engaged; - long time, printmsg_time, disengage_request_timeout; - - - /* keep xpc_hb_checker thread from doing anything (just in case) */ - xpc_exiting = 1; - - xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)) { - - /* just in case it was left set by an earlier XPC */ - xpc_clear_partition_engaged(1UL << partid); - continue; - } - - if (xpc_partition_engaged(1UL << partid) || - part->act_state != XPC_P_INACTIVE) { - xpc_request_partition_disengage(part); - xpc_mark_partition_disengaged(part); - xpc_IPI_send_disengage(part); - } - } - - time = rtc_time(); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second); - disengage_request_timeout = time + - (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); - - /* wait for all other partitions to disengage from us */ - - while (1) { - engaged = xpc_partition_engaged(-1UL); - if (!engaged) { - dev_info(xpc_part, "all partitions have disengaged\n"); - break; - } - - time = rtc_time(); - if (time >= disengage_request_timeout) { - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - if (engaged & (1UL << partid)) { - dev_info(xpc_part, "disengage from " - "remote partition %d timed " - "out\n", partid); - } - } - break; - } - - if (time >= printmsg_time) { - dev_info(xpc_part, "waiting for remote partitions to " - "disengage, timeout in %ld seconds\n", - (disengage_request_timeout - time) / - sn_rtc_cycles_per_second); - printmsg_time = time + - (XPC_DISENGAGE_PRINTMSG_INTERVAL * - sn_rtc_cycles_per_second); - } - } -} - - -/* - * This function is called when the system is being restarted or halted due - * to some sort of system failure. If this is the case we need to notify the - * other partitions to disengage from all references to our memory. - * This function can also be called when our heartbeater could be offlined - * for a time. In this case we need to notify other partitions to not worry - * about the lack of a heartbeat. - */ -static int -xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) -{ - switch (event) { - case DIE_MACHINE_RESTART: - case DIE_MACHINE_HALT: - xpc_die_disengage(); - break; - - case DIE_KDEBUG_ENTER: - /* Should lack of heartbeat be ignored by other partitions? */ - if (!xpc_kdebug_ignore) { - break; - } - /* fall through */ - case DIE_MCA_MONARCH_ENTER: - case DIE_INIT_MONARCH_ENTER: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 1; - break; - - case DIE_KDEBUG_LEAVE: - /* Is lack of heartbeat being ignored by other partitions? */ - if (!xpc_kdebug_ignore) { - break; - } - /* fall through */ - case DIE_MCA_MONARCH_LEAVE: - case DIE_INIT_MONARCH_LEAVE: - xpc_vars->heartbeat++; - xpc_vars->heartbeat_offline = 0; - break; - } - - return NOTIFY_DONE; -} - - -int __init -xpc_init(void) -{ - int ret; - partid_t partid; - struct xpc_partition *part; - pid_t pid; - size_t buf_size; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - - buf_size = max(XPC_RP_VARS_SIZE, - XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES); - xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size, - GFP_KERNEL, &xpc_remote_copy_buffer_base); - if (xpc_remote_copy_buffer == NULL) - return -ENOMEM; - - snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); - snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); - - xpc_sysctl = register_sysctl_table(xpc_sys_dir); - - /* - * The first few fields of each entry of xpc_partitions[] need to - * be initialized now so that calls to xpc_connect() and - * xpc_disconnect() can be made prior to the activation of any remote - * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE - * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING - * PARTITION HAS BEEN ACTIVATED. - */ - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - part = &xpc_partitions[partid]; - - DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part)); - - part->act_IRQ_rcvd = 0; - spin_lock_init(&part->act_lock); - part->act_state = XPC_P_INACTIVE; - XPC_SET_REASON(part, 0, 0); - - init_timer(&part->disengage_request_timer); - part->disengage_request_timer.function = - xpc_timeout_partition_disengage_request; - part->disengage_request_timer.data = (unsigned long) part; - - part->setup_state = XPC_P_UNSET; - init_waitqueue_head(&part->teardown_wq); - atomic_set(&part->references, 0); - } - - /* - * Open up protections for IPI operations (and AMO operations on - * Shub 1.1 systems). - */ - xpc_allow_IPI_ops(); - - /* - * Interrupts being processed will increment this atomic variable and - * awaken the heartbeat thread which will process the interrupts. - */ - atomic_set(&xpc_act_IRQ_rcvd, 0); - - /* - * This is safe to do before the xpc_hb_checker thread has started - * because the handler releases a wait queue. If an interrupt is - * received before the thread is waiting, it will not go to sleep, - * but rather immediately process the interrupt. - */ - ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, - "xpc hb", NULL); - if (ret != 0) { - dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " - "errno=%d\n", -ret); - - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - /* - * Fill the partition reserved page with the information needed by - * other partitions to discover we are alive and establish initial - * communications. - */ - xpc_rsvd_page = xpc_rsvd_page_init(); - if (xpc_rsvd_page == NULL) { - dev_err(xpc_part, "could not setup our reserved page\n"); - - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - - /* add ourselves to the reboot_notifier_list */ - ret = register_reboot_notifier(&xpc_reboot_notifier); - if (ret != 0) { - dev_warn(xpc_part, "can't register reboot notifier\n"); - } - - /* add ourselves to the die_notifier list */ - ret = register_die_notifier(&xpc_die_notifier); - if (ret != 0) { - dev_warn(xpc_part, "can't register die notifier\n"); - } - - init_timer(&xpc_hb_timer); - xpc_hb_timer.function = xpc_hb_beater; - - /* - * The real work-horse behind xpc. This processes incoming - * interrupts and monitors remote heartbeats. - */ - pid = kernel_thread(xpc_hb_checker, NULL, 0); - if (pid < 0) { - dev_err(xpc_part, "failed while forking hb check thread\n"); - - /* indicate to others that our reserved page is uninitialized */ - xpc_rsvd_page->vars_pa = 0; - - /* take ourselves off of the reboot_notifier_list */ - (void) unregister_reboot_notifier(&xpc_reboot_notifier); - - /* take ourselves off of the die_notifier list */ - (void) unregister_die_notifier(&xpc_die_notifier); - - del_timer_sync(&xpc_hb_timer); - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_restrict_IPI_ops(); - - if (xpc_sysctl) { - unregister_sysctl_table(xpc_sysctl); - } - - kfree(xpc_remote_copy_buffer_base); - return -EBUSY; - } - - - /* - * Startup a thread that will attempt to discover other partitions to - * activate based on info provided by SAL. This new thread is short - * lived and will exit once discovery is complete. - */ - pid = kernel_thread(xpc_initiate_discovery, NULL, 0); - if (pid < 0) { - dev_err(xpc_part, "failed while forking discovery thread\n"); - - /* mark this new thread as a non-starter */ - complete(&xpc_discovery_exited); - - xpc_do_exit(xpcUnloading); - return -EBUSY; - } - - - /* set the interface to point at XPC's functions */ - xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, - xpc_initiate_allocate, xpc_initiate_send, - xpc_initiate_send_notify, xpc_initiate_received, - xpc_initiate_partid_to_nasids); - - return 0; -} -module_init(xpc_init); - - -void __exit -xpc_exit(void) -{ - xpc_do_exit(xpcUnloading); -} -module_exit(xpc_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); -MODULE_LICENSE("GPL"); - -module_param(xpc_hb_interval, int, 0); -MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " - "heartbeat increments."); - -module_param(xpc_hb_check_interval, int, 0); -MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " - "heartbeat checks."); - -module_param(xpc_disengage_request_timelimit, int, 0); -MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " - "for disengage request to complete."); - -module_param(xpc_kdebug_ignore, int, 0); -MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " - "other partitions when dropping into kdebug."); - diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c deleted file mode 100644 index 9e97c268483..00000000000 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ /dev/null @@ -1,1239 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved. - */ - - -/* - * Cross Partition Communication (XPC) partition support. - * - * This is the part of XPC that detects the presence/absence of - * other partitions. It provides a heartbeat and monitors the - * heartbeats of other partitions. - * - */ - - -#include <linux/kernel.h> -#include <linux/sysctl.h> -#include <linux/cache.h> -#include <linux/mmzone.h> -#include <linux/nodemask.h> -#include <asm/uncached.h> -#include <asm/sn/bte.h> -#include <asm/sn/intr.h> -#include <asm/sn/sn_sal.h> -#include <asm/sn/nodepda.h> -#include <asm/sn/addrs.h> -#include <asm/sn/xpc.h> - - -/* XPC is exiting flag */ -int xpc_exiting; - - -/* SH_IPI_ACCESS shub register value on startup */ -static u64 xpc_sh1_IPI_access; -static u64 xpc_sh2_IPI_access0; -static u64 xpc_sh2_IPI_access1; -static u64 xpc_sh2_IPI_access2; -static u64 xpc_sh2_IPI_access3; - - -/* original protection values for each node */ -u64 xpc_prot_vec[MAX_NUMNODES]; - - -/* this partition's reserved page pointers */ -struct xpc_rsvd_page *xpc_rsvd_page; -static u64 *xpc_part_nasids; -static u64 *xpc_mach_nasids; -struct xpc_vars *xpc_vars; -struct xpc_vars_part *xpc_vars_part; - -static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ -static int xp_nasid_mask_words; /* actual size in words of nasid mask */ - - -/* - * For performance reasons, each entry of xpc_partitions[] is cacheline - * aligned. And xpc_partitions[] is padded with an additional entry at the - * end so that the last legitimate entry doesn't share its cacheline with - * another variable. - */ -struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; - - -/* - * Generic buffer used to store a local copy of portions of a remote - * partition's reserved page (either its header and part_nasids mask, - * or its vars). - */ -char *xpc_remote_copy_buffer; -void *xpc_remote_copy_buffer_base; - - -/* - * Guarantee that the kmalloc'd memory is cacheline aligned. - */ -void * -xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) -{ - /* see if kmalloc will give us cachline aligned memory by default */ - *base = kmalloc(size, flags); - if (*base == NULL) { - return NULL; - } - if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { - return *base; - } - kfree(*base); - - /* nope, we'll have to do it ourselves */ - *base = kmalloc(size + L1_CACHE_BYTES, flags); - if (*base == NULL) { - return NULL; - } - return (void *) L1_CACHE_ALIGN((u64) *base); -} - - -/* - * Given a nasid, get the physical address of the partition's reserved page - * for that nasid. This function returns 0 on any error. - */ -static u64 -xpc_get_rsvd_page_pa(int nasid) -{ - bte_result_t bte_res; - s64 status; - u64 cookie = 0; - u64 rp_pa = nasid; /* seed with nasid */ - u64 len = 0; - u64 buf = buf; - u64 buf_len = 0; - void *buf_base = NULL; - - - while (1) { - - status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, - &len); - - dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" - "0x%016lx, address=0x%016lx, len=0x%016lx\n", - status, cookie, rp_pa, len); - - if (status != SALRET_MORE_PASSES) { - break; - } - - if (L1_CACHE_ALIGN(len) > buf_len) { - kfree(buf_base); - buf_len = L1_CACHE_ALIGN(len); - buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len, - GFP_KERNEL, &buf_base); - if (buf_base == NULL) { - dev_err(xpc_part, "unable to kmalloc " - "len=0x%016lx\n", buf_len); - status = SALRET_ERROR; - break; - } - } - - bte_res = xp_bte_copy(rp_pa, buf, buf_len, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bte_res != BTE_SUCCESS) { - dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); - status = SALRET_ERROR; - break; - } - } - - kfree(buf_base); - - if (status != SALRET_OK) { - rp_pa = 0; - } - dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); - return rp_pa; -} - - -/* - * Fill the partition reserved page with the information needed by - * other partitions to discover we are alive and establish initial - * communications. - */ -struct xpc_rsvd_page * -xpc_rsvd_page_init(void) -{ - struct xpc_rsvd_page *rp; - AMO_t *amos_page; - u64 rp_pa, nasid_array = 0; - int i, ret; - - - /* get the local reserved page's address */ - - preempt_disable(); - rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); - preempt_enable(); - if (rp_pa == 0) { - dev_err(xpc_part, "SAL failed to locate the reserved page\n"); - return NULL; - } - rp = (struct xpc_rsvd_page *) __va(rp_pa); - - if (rp->partid != sn_partition_id) { - dev_err(xpc_part, "the reserved page's partid of %d should be " - "%d\n", rp->partid, sn_partition_id); - return NULL; - } - - rp->version = XPC_RP_VERSION; - - /* establish the actual sizes of the nasid masks */ - if (rp->SAL_version == 1) { - /* SAL_version 1 didn't set the nasids_size field */ - rp->nasids_size = 128; - } - xp_nasid_mask_bytes = rp->nasids_size; - xp_nasid_mask_words = xp_nasid_mask_bytes / 8; - - /* setup the pointers to the various items in the reserved page */ - xpc_part_nasids = XPC_RP_PART_NASIDS(rp); - xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - xpc_vars = XPC_RP_VARS(rp); - xpc_vars_part = XPC_RP_VARS_PART(rp); - - /* - * Before clearing xpc_vars, see if a page of AMOs had been previously - * allocated. If not we'll need to allocate one and set permissions - * so that cross-partition AMOs are allowed. - * - * The allocated AMO page needs MCA reporting to remain disabled after - * XPC has unloaded. To make this work, we keep a copy of the pointer - * to this page (i.e., amos_page) in the struct xpc_vars structure, - * which is pointed to by the reserved page, and re-use that saved copy - * on subsequent loads of XPC. This AMO page is never freed, and its - * memory protections are never restricted. - */ - if ((amos_page = xpc_vars->amos_page) == NULL) { - amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0)); - if (amos_page == NULL) { - dev_err(xpc_part, "can't allocate page of AMOs\n"); - return NULL; - } - - /* - * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems - * when xpc_allow_IPI_ops() is called via xpc_hb_init(). - */ - if (!enable_shub_wars_1_1()) { - ret = sn_change_memprotect(ia64_tpa((u64) amos_page), - PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, - &nasid_array); - if (ret != 0) { - dev_err(xpc_part, "can't change memory " - "protections\n"); - uncached_free_page(__IA64_UNCACHED_OFFSET | - TO_PHYS((u64) amos_page)); - return NULL; - } - } - } else if (!IS_AMO_ADDRESS((u64) amos_page)) { - /* - * EFI's XPBOOT can also set amos_page in the reserved page, - * but it happens to leave it as an uncached physical address - * and we need it to be an uncached virtual, so we'll have to - * convert it. - */ - if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { - dev_err(xpc_part, "previously used amos_page address " - "is bad = 0x%p\n", (void *) amos_page); - return NULL; - } - amos_page = (AMO_t *) TO_AMO((u64) amos_page); - } - - /* clear xpc_vars */ - memset(xpc_vars, 0, sizeof(struct xpc_vars)); - - xpc_vars->version = XPC_V_VERSION; - xpc_vars->act_nasid = cpuid_to_nasid(0); - xpc_vars->act_phys_cpuid = cpu_physical_id(0); - xpc_vars->vars_part_pa = __pa(xpc_vars_part); - xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page); - xpc_vars->amos_page = amos_page; /* save for next load of XPC */ - - - /* clear xpc_vars_part */ - memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) * - XP_MAX_PARTITIONS); - - /* initialize the activate IRQ related AMO variables */ - for (i = 0; i < xp_nasid_mask_words; i++) { - (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); - } - - /* initialize the engaged remote partitions related AMO variables */ - (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); - (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); - - /* timestamp of when reserved page was setup by XPC */ - rp->stamp = CURRENT_TIME; - - /* - * This signifies to the remote partition that our reserved - * page is initialized. - */ - rp->vars_pa = __pa(xpc_vars); - - return rp; -} - - -/* - * Change protections to allow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ -void -xpc_allow_IPI_ops(void) -{ - int node; - int nasid; - - - // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. - - if (is_shub2()) { - xpc_sh2_IPI_access0 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); - xpc_sh2_IPI_access1 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); - xpc_sh2_IPI_access2 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); - xpc_sh2_IPI_access3 = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - -1UL); - } - - } else { - xpc_sh1_IPI_access = - (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - -1UL); - - /* - * Since the BIST collides with memory operations on - * SHUB 1.1 sn_change_memprotect() cannot be used. - */ - if (enable_shub_wars_1_1()) { - /* open up everything */ - xpc_prot_vec[node] = (u64) HUB_L((u64 *) - GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0)); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - -1UL); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - -1UL); - } - } - } -} - - -/* - * Restrict protections to disallow IPI operations (and AMO operations on - * Shub 1.1 systems). - */ -void -xpc_restrict_IPI_ops(void) -{ - int node; - int nasid; - - - // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. - - if (is_shub2()) { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - xpc_sh2_IPI_access0); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - xpc_sh2_IPI_access1); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - xpc_sh2_IPI_access2); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - xpc_sh2_IPI_access3); - } - - } else { - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - xpc_sh1_IPI_access); - - if (enable_shub_wars_1_1()) { - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - xpc_prot_vec[node]); - } - } - } -} - - -/* - * At periodic intervals, scan through all active partitions and ensure - * their heartbeat is still active. If not, the partition is deactivated. - */ -void -xpc_check_remote_hb(void) -{ - struct xpc_vars *remote_vars; - struct xpc_partition *part; - partid_t partid; - bte_result_t bres; - - - remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; - - for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { - - if (xpc_exiting) { - break; - } - - if (partid == sn_partition_id) { - continue; - } - - part = &xpc_partitions[partid]; - - if (part->act_state == XPC_P_INACTIVE || - part->act_state == XPC_P_DEACTIVATING) { - continue; - } - - /* pull the remote_hb cache line */ - bres = xp_bte_copy(part->remote_vars_pa, - (u64) remote_vars, - XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - XPC_DEACTIVATE_PARTITION(part, - xpc_map_bte_errors(bres)); - continue; - } - - dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" - " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", - partid, remote_vars->heartbeat, part->last_heartbeat, - remote_vars->heartbeat_offline, - remote_vars->heartbeating_to_mask); - - if (((remote_vars->heartbeat == part->last_heartbeat) && - (remote_vars->heartbeat_offline == 0)) || - !xpc_hb_allowed(sn_partition_id, remote_vars)) { - - XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); - continue; - } - - part->last_heartbeat = remote_vars->heartbeat; - } -} - - -/* - * Get a copy of a portion of the remote partition's rsvd page. - * - * remote_rp points to a buffer that is cacheline aligned for BTE copies and - * is large enough to contain a copy of their reserved page header and - * part_nasids mask. - */ -static enum xpc_retval -xpc_get_remote_rp(int nasid, u64 *discovered_nasids, - struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) -{ - int bres, i; - - - /* get the reserved page's physical address */ - - *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); - if (*remote_rp_pa == 0) { - return xpcNoRsvdPageAddr; - } - - - /* pull over the reserved page header and part_nasids mask */ - bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp, - XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - return xpc_map_bte_errors(bres); - } - - - if (discovered_nasids != NULL) { - u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); - - - for (i = 0; i < xp_nasid_mask_words; i++) { - discovered_nasids[i] |= remote_part_nasids[i]; - } - } - - - /* check that the partid is for another partition */ - - if (remote_rp->partid < 1 || - remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { - return xpcInvalidPartid; - } - - if (remote_rp->partid == sn_partition_id) { - return xpcLocalPartid; - } - - - if (XPC_VERSION_MAJOR(remote_rp->version) != - XPC_VERSION_MAJOR(XPC_RP_VERSION)) { - return xpcBadVersion; - } - - return xpcSuccess; -} - - -/* - * Get a copy of the remote partition's XPC variables from the reserved page. - * - * remote_vars points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_RP_VARS_SIZE. - */ -static enum xpc_retval -xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - int bres; - - - if (remote_vars_pa == 0) { - return xpcVarsNotSet; - } - - /* pull over the cross partition variables */ - bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE, - (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (bres != BTE_SUCCESS) { - return xpc_map_bte_errors(bres); - } - - if (XPC_VERSION_MAJOR(remote_vars->version) != - XPC_VERSION_MAJOR(XPC_V_VERSION)) { - return xpcBadVersion; - } - - return xpcSuccess; -} - - -/* - * Update the remote partition's info. - */ -static void -xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, - struct timespec *remote_rp_stamp, u64 remote_rp_pa, - u64 remote_vars_pa, struct xpc_vars *remote_vars) -{ - part->remote_rp_version = remote_rp_version; - dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", - part->remote_rp_version); - - part->remote_rp_stamp = *remote_rp_stamp; - dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n", - part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec); - - part->remote_rp_pa = remote_rp_pa; - dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); - - part->remote_vars_pa = remote_vars_pa; - dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", - part->remote_vars_pa); - - part->last_heartbeat = remote_vars->heartbeat; - dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", - part->last_heartbeat); - - part->remote_vars_part_pa = remote_vars->vars_part_pa; - dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", - part->remote_vars_part_pa); - - part->remote_act_nasid = remote_vars->act_nasid; - dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", - part->remote_act_nasid); - - part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; - dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", - part->remote_act_phys_cpuid); - - part->remote_amos_page_pa = remote_vars->amos_page_pa; - dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", - part->remote_amos_page_pa); - - part->remote_vars_version = remote_vars->version; - dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", - part->remote_vars_version); -} - - -/* - * Prior code has determined the nasid which generated an IPI. Inspect - * that nasid to determine if its partition needs to be activated or - * deactivated. - * - * A partition is consider "awaiting activation" if our partition - * flags indicate it is not active and it has a heartbeat. A - * partition is considered "awaiting deactivation" if our partition - * flags indicate it is active but it has no heartbeat or it is not - * sending its heartbeat to us. - * - * To determine the heartbeat, the remote nasid must have a properly - * initialized reserved page. - */ -static void -xpc_identify_act_IRQ_req(int nasid) -{ - struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; - int remote_rp_version; - int reactivate = 0; - int stamp_diff; - struct timespec remote_rp_stamp = { 0, 0 }; - partid_t partid; - struct xpc_partition *part; - enum xpc_retval ret; - - - /* pull over the reserved page structure */ - - remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; - - ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); - if (ret != xpcSuccess) { - dev_warn(xpc_part, "unable to get reserved page from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - return; - } - - remote_vars_pa = remote_rp->vars_pa; - remote_rp_version = remote_rp->version; - if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - remote_rp_stamp = remote_rp->stamp; - } - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - - /* pull over the cross partition variables */ - - remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpcSuccess) { - - dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - return; - } - - - part->act_IRQ_rcvd++; - - dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " - "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, - remote_vars->heartbeat, remote_vars->heartbeating_to_mask); - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_INACTIVE) { - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - if (xpc_partition_disengage_requested(1UL << partid)) { - /* - * Other side is waiting on us to disengage, - * even though we already have. - */ - return; - } - } else { - /* other side doesn't support disengage requests */ - xpc_clear_partition_disengage_request(1UL << partid); - } - - xpc_activate_partition(part); - return; - } - - DBUG_ON(part->remote_rp_version == 0); - DBUG_ON(part->remote_vars_version == 0); - - if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part-> - remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)); - /* see if the other side rebooted */ - if (part->remote_amos_page_pa == - remote_vars->amos_page_pa && - xpc_hb_allowed(sn_partition_id, - remote_vars)) { - /* doesn't look that way, so ignore the IPI */ - return; - } - } - - /* - * Other side rebooted and previous XPC didn't support the - * disengage request, so we don't need to do anything special. - */ - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpcReactivating); - return; - } - - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)); - - if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - /* - * Other side rebooted and previous XPC did support the - * disengage request, but the new one doesn't. - */ - - xpc_clear_partition_engaged(1UL << partid); - xpc_clear_partition_disengage_request(1UL << partid); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - reactivate = 1; - - } else { - DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); - - stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp, - &remote_rp_stamp); - if (stamp_diff != 0) { - DBUG_ON(stamp_diff >= 0); - - /* - * Other side rebooted and the previous XPC did support - * the disengage request, as does the new one. - */ - - DBUG_ON(xpc_partition_engaged(1UL << partid)); - DBUG_ON(xpc_partition_disengage_requested(1UL << - partid)); - - xpc_update_partition_info(part, remote_rp_version, - &remote_rp_stamp, remote_rp_pa, - remote_vars_pa, remote_vars); - reactivate = 1; - } - } - - if (part->disengage_request_timeout > 0 && - !xpc_partition_disengaged(part)) { - /* still waiting on other side to disengage from us */ - return; - } - - if (reactivate) { - part->reactivate_nasid = nasid; - XPC_DEACTIVATE_PARTITION(part, xpcReactivating); - - } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) && - xpc_partition_disengage_requested(1UL << partid)) { - XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown); - } -} - - -/* - * Loop through the activation AMO variables and process any bits - * which are set. Each bit indicates a nasid sending a partition - * activation or deactivation request. - * - * Return #of IRQs detected. - */ -int -xpc_identify_act_IRQ_sender(void) -{ - int word, bit; - u64 nasid_mask; - u64 nasid; /* remote nasid */ - int n_IRQs_detected = 0; - AMO_t *act_amos; - - - act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; - - - /* scan through act AMO variable looking for non-zero entries */ - for (word = 0; word < xp_nasid_mask_words; word++) { - - if (xpc_exiting) { - break; - } - - nasid_mask = xpc_IPI_receive(&act_amos[word]); - if (nasid_mask == 0) { - /* no IRQs from nasids in this variable */ - continue; - } - - dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, - nasid_mask); - - - /* - * If this nasid has been added to the machine since - * our partition was reset, this will retain the - * remote nasid in our reserved pages machine mask. - * This is used in the event of module reload. - */ - xpc_mach_nasids[word] |= nasid_mask; - - - /* locate the nasid(s) which sent interrupts */ - - for (bit = 0; bit < (8 * sizeof(u64)); bit++) { - if (nasid_mask & (1UL << bit)) { - n_IRQs_detected++; - nasid = XPC_NASID_FROM_W_B(word, bit); - dev_dbg(xpc_part, "interrupt from nasid %ld\n", - nasid); - xpc_identify_act_IRQ_req(nasid); - } - } - } - return n_IRQs_detected; -} - - -/* - * See if the other side has responded to a partition disengage request - * from us. - */ -int -xpc_partition_disengaged(struct xpc_partition *part) -{ - partid_t partid = XPC_PARTID(part); - int disengaged; - - - disengaged = (xpc_partition_engaged(1UL << partid) == 0); - if (part->disengage_request_timeout) { - if (!disengaged) { - if (time_before(jiffies, part->disengage_request_timeout)) { - /* timelimit hasn't been reached yet */ - return 0; - } - - /* - * Other side hasn't responded to our disengage - * request in a timely fashion, so assume it's dead. - */ - - dev_info(xpc_part, "disengage from remote partition %d " - "timed out\n", partid); - xpc_disengage_request_timedout = 1; - xpc_clear_partition_engaged(1UL << partid); - disengaged = 1; - } - part->disengage_request_timeout = 0; - - /* cancel the timer function, provided it's not us */ - if (!in_interrupt()) { - del_singleshot_timer_sync(&part-> - disengage_request_timer); - } - - DBUG_ON(part->act_state != XPC_P_DEACTIVATING && - part->act_state != XPC_P_INACTIVE); - if (part->act_state != XPC_P_INACTIVE) { - xpc_wakeup_channel_mgr(part); - } - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - xpc_cancel_partition_disengage_request(part); - } - } - return disengaged; -} - - -/* - * Mark specified partition as active. - */ -enum xpc_retval -xpc_mark_partition_active(struct xpc_partition *part) -{ - unsigned long irq_flags; - enum xpc_retval ret; - - - dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); - - spin_lock_irqsave(&part->act_lock, irq_flags); - if (part->act_state == XPC_P_ACTIVATING) { - part->act_state = XPC_P_ACTIVE; - ret = xpcSuccess; - } else { - DBUG_ON(part->reason == xpcSuccess); - ret = part->reason; - } - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - return ret; -} - - -/* - * Notify XPC that the partition is down. - */ -void -xpc_deactivate_partition(const int line, struct xpc_partition *part, - enum xpc_retval reason) -{ - unsigned long irq_flags; - - - spin_lock_irqsave(&part->act_lock, irq_flags); - - if (part->act_state == XPC_P_INACTIVE) { - XPC_SET_REASON(part, reason, line); - spin_unlock_irqrestore(&part->act_lock, irq_flags); - if (reason == xpcReactivating) { - /* we interrupt ourselves to reactivate partition */ - xpc_IPI_send_reactivate(part); - } - return; - } - if (part->act_state == XPC_P_DEACTIVATING) { - if ((part->reason == xpcUnloading && reason != xpcUnloading) || - reason == xpcReactivating) { - XPC_SET_REASON(part, reason, line); - } - spin_unlock_irqrestore(&part->act_lock, irq_flags); - return; - } - - part->act_state = XPC_P_DEACTIVATING; - XPC_SET_REASON(part, reason, line); - - spin_unlock_irqrestore(&part->act_lock, irq_flags); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { - xpc_request_partition_disengage(part); - xpc_IPI_send_disengage(part); - - /* set a timelimit on the disengage request */ - part->disengage_request_timeout = jiffies + - (xpc_disengage_request_timelimit * HZ); - part->disengage_request_timer.expires = - part->disengage_request_timeout; - add_timer(&part->disengage_request_timer); - } - - dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", - XPC_PARTID(part), reason); - - xpc_partition_going_down(part, reason); -} - - -/* - * Mark specified partition as inactive. - */ -void -xpc_mark_partition_inactive(struct xpc_partition *part) -{ - unsigned long irq_flags; - - - dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", - XPC_PARTID(part)); - - spin_lock_irqsave(&part->act_lock, irq_flags); - part->act_state = XPC_P_INACTIVE; - spin_unlock_irqrestore(&part->act_lock, irq_flags); - part->remote_rp_pa = 0; -} - - -/* - * SAL has provided a partition and machine mask. The partition mask - * contains a bit for each even nasid in our partition. The machine - * mask contains a bit for each even nasid in the entire machine. - * - * Using those two bit arrays, we can determine which nasids are - * known in the machine. Each should also have a reserved page - * initialized if they are available for partitioning. - */ -void -xpc_discovery(void) -{ - void *remote_rp_base; - struct xpc_rsvd_page *remote_rp; - struct xpc_vars *remote_vars; - u64 remote_rp_pa; - u64 remote_vars_pa; - int region; - int region_size; - int max_regions; - int nasid; - struct xpc_rsvd_page *rp; - partid_t partid; - struct xpc_partition *part; - u64 *discovered_nasids; - enum xpc_retval ret; - - - remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + - xp_nasid_mask_bytes, - GFP_KERNEL, &remote_rp_base); - if (remote_rp == NULL) { - return; - } - remote_vars = (struct xpc_vars *) remote_rp; - - - discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, - GFP_KERNEL); - if (discovered_nasids == NULL) { - kfree(remote_rp_base); - return; - } - - rp = (struct xpc_rsvd_page *) xpc_rsvd_page; - - /* - * The term 'region' in this context refers to the minimum number of - * nodes that can comprise an access protection grouping. The access - * protection is in regards to memory, IOI and IPI. - */ - max_regions = 64; - region_size = sn_region_size; - - switch (region_size) { - case 128: - max_regions *= 2; - case 64: - max_regions *= 2; - case 32: - max_regions *= 2; - region_size = 16; - DBUG_ON(!is_shub2()); - } - - for (region = 0; region < max_regions; region++) { - - if ((volatile int) xpc_exiting) { - break; - } - - dev_dbg(xpc_part, "searching region %d\n", region); - - for (nasid = (region * region_size * 2); - nasid < ((region + 1) * region_size * 2); - nasid += 2) { - - if ((volatile int) xpc_exiting) { - break; - } - - dev_dbg(xpc_part, "checking nasid %d\n", nasid); - - - if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { - dev_dbg(xpc_part, "PROM indicates Nasid %d is " - "part of the local partition; skipping " - "region\n", nasid); - break; - } - - if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { - dev_dbg(xpc_part, "PROM indicates Nasid %d was " - "not on Numa-Link network at reset\n", - nasid); - continue; - } - - if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { - dev_dbg(xpc_part, "Nasid %d is part of a " - "partition which was previously " - "discovered\n", nasid); - continue; - } - - - /* pull over the reserved page structure */ - - ret = xpc_get_remote_rp(nasid, discovered_nasids, - remote_rp, &remote_rp_pa); - if (ret != xpcSuccess) { - dev_dbg(xpc_part, "unable to get reserved page " - "from nasid %d, reason=%d\n", nasid, - ret); - - if (ret == xpcLocalPartid) { - break; - } - continue; - } - - remote_vars_pa = remote_rp->vars_pa; - - partid = remote_rp->partid; - part = &xpc_partitions[partid]; - - - /* pull over the cross partition variables */ - - ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); - if (ret != xpcSuccess) { - dev_dbg(xpc_part, "unable to get XPC variables " - "from nasid %d, reason=%d\n", nasid, - ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - continue; - } - - if (part->act_state != XPC_P_INACTIVE) { - dev_dbg(xpc_part, "partition %d on nasid %d is " - "already activating\n", partid, nasid); - break; - } - - /* - * Register the remote partition's AMOs with SAL so it - * can handle and cleanup errors within that address - * range should the remote partition go down. We don't - * unregister this range because it is difficult to - * tell when outstanding writes to the remote partition - * are finished and thus when it is thus safe to - * unregister. This should not result in wasted space - * in the SAL xp_addr_region table because we should - * get the same page for remote_act_amos_pa after - * module reloads and system reboots. - */ - if (sn_register_xp_addr_region( - remote_vars->amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_dbg(xpc_part, "partition %d failed to " - "register xp_addr region 0x%016lx\n", - partid, remote_vars->amos_page_pa); - - XPC_SET_REASON(part, xpcPhysAddrRegFailed, - __LINE__); - break; - } - - /* - * The remote nasid is valid and available. - * Send an interrupt to that nasid to notify - * it that we are ready to begin activation. - */ - dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " - "nasid %d, phys_cpuid 0x%x\n", - remote_vars->amos_page_pa, - remote_vars->act_nasid, - remote_vars->act_phys_cpuid); - - if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> - version)) { - part->remote_amos_page_pa = - remote_vars->amos_page_pa; - xpc_mark_partition_disengaged(part); - xpc_cancel_partition_disengage_request(part); - } - xpc_IPI_send_activate(remote_vars); - } - } - - kfree(discovered_nasids); - kfree(remote_rp_base); -} - - -/* - * Given a partid, get the nasids owned by that partition from the - * remote partition's reserved page. - */ -enum xpc_retval -xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) -{ - struct xpc_partition *part; - u64 part_nasid_pa; - int bte_res; - - - part = &xpc_partitions[partid]; - if (part->remote_rp_pa == 0) { - return xpcPartitionDown; - } - - memset(nasid_mask, 0, XP_NASID_MASK_BYTES); - - part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa); - - bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask, - xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL); - - return xpc_map_bte_errors(bte_res); -} - diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c deleted file mode 100644 index a5df672d839..00000000000 --- a/arch/ia64/sn/kernel/xpnet.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. - */ - - -/* - * Cross Partition Network Interface (XPNET) support - * - * XPNET provides a virtual network layered on top of the Cross - * Partition communication layer. - * - * XPNET provides direct point-to-point and broadcast-like support - * for an ethernet-like device. The ethernet broadcast medium is - * replaced with a point-to-point message structure which passes - * pointers to a DMA-capable block that a remote partition should - * retrieve and pass to the upper level networking layer. - * - */ - - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <asm/sn/bte.h> -#include <asm/sn/io.h> -#include <asm/sn/sn_sal.h> -#include <asm/types.h> -#include <asm/atomic.h> -#include <asm/sn/xp.h> - - -/* - * The message payload transferred by XPC. - * - * buf_pa is the physical address where the DMA should pull from. - * - * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a - * cacheline boundary. To accomplish this, we record the number of - * bytes from the beginning of the first cacheline to the first useful - * byte of the skb (leadin_ignore) and the number of bytes from the - * last useful byte of the skb to the end of the last cacheline - * (tailout_ignore). - * - * size is the number of bytes to transfer which includes the skb->len - * (useful bytes of the senders skb) plus the leadin and tailout - */ -struct xpnet_message { - u16 version; /* Version for this message */ - u16 embedded_bytes; /* #of bytes embedded in XPC message */ - u32 magic; /* Special number indicating this is xpnet */ - u64 buf_pa; /* phys address of buffer to retrieve */ - u32 size; /* #of bytes in buffer */ - u8 leadin_ignore; /* #of bytes to ignore at the beginning */ - u8 tailout_ignore; /* #of bytes to ignore at the end */ - unsigned char data; /* body of small packets */ -}; - -/* - * Determine the size of our message, the cacheline aligned size, - * and then the number of message will request from XPC. - * - * XPC expects each message to exist in an individual cacheline. - */ -#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) -#define XPNET_MSG_DATA_MAX \ - (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) -#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) -#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE) - - -#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) -#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) - -/* - * Version number of XPNET implementation. XPNET can always talk to versions - * with same major #, and never talk to versions with a different version. - */ -#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) -#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) -#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) - -#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */ -#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */ -#define XPNET_MAGIC 0x88786984 /* "XNET" */ - -#define XPNET_VALID_MSG(_m) \ - ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ - && (msg->magic == XPNET_MAGIC)) - -#define XPNET_DEVICE_NAME "xp0" - - -/* - * When messages are queued with xpc_send_notify, a kmalloc'd buffer - * of the following type is passed as a notification cookie. When the - * notification function is called, we use the cookie to decide - * whether all outstanding message sends have completed. The skb can - * then be released. - */ -struct xpnet_pending_msg { - struct list_head free_list; - struct sk_buff *skb; - atomic_t use_count; -}; - -/* driver specific structure pointed to by the device structure */ -struct xpnet_dev_private { - struct net_device_stats stats; -}; - -struct net_device *xpnet_device; - -/* - * When we are notified of other partitions activating, we add them to - * our bitmask of partitions to which we broadcast. - */ -static u64 xpnet_broadcast_partitions; -/* protect above */ -static DEFINE_SPINLOCK(xpnet_broadcast_lock); - -/* - * Since the Block Transfer Engine (BTE) is being used for the transfer - * and it relies upon cache-line size transfers, we need to reserve at - * least one cache-line for head and tail alignment. The BTE is - * limited to 8MB transfers. - * - * Testing has shown that changing MTU to greater than 64KB has no effect - * on TCP as the two sides negotiate a Max Segment Size that is limited - * to 64K. Other protocols May use packets greater than this, but for - * now, the default is 64KB. - */ -#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) -/* 32KB has been determined to be the ideal */ -#define XPNET_DEF_MTU (0x8000UL) - - -/* - * The partition id is encapsulated in the MAC address. The following - * define locates the octet the partid is in. - */ -#define XPNET_PARTID_OCTET 1 -#define XPNET_LICENSE_OCTET 2 - - -/* - * Define the XPNET debug device structure that is to be used with dev_dbg(), - * dev_err(), dev_warn(), and dev_info(). - */ -struct device_driver xpnet_dbg_name = { - .name = "xpnet" -}; - -struct device xpnet_dbg_subname = { - .bus_id = {0}, /* set to "" */ - .driver = &xpnet_dbg_name -}; - -struct device *xpnet = &xpnet_dbg_subname; - -/* - * Packet was recevied by XPC and forwarded to us. - */ -static void -xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) -{ - struct sk_buff *skb; - bte_result_t bret; - struct xpnet_dev_private *priv = - (struct xpnet_dev_private *) xpnet_device->priv; - - - if (!XPNET_VALID_MSG(msg)) { - /* - * Packet with a different XPC version. Ignore. - */ - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, - msg->leadin_ignore, msg->tailout_ignore); - - - /* reserve an extra cache line */ - skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); - if (!skb) { - dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", - msg->size + L1_CACHE_BYTES); - - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - - /* - * The allocated skb has some reserved space. - * In order to use bte_copy, we need to get the - * skb->data pointer moved forward. - */ - skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & - (L1_CACHE_BYTES - 1)) + - msg->leadin_ignore)); - - /* - * Update the tail pointer to indicate data actually - * transferred. - */ - skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); - - /* - * Move the data over from the other side. - */ - if ((XPNET_VERSION_MINOR(msg->version) == 1) && - (msg->embedded_bytes != 0)) { - dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " - "%lu)\n", skb->data, &msg->data, - (size_t) msg->embedded_bytes); - - skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes); - } else { - dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" - "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size); - - bret = bte_copy(msg->buf_pa, - __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), - msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); - - if (bret != BTE_SUCCESS) { - // >>> Need better way of cleaning skb. Currently skb - // >>> appears in_use and we can't just call - // >>> dev_kfree_skb. - dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " - "error=0x%x\n", (void *)msg->buf_pa, - (void *)__pa((u64)skb->data & - ~(L1_CACHE_BYTES - 1)), - msg->size, bret); - - xpc_received(partid, channel, (void *) msg); - - priv->stats.rx_errors++; - - return; - } - } - - dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), - skb->len); - - skb->protocol = eth_type_trans(skb, xpnet_device); - skb->ip_summed = CHECKSUM_UNNECESSARY; - - dev_dbg(xpnet, "passing skb to network layer\n" - KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", - (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), - skb_end_pointer(skb), skb->len); - - - xpnet_device->last_rx = jiffies; - priv->stats.rx_packets++; - priv->stats.rx_bytes += skb->len + ETH_HLEN; - - netif_rx_ni(skb); - xpc_received(partid, channel, (void *) msg); -} - - -/* - * This is the handler which XPC calls during any sort of change in - * state or message reception on a connection. - */ -static void -xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel, - void *data, void *key) -{ - long bp; - - - DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); - DBUG_ON(channel != XPC_NET_CHANNEL); - - switch(reason) { - case xpcMsgReceived: /* message received */ - DBUG_ON(data == NULL); - - xpnet_receive(partid, channel, (struct xpnet_message *) data); - break; - - case xpcConnected: /* connection completed to a partition */ - spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions |= 1UL << (partid -1 ); - bp = xpnet_broadcast_partitions; - spin_unlock_bh(&xpnet_broadcast_lock); - - netif_carrier_on(xpnet_device); - - dev_dbg(xpnet, "%s connection created to partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); - break; - - default: - spin_lock_bh(&xpnet_broadcast_lock); - xpnet_broadcast_partitions &= ~(1UL << (partid -1 )); - bp = xpnet_broadcast_partitions; - spin_unlock_bh(&xpnet_broadcast_lock); - - if (bp == 0) { - netif_carrier_off(xpnet_device); - } - - dev_dbg(xpnet, "%s disconnected from partition %d; " - "xpnet_broadcast_partitions=0x%lx\n", - xpnet_device->name, partid, bp); - break; - - } -} - - -static int -xpnet_dev_open(struct net_device *dev) -{ - enum xpc_retval ret; - - - dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " - "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, - XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, - XPNET_MAX_IDLE_KTHREADS); - - ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, - XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, - XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); - if (ret != xpcSuccess) { - dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " - "ret=%d\n", dev->name, ret); - - return -ENOMEM; - } - - dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); - - return 0; -} - - -static int -xpnet_dev_stop(struct net_device *dev) -{ - xpc_disconnect(XPC_NET_CHANNEL); - - dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); - - return 0; -} - - -static int -xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - /* 68 comes from min TCP+IP+MAC header */ - if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { - dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " - "between 68 and %ld\n", dev->name, new_mtu, - XPNET_MAX_MTU); - return -EINVAL; - } - - dev->mtu = new_mtu; - dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); - return 0; -} - - -/* - * Required for the net_device structure. - */ -static int -xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map) -{ - return 0; -} - - -/* - * Return statistics to the caller. - */ -static struct net_device_stats * -xpnet_dev_get_stats(struct net_device *dev) -{ - struct xpnet_dev_private *priv; - - - priv = (struct xpnet_dev_private *) dev->priv; - - return &priv->stats; -} - - -/* - * Notification that the other end has received the message and - * DMA'd the skb information. At this point, they are done with - * our side. When all recipients are done processing, we - * release the skb and then release our pending message structure. - */ -static void -xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel, - void *__qm) -{ - struct xpnet_pending_msg *queued_msg = - (struct xpnet_pending_msg *) __qm; - - - DBUG_ON(queued_msg == NULL); - - dev_dbg(xpnet, "message to %d notified with reason %d\n", - partid, reason); - - if (atomic_dec_return(&queued_msg->use_count) == 0) { - dev_dbg(xpnet, "all acks for skb->head=-x%p\n", - (void *) queued_msg->skb->head); - - dev_kfree_skb_any(queued_msg->skb); - kfree(queued_msg); - } -} - - -/* - * Network layer has formatted a packet (skb) and is ready to place it - * "on the wire". Prepare and send an xpnet_message to all partitions - * which have connected with us and are targets of this packet. - * - * MAC-NOTE: For the XPNET driver, the MAC address contains the - * destination partition_id. If the destination partition id word - * is 0xff, this packet is to broadcast to all partitions. - */ -static int -xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct xpnet_pending_msg *queued_msg; - enum xpc_retval ret; - struct xpnet_message *msg; - u64 start_addr, end_addr; - long dp; - u8 second_mac_octet; - partid_t dest_partid; - struct xpnet_dev_private *priv; - u16 embedded_bytes; - - - priv = (struct xpnet_dev_private *) dev->priv; - - - dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " - "skb->end=0x%p skb->len=%d\n", (void *) skb->head, - (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), - skb->len); - - - /* - * The xpnet_pending_msg tracks how many outstanding - * xpc_send_notifies are relying on this skb. When none - * remain, release the skb. - */ - queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); - if (queued_msg == NULL) { - dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " - "packet\n", sizeof(struct xpnet_pending_msg)); - - priv->stats.tx_errors++; - - return -ENOMEM; - } - - - /* get the beginning of the first cacheline and end of last */ - start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); - end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); - - /* calculate how many bytes to embed in the XPC message */ - embedded_bytes = 0; - if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { - /* skb->data does fit so embed */ - embedded_bytes = skb->len; - } - - - /* - * Since the send occurs asynchronously, we set the count to one - * and begin sending. Any sends that happen to complete before - * we are done sending will not free the skb. We will be left - * with that task during exit. This also handles the case of - * a packet destined for a partition which is no longer up. - */ - atomic_set(&queued_msg->use_count, 1); - queued_msg->skb = skb; - - - second_mac_octet = skb->data[XPNET_PARTID_OCTET]; - if (second_mac_octet == 0xff) { - /* we are being asked to broadcast to all partitions */ - dp = xpnet_broadcast_partitions; - } else if (second_mac_octet != 0) { - dp = xpnet_broadcast_partitions & - (1UL << (second_mac_octet - 1)); - } else { - /* 0 is an invalid partid. Ignore */ - dp = 0; - } - dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); - - /* - * If we wanted to allow promiscuous mode to work like an - * unswitched network, this would be a good point to OR in a - * mask of partitions which should be receiving all packets. - */ - - /* - * Main send loop. - */ - for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; - dest_partid++) { - - - if (!(dp & (1UL << (dest_partid - 1)))) { - /* not destined for this partition */ - continue; - } - - /* remove this partition from the destinations mask */ - dp &= ~(1UL << (dest_partid - 1)); - - - /* found a partition to send to */ - - ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, - XPC_NOWAIT, (void **)&msg); - if (unlikely(ret != xpcSuccess)) { - continue; - } - - msg->embedded_bytes = embedded_bytes; - if (unlikely(embedded_bytes != 0)) { - msg->version = XPNET_VERSION_EMBED; - dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", - &msg->data, skb->data, (size_t) embedded_bytes); - skb_copy_from_linear_data(skb, &msg->data, - (size_t)embedded_bytes); - } else { - msg->version = XPNET_VERSION; - } - msg->magic = XPNET_MAGIC; - msg->size = end_addr - start_addr; - msg->leadin_ignore = (u64) skb->data - start_addr; - msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); - msg->buf_pa = __pa(start_addr); - - dev_dbg(xpnet, "sending XPC message to %d:%d\n" - KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, " - "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", - dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, - msg->leadin_ignore, msg->tailout_ignore); - - - atomic_inc(&queued_msg->use_count); - - ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, - xpnet_send_completed, queued_msg); - if (unlikely(ret != xpcSuccess)) { - atomic_dec(&queued_msg->use_count); - continue; - } - - } - - if (atomic_dec_return(&queued_msg->use_count) == 0) { - dev_dbg(xpnet, "no partitions to receive packet destined for " - "%d\n", dest_partid); - - - dev_kfree_skb(skb); - kfree(queued_msg); - } - - priv->stats.tx_packets++; - priv->stats.tx_bytes += skb->len; - - return 0; -} - - -/* - * Deal with transmit timeouts coming from the network layer. - */ -static void -xpnet_dev_tx_timeout (struct net_device *dev) -{ - struct xpnet_dev_private *priv; - - - priv = (struct xpnet_dev_private *) dev->priv; - - priv->stats.tx_errors++; - return; -} - - -static int __init -xpnet_init(void) -{ - int i; - u32 license_num; - int result = -ENOMEM; - - - if (!ia64_platform_is("sn2")) { - return -ENODEV; - } - - dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); - - /* - * use ether_setup() to init the majority of our device - * structure and then override the necessary pieces. - */ - xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), - XPNET_DEVICE_NAME, ether_setup); - if (xpnet_device == NULL) { - return -ENOMEM; - } - - netif_carrier_off(xpnet_device); - - xpnet_device->mtu = XPNET_DEF_MTU; - xpnet_device->change_mtu = xpnet_dev_change_mtu; - xpnet_device->open = xpnet_dev_open; - xpnet_device->get_stats = xpnet_dev_get_stats; - xpnet_device->stop = xpnet_dev_stop; - xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit; - xpnet_device->tx_timeout = xpnet_dev_tx_timeout; - xpnet_device->set_config = xpnet_dev_set_config; - - /* - * Multicast assumes the LSB of the first octet is set for multicast - * MAC addresses. We chose the first octet of the MAC to be unlikely - * to collide with any vendor's officially issued MAC. - */ - xpnet_device->dev_addr[0] = 0xfe; - xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; - license_num = sn_partition_serial_number_val(); - for (i = 3; i >= 0; i--) { - xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = - license_num & 0xff; - license_num = license_num >> 8; - } - - /* - * ether_setup() sets this to a multicast device. We are - * really not supporting multicast at this time. - */ - xpnet_device->flags &= ~IFF_MULTICAST; - - /* - * No need to checksum as it is a DMA transfer. The BTE will - * report an error if the data is not retrievable and the - * packet will be dropped. - */ - xpnet_device->features = NETIF_F_NO_CSUM; - - result = register_netdev(xpnet_device); - if (result != 0) { - free_netdev(xpnet_device); - } - - return result; -} -module_init(xpnet_init); - - -static void __exit -xpnet_exit(void) -{ - dev_info(xpnet, "unregistering network device %s\n", - xpnet_device[0].name); - - unregister_netdev(xpnet_device); - - free_netdev(xpnet_device); -} -module_exit(xpnet_exit); - - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); -MODULE_LICENSE("GPL"); - diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 9b3c1137302..94e584527f4 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -655,7 +655,8 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) * * Simply call tioce_do_dma_map() to create a map with the barrier bit set * in the address. - */ static u64 + */ +static u64 tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); @@ -668,7 +669,8 @@ tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma * * Handle a CE error interrupt. Simply a wrapper around a SAL call which * defers processing to the SGI prom. - */ static irqreturn_t + */ +static irqreturn_t tioce_error_intr_handler(int irq, void *arg) { struct tioce_common *soft = arg; |