From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:31 -0800 Subject: sparse irq_desc[] array: core kernel and x86 changes Impact: new feature Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case. To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers. When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()). This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f94..bb6b69a6b12 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,11 +102,20 @@ #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) + +#ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif +#else +# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) +# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif +#endif #elif defined(CONFIG_X86_VOYAGER) -- cgit v1.2.3-70-g09d2 From 99d093d12897562a253540a902bbf65ec16042ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:32 -0800 Subject: x86: use NR_IRQS_LEGACY Impact: cleanup Introduce NR_IRQS_LEGACY instead of hard coded number. 
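
The core idea behind these two changes - a fixed array of irq_desc pointers whose first NR_IRQS_LEGACY slots are backed by static storage while the rest are allocated on demand, NUMA-local to the caller - can be pictured with the short sketch below. This is an illustrative user-space model only, not the kernel code: the NR_IRQS value is a placeholder, calloc() stands in for kzalloc_node(), the *_sketch function names are hypothetical, and struct irq_desc is reduced to just the fields needed to show the lookup/allocate path (chip_data is where x86 stores its irq_cfg after the first patch).

#include <stdlib.h>

#define NR_IRQS_LEGACY 16
#define NR_IRQS        1024            /* placeholder; the real value is config-dependent */

struct irq_desc {
	int	irq;
	void	*chip_data;		/* x86: points at struct irq_cfg after the patch */
};

/* Legacy ISA descriptors keep static storage, as irq_desc_legacy[] does. */
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY];

/* Sparse array: only the pointers are static, descriptors come on demand. */
static struct irq_desc *irq_desc_ptrs[NR_IRQS];

static void early_irq_init_sketch(void)
{
	int i;

	for (i = 0; i < NR_IRQS_LEGACY; i++) {
		irq_desc_legacy[i].irq = i;
		irq_desc_ptrs[i] = &irq_desc_legacy[i];
	}
}

/*
 * Look up a descriptor, allocating it on first use.  In the kernel this
 * allocation would be kzalloc_node() on the node of the requesting CPU,
 * which is what makes the descriptor NUMA-local to the request_irq() site.
 */
static struct irq_desc *irq_to_desc_alloc_sketch(unsigned int irq)
{
	struct irq_desc *desc;

	if (irq >= NR_IRQS)
		return NULL;

	desc = irq_desc_ptrs[irq];
	if (desc)
		return desc;

	desc = calloc(1, sizeof(*desc));	/* stand-in for kzalloc_node() */
	if (!desc)
		return NULL;

	desc->irq = irq;
	irq_desc_ptrs[irq] = desc;
	return desc;
}

int main(void)
{
	early_irq_init_sketch();
	return irq_to_desc_alloc_sketch(42) ? 0 : 1;	/* allocated on demand */
}

The legacy range is exactly what the kernel/irq/handle.c hunk below touches: the statically initialized irq_desc_legacy[] and kstat_irqs_legacy[] arrays are sized by NR_IRQS_LEGACY instead of the literal 16.
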
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 2 ++ arch/x86/kernel/io_apic.c | 10 +++++----- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/irqinit_64.c | 2 +- kernel/irq/handle.c | 6 +++--- 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index bb6b69a6b12..f7ff65032b9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,6 +101,8 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +#define NR_IRQS_LEGACY 16 + #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) #ifndef CONFIG_SPARSE_IRQ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9de17f5c112..9870461ae93 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -847,7 +847,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); */ static int EISA_ELCR(unsigned int irq) { - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1388,7 +1388,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, } ioapic_register_intr(irq, trigger); - if (irq < 16) + if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); @@ -2080,7 +2080,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; @@ -2504,7 +2504,7 @@ static inline void init_IO_APIC_traps(void) * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < 16) + if (irq < NR_IRQS_LEGACY) make_8259A_irq(irq); else /* Strange. Oh, well.. */ @@ -3794,7 +3794,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) { + if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; add_pin_to_irq_cpu(irq, cpu, ioapic, pin); } diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 5a5651b7f9e..6a92f47c52e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -68,7 +68,7 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - for (i = 0; i < 16; i++) { + for (i = 0; i < NR_IRQS_LEGACY; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index cd9f42d028d..40c1e62ec78 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,7 +142,7 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < 16; i++) { + for (i = 0; i < NR_IRQS_LEGACY; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 96ca203eb51..8aa09547f5e 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -117,8 +117,8 @@ static DEFINE_SPINLOCK(sparse_irq_lock); struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; -static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = { - [0 ... 15] = { +static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { + [0 ... 
NR_IRQS_LEGACY-1] = { .irq = -1, .status = IRQ_DISABLED, .chip = &no_irq_chip, @@ -132,7 +132,7 @@ static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = { }; /* FIXME: use bootmem alloc ...*/ -static unsigned int kstat_irqs_legacy[16][NR_CPUS]; +static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; void __init early_irq_init(void) { -- cgit v1.2.3-70-g09d2 From be5d5350a937cd8513b258739f1099420129e96f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:33 -0800 Subject: x86: MSI start irq numbering from nr_irqs_gsi Impact: sanitize MSI irq number ordering from top-down to bottom-up Increase new MSI IRQs starting from nr_irqs_gsi (which is somewhere below 256), instead of decreasing from NR_IRQS. (The latter method can result in confusingly high IRQ numbers - if NR_CPUS is set to a high value and NR_IRQS scales up to a high value.) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 2 +- arch/x86/kernel/io_apic.c | 24 +++++++++++++++++------- arch/x86/kernel/setup.c | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6afd9933a7d..b35e94c2d6d 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -188,7 +188,7 @@ extern void restore_IO_APIC_setup(void); extern void reinit_intr_remapped_IO_APIC(int); #endif -extern int probe_nr_irqs(void); +extern void probe_nr_irqs_gsi(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9870461ae93..0dcde74abd1 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2973,7 +2973,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { + for (new = irq_want; new < NR_IRQS; new++) { if (platform_legacy_irq(new)) continue; @@ -3001,11 +3001,14 @@ unsigned int create_irq_nr(unsigned int irq_want) return irq; } +static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { + unsigned int irq_want; int irq; - irq = create_irq_nr(nr_irqs - 1); + irq_want = nr_irqs_gsi; + irq = create_irq_nr(irq_want); if (irq == 0) irq = -1; @@ -3281,7 +3284,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) int ret; unsigned int irq_want; - irq_want = nr_irqs - 1; + irq_want = nr_irqs_gsi; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@ -3321,11 +3324,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif - irq_want = nr_irqs - 1; + irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want); - irq_want--; + irq_want++; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@ -3674,9 +3677,16 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries; } -int __init probe_nr_irqs(void) +void __init probe_nr_irqs_gsi(void) { - return NR_IRQS; + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; } /* -------------------------------------------------------------------------- diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9d5674f7b6c..a3834f12320 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1082,7 +1082,7 @@ void __init setup_arch(char **cmdline_p) 
ioapic_init_mappings(); /* need to wait for io_apic is mapped */ - nr_irqs = probe_nr_irqs(); + probe_nr_irqs_gsi(); kvm_guest_init(); -- cgit v1.2.3-70-g09d2 From 50dd94e017ec39f85c26b6c10ed9fb2d7a7d8042 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Dec 2008 18:47:51 +0100 Subject: sparseirq: fix typo in !CONFIG_IO_APIC case Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index b35e94c2d6d..25d527ca136 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,12 +193,9 @@ extern void probe_nr_irqs_gsi(void); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -static inline void ioapic_init_mappings(void) { } +static inline void ioapic_init_mappings(void) { } -static inline int probe_nr_irqs(void) -{ - return NR_IRQS; -} +static inline void probe_nr_irqs_gsi(void) { } #endif #endif /* _ASM_X86_IO_APIC_H */ -- cgit v1.2.3-70-g09d2 From a79b7a2a758c39315344f0d86b5adb21d90d786e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:17:25 -0800 Subject: x86: remove unused iommu_nr_pages Impact: cleanup, remove dead code The last usage was removed by the patch set culminating in | commit e3c449f526cebb8d287241c7e82faafd9709668b | Author: Joerg Roedel | Date: Wed Oct 15 22:02:11 2008 -0700 | | x86, AMD IOMMU: convert driver to generic iommu_num_pages function Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu.h | 2 -- arch/x86/kernel/pci-dma.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 0b500c5b644..35276ec5925 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -7,8 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19262482021..e150ad4f0cc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -125,13 +125,6 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } -unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) -{ - unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); - - return size >> PAGE_SHIFT; -} -EXPORT_SYMBOL(iommu_nr_pages); #endif void *dma_generic_alloc_coherent(struct device *dev, size_t size, -- cgit v1.2.3-70-g09d2 From e7986739a76cde5079da08809d8bbc6878387ae0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:33:52 -0800 Subject: x86 smp: modify send_IPI_mask interface to accept cpumask_t pointers Impact: cleanup, change parameter passing * Change genapic interfaces to accept cpumask_t pointers where possible. * Modify external callers to use cpumask_t pointers in function calls. * Create new send_IPI_mask_allbutself which is the same as the send_IPI_mask functions but removes smp_processor_id() from list. This removes another common need for a temporary cpumask_t variable. 
* Functions that used a temp cpumask_t variable for: cpumask_t allbutme = cpu_online_map; cpu_clear(smp_processor_id(), allbutme); if (!cpus_empty(allbutme)) ... become: if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) ... * Other minor code optimizations (like using cpus_clear instead of CPU_MASK_NONE, etc.) Applies to linux-2.6.tip/master. Signed-off-by: Mike Travis Signed-off-by: Rusty Russell Acked-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 14 +-- arch/x86/include/asm/bigsmp/ipi.h | 9 +- arch/x86/include/asm/es7000/apic.h | 38 +++---- arch/x86/include/asm/es7000/ipi.h | 9 +- arch/x86/include/asm/genapic_32.h | 9 +- arch/x86/include/asm/genapic_64.h | 11 +- arch/x86/include/asm/ipi.h | 21 +++- arch/x86/include/asm/mach-default/mach_apic.h | 17 ++- arch/x86/include/asm/mach-default/mach_ipi.h | 18 ++-- arch/x86/include/asm/numaq/apic.h | 6 +- arch/x86/include/asm/numaq/ipi.h | 9 +- arch/x86/include/asm/smp.h | 6 +- arch/x86/include/asm/summit/apic.h | 12 +-- arch/x86/include/asm/summit/ipi.h | 9 +- arch/x86/kernel/apic.c | 6 +- arch/x86/kernel/crash.c | 5 +- arch/x86/kernel/genapic_flat_64.c | 76 +++++++++----- arch/x86/kernel/genx2apic_cluster.c | 60 +++++++---- arch/x86/kernel/genx2apic_phys.c | 55 +++++++--- arch/x86/kernel/genx2apic_uv_x.c | 43 +++++--- arch/x86/kernel/io_apic.c | 145 +++++++++++++------------- arch/x86/kernel/ipi.c | 26 +++-- arch/x86/kernel/smp.c | 8 +- arch/x86/kernel/tlb_32.c | 2 +- arch/x86/kernel/tlb_64.c | 2 +- arch/x86/mach-generic/bigsmp.c | 5 +- arch/x86/mach-generic/es7000.c | 5 +- arch/x86/mach-generic/numaq.c | 5 +- arch/x86/mach-generic/summit.c | 5 +- arch/x86/xen/smp.c | 17 ++- 30 files changed, 380 insertions(+), 273 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index ce547f24a1c..dc6225ca48a 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return &cpu_online_map; #else - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS) + if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= NR_CPUS) + if (cpu >= nr_cpu_ids) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,12 +119,12 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; int apicid; - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 9404c535b7e..63553e9f22b 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void 
send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e24ef876915..4cac0837bb4 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus_cluster(void) +static inline const cpumask_t *target_cpus_cluster(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return cpumask_of_cpu(smp_processor_id()); + return &cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) + else if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) +static inline unsigned int +cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpumask_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return 0xFF; @@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
*/ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return cpu_to_logical_apicid(0); @@ -194,10 +196,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpu_isset(cpu, *cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 632a955fcc0..1a8507265f9 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -13,12 +14,12 @@ static inline void send_IPI_allbutself(int vector) cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 0ac17d33a8c..b21ed21c574 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); + const cpumask_t *(*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,12 +57,13 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - cpumask_t (*vector_allocation_domain)(int cpu); + unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); + void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const cpumask_t *mask, int vector); + void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 2cae011668b..a020e7d35a4 100644 --- 
a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H +#include + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -18,16 +20,17 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - cpumask_t (*vector_allocation_domain)(int cpu); + const cpumask_t *(*target_cpus)(void); + void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const cpumask_t *mask, int vector); + void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa..24b6e613edf 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -117,7 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector) { unsigned long flags; unsigned long query_cpu; @@ -128,11 +128,28 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, *mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } +static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu_mask_nr(query_cpu, *mask) + if (query_cpu != this_cpu) + __send_IPI_dest_field( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); +} + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6cb3a467e06..c18896b0508 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return &cpu_online_map; #else - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); #endif } @@ -61,9 +61,9 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { - return cpus_addr(cpumask)[0]; + return cpus_addr(*cpumask)[0]; } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) @@ -88,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline cpumask_t vector_allocation_domain(int cpu) +static inline void 
vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -98,8 +98,7 @@ static inline cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; } #endif @@ -131,7 +130,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index fabca01ebac..9353ab854a1 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,7 +4,8 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(cpumask_t mask, int vector); +void send_IPI_mask_bitmask(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -12,28 +13,27 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include #define send_IPI_mask (genapic->send_IPI_mask) +#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_bitmask(mask, vector); } +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - send_IPI_mask(mask, vector); - } else + if (no_broadcast || vector == NMI_VECTOR) + send_IPI_mask_allbutself(&cpu_online_map, vector); + else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 0bf2a06b7a4..1df7ebe738e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,7 +122,7 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { return (int) 0xF; } diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index 935588d286c..c734d7acc43 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(cpumask_t, int vector); +void 
send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d..c4a9aa52df6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_ipi)(const cpumask_t *mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(mask); + smp_ops.send_call_func_ipi(&mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(cpumask_t mask); +void native_send_call_func_ipi(const cpumask_t *mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9b3070f1c2a..437dc83725c 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
*/ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpu_isset(cpu, *cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index 53bd1e7bd7b..a8a2c24f50c 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b2cef49f308..a375791c08c 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const struct cpumask *mask); +static void lapic_timer_broadcast(const cpumask_t *mask); static void apic_pm_activate(void); /* @@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const struct cpumask *mask) +static void lapic_timer_broadcast(const cpumask_t *mask) { #ifdef CONFIG_SMP - send_IPI_mask(*mask, LOCAL_TIMER_VECTOR); + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); #endif } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 26855381790..81e01f7b1d1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -77,10 +77,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); + send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda..50eebd0328f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static cpumask_t flat_target_cpus(void) +static const cpumask_t *flat_target_cpus(void) { - return cpu_online_map; + return &cpu_online_map; } -static cpumask_t flat_vector_allocation_domain(int cpu) +static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. 
Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,8 +45,7 @@ static cpumask_t flat_vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } }; } /* @@ -69,9 +68,8 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static void flat_send_IPI_mask(cpumask_t cpumask, int vector) +static inline void _flat_send_IPI_mask(unsigned long mask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -79,20 +77,40 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } +static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector) +{ + unsigned long mask = cpus_addr(*cpumask)[0]; + + _flat_send_IPI_mask(mask, vector); +} + +static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector) +{ + unsigned long mask = cpus_addr(*cpumask)[0]; + int cpu = smp_processor_id(); + + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); +} + static void flat_send_IPI_allbutself(int vector) { + int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - cpumask_t allbutme = cpu_online_map; + if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) { + unsigned long mask = cpus_addr(cpu_online_map)[0]; - cpu_clear(smp_processor_id(), allbutme); + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); - if (!cpus_empty(allbutme)) - flat_send_IPI_mask(allbutme, vector); + _flat_send_IPI_mask(mask, vector); + } } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -101,7 +119,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_map, vector); + flat_send_IPI_mask(&cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -135,9 +153,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask) { - return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; + return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; } static unsigned int phys_pkg_id(int index_msb) @@ -157,6 +175,7 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, @@ -188,35 +207,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t physflat_target_cpus(void) +static const cpumask_t *physflat_target_cpus(void) { - return cpu_online_map; + return &cpu_online_map; } -static cpumask_t physflat_vector_allocation_domain(int cpu) +static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask) { - return cpumask_of_cpu(cpu); + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } -static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) +static void 
physflat_send_IPI_mask(const cpumask_t *cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_allbutself(int vector) +static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask, + int vector) { - cpumask_t allbutme = cpu_online_map; + send_IPI_mask_allbutself(cpumask, vector); +} - cpu_clear(smp_processor_id(), allbutme); - physflat_send_IPI_mask(allbutme, vector); +static void physflat_send_IPI_allbutself(int vector) +{ + send_IPI_mask_allbutself(&cpu_online_map, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_map, vector); + physflat_send_IPI_mask(&cpu_online_map, vector); } -static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; @@ -224,7 +247,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else @@ -243,6 +266,7 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a..f5fa9a91ad3 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t x2apic_target_cpus(void) +static const cpumask_t *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -56,32 +55,52 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. 
*/ -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - } + for_each_cpu_mask_nr(query_cpu, *mask) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - cpu_clear(smp_processor_id(), mask); + local_irq_save(flags); + for_each_cpu_mask_nr(query_cpu, *mask) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); +} - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); + + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(&cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -89,7 +108,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; @@ -97,8 +116,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = first_cpu(*cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; @@ -150,6 +169,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b..41c27b2f3d0 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static cpumask_t x2apic_target_cpus(void) +static const cpumask_t *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -54,32 +53,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, *mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); + + local_irq_save(flags); + for_each_cpu_mask_nr(query_cpu, *mask) { + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} - cpu_clear(smp_processor_id(), mask); +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(&cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -87,7 +107,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; @@ -95,8 +115,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = first_cpu(*cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; @@ -145,6 +165,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2c7dbdb9827..010659415ae 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static cpumask_t uv_target_cpus(void) +static const cpumask_t *uv_target_cpus(void) { - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } -static cpumask_t uv_vector_allocation_domain(int cpu) +static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(cpumask_t mask, int vector) +static void uv_send_IPI_mask(const cpumask_t *mask, int vector) { unsigned int cpu; - for_each_possible_cpu(cpu) - if (cpu_isset(cpu, mask)) + for_each_cpu_mask_nr(cpu, *mask) + uv_send_IPI_one(cpu, vector); +} + +static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +{ + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); + + for_each_cpu_mask_nr(cpu, *mask) + if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - uv_send_IPI_mask(mask, vector); + for_each_online_cpu(cpu) + if (cpu != this_cpu) + uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_map, vector); + uv_send_IPI_mask(&cpu_online_map, vector); } static int uv_apic_id_registered(void) @@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; @@ -164,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. 
*/ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else @@ -218,6 +226,7 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3d7d0d55253..7f23ce7f551 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -231,7 +231,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +static inline void +set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) { } @@ -396,7 +397,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) @@ -412,13 +414,13 @@ static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, irq = desc->irq; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); /* * Only the high 8 bits are valid. */ @@ -1099,7 +1101,8 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1115,35 +1118,32 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; + cpumask_t tmp_mask; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); - old_vector = cfg->vector; if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) + cpus_and(tmp_mask, *mask, cpu_online_map); + cpus_and(tmp_mask, cfg->domain, tmp_mask); + if (!cpus_empty(tmp_mask)) return 0; } - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; + /* Only try and allocate irqs on cpus that are present */ + for_each_cpu_and(cpu, mask, &cpu_online_map) { int new_cpu; int vector, offset; - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); + vector_allocation_domain(cpu, &tmp_mask); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ + /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } @@ -1156,7 +1156,7 @@ next: if (vector == SYSCALL_VECTOR) goto next; #endif - for_each_cpu_mask_nr(new_cpu, new_mask) + for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! 
*/ @@ -1166,16 +1166,17 @@ next: cfg->move_in_progress = 1; cfg->old_domain = cfg->domain; } - for_each_cpu_mask_nr(new_cpu, new_mask) + for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cfg->domain = domain; + cfg->domain = tmp_mask; return 0; } return -ENOSPC; } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1384,8 +1385,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de cfg = desc->chip_data; - mask = TARGET_CPUS; - if (assign_irq_vector(irq, cfg, mask)) + mask = *TARGET_CPUS; + if (assign_irq_vector(irq, cfg, &mask)) return; cpus_and(mask, cfg->domain, mask); @@ -1398,7 +1399,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, + cpu_mask_to_apicid(&mask), trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); @@ -2121,7 +2122,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2170,18 +2171,19 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) +static void +migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; - cpumask_t tmp, cleanup_mask; + cpumask_t tmpmask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + cpus_and(tmpmask, *mask, cpu_online_map); + if (cpus_empty(tmpmask)) return; irq = desc->irq; @@ -2194,8 +2196,8 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) set_extra_move_desc(desc, mask); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + cpus_and(tmpmask, cfg->domain, *mask); + dest = cpu_mask_to_apicid(&tmpmask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2213,13 +2215,13 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) modify_irte(irq, &irte); if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cpus_and(tmpmask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(tmpmask); + send_IPI_mask(&tmpmask, IRQ_MOVE_CLEANUP_VECTOR); cfg->move_in_progress = 0; } - desc->affinity = mask; + desc->affinity = *mask; } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2241,7 +2243,7 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. 
we have right of way */ - migrate_ioapic_irq_desc(desc, desc->pending_mask); + migrate_ioapic_irq_desc(desc, &desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; @@ -2292,7 +2294,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, return; } - migrate_ioapic_irq_desc(desc, *mask); + migrate_ioapic_irq_desc(desc, mask); } static void set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) @@ -2359,7 +2361,7 @@ static void irq_complete_move(struct irq_desc **descp) cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); cfg->move_in_progress = 0; } } @@ -3089,13 +3091,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, tmp); + tmp = *TARGET_CPUS; + err = assign_irq_vector(irq, cfg, &tmp); if (err) return err; cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3161,13 +3163,13 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); read_msi_msg_desc(desc, &msg); @@ -3184,8 +3186,8 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. 
*/ -static void ir_set_msi_irq_affinity(unsigned int irq, - const struct cpumask *mask) +static void +ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3200,13 +3202,13 @@ static void ir_set_msi_irq_affinity(unsigned int irq, return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3224,7 +3226,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, if (cfg->move_in_progress) { cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); cfg->move_in_progress = 0; } @@ -3419,7 +3421,7 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3431,13 +3433,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); dmar_msi_read(irq, &msg); @@ -3481,7 +3483,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3493,13 +3495,13 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); hpet_msi_read(irq, &msg); @@ -3564,7 +3566,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void set_ht_irq_affinity(unsigned int irq, const cpumask_t *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3575,13 +3577,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, *mask)) + if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, *mask); + set_extra_move_desc(desc, mask); cpumask_and(&tmp, &cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); target_ht_irq(irq, dest, cfg->vector); cpumask_copy(&desc->affinity, mask); @@ -3607,14 +3609,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, 
tmp); + err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { struct ht_irq_msg msg; unsigned dest; cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid(&tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3650,7 +3651,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + const cpumask_t *eligible_cpu = &cpumask_of_cpu(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3660,7 +3661,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, *eligible_cpu); + err = assign_irq_vector(irq, cfg, eligible_cpu); if (err != 0) return err; @@ -3679,7 +3680,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(*eligible_cpu); + entry->dest = cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -3890,7 +3891,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - cpumask_t mask; + const cpumask_t *mask; if (skip_ioapic_setup == 1) return; @@ -3921,16 +3922,16 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; + mask = &desc->affinity; else mask = TARGET_CPUS; #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq_desc(desc, &mask); + set_ir_ioapic_affinity_irq_desc(desc, mask); else #endif - set_ioapic_affinity_irq_desc(desc, &mask); + set_ioapic_affinity_irq_desc(desc, mask); } } diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f3..86aa50fc65a 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -116,9 +116,9 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. 
*/ -void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long mask = cpus_addr(*cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -127,7 +127,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) local_irq_restore(flags); } -void send_IPI_mask_sequence(cpumask_t mask, int vector) +void send_IPI_mask_sequence(const cpumask_t *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) */ local_irq_save(flags); - for_each_possible_cpu(query_cpu) { - if (cpu_isset(query_cpu, mask)) { + for_each_cpu_mask_nr(query_cpu, *mask) + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + local_irq_restore(flags); +} + +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu_mask_nr(query_cpu, *mask) + if (query_cpu != this_cpu) __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - } - } local_irq_restore(flags); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 3f92b134ab9..341df946f9a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(cpumask_t mask) +void native_send_call_func_ipi(const cpumask_t *mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(mask, allbutself) && + if (cpus_equal(*mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index f4049f3513b..174ea90d1cb 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 8f919ca6949..de6f1bda0c5 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. 
*/ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3624a364b7f..bc4c7840b2a 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { - return cpumask_of_cpu(cpu); + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } static int probe_bigsmp(void) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 7b4e6d0d169..4ba5ccaa158 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 71a309b122e..511d7941364 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2c6d234e000..2821ffc188b 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
*/ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index acd9b6705e0..2cce362c987 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -196,7 +196,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) + for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -408,24 +408,22 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) +static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector) { unsigned cpu; - cpus_and(mask, mask, cpu_online_map); - - for_each_cpu_mask_nr(cpu, mask) + for_each_cpu_and(cpu, mask, &cpu_online_map) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(cpumask_t mask) +static void xen_smp_send_call_function_ipi(const cpumask_t *mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. */ - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu_mask_nr(cpu, *mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -435,7 +433,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(&cpumask_of_cpu(cpu), + XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) -- cgit v1.2.3-70-g09d2 From 95d313cf1c1ecedc8bec5727b09bdacbf67dfc45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:33:54 -0800 Subject: x86: Add cpu_mask_to_apicid_and Impact: new API Add a helper function that takes two cpumask's, and's them and then returns the apicid of the result. This removes a need in io_apic.c that uses a temporary cpumask to hold (mask & cfg->domain). 
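For illustration, a minimal before/after sketch of the intent (hypothetical helper names dest_via_tmp()/dest_via_and(); this is not code from the patch, and it assumes the kernel-internal struct irq_cfg used by io_apic.c):

	/* Before: an on-stack cpumask_t is needed just to derive one APIC ID. */
	static unsigned int dest_via_tmp(const cpumask_t *mask, struct irq_cfg *cfg)
	{
		cpumask_t tmp;			/* NR_CPUS bits on the stack */

		cpus_and(tmp, cfg->domain, *mask);
		return cpu_mask_to_apicid(&tmp);
	}

	/* After: the new helper performs the AND internally, no temporary. */
	static unsigned int dest_via_and(const cpumask_t *mask, struct irq_cfg *cfg)
	{
		return cpu_mask_to_apicid_and(&cfg->domain, mask);
	}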
Signed-off-by: Mike Travis Signed-off-by: Rusty Russell --- arch/x86/include/asm/bigsmp/apic.h | 16 +++++++++ arch/x86/include/asm/es7000/apic.h | 47 +++++++++++++++++++++++++++ arch/x86/include/asm/genapic_32.h | 3 ++ arch/x86/include/asm/genapic_64.h | 2 ++ arch/x86/include/asm/mach-default/mach_apic.h | 10 ++++++ arch/x86/include/asm/mach-generic/mach_apic.h | 1 + arch/x86/include/asm/numaq/apic.h | 6 ++++ arch/x86/include/asm/summit/apic.h | 39 ++++++++++++++++++++++ arch/x86/kernel/genapic_flat_64.c | 26 +++++++++++++++ arch/x86/kernel/genx2apic_cluster.c | 16 +++++++++ arch/x86/kernel/genx2apic_phys.c | 16 +++++++++ arch/x86/kernel/genx2apic_uv_x.c | 16 +++++++++ 12 files changed, 198 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index dc6225ca48a..99f9abacf6a 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -129,6 +129,22 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask)) + return cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 4cac0837bb4..c2bed772af8 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -214,6 +214,53 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int num_bits_set; + int num_bits_set2; + int cpus_found = 0; + int cpu; + int apicid = 0; + + num_bits_set = cpus_weight(*cpumask); + num_bits_set2 = cpus_weight(*andmask); + num_bits_set = min_t(int, num_bits_set, num_bits_set2); + /* Return id to all */ + if (num_bits_set >= nr_cpu_ids) +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. 
+ */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask) + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk(KERN_WARNING + "%s: Not a valid mask!\n", __func__); +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index b21ed21c574..325298a8223 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -58,6 +58,8 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask, + const cpumask_t *andmask); void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); #ifdef CONFIG_SMP @@ -115,6 +117,7 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index a020e7d35a4..301c7f41125 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -31,6 +31,8 @@ struct genapic { void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask, + const cpumask_t *andmask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index c18896b0508..229b605d104 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -28,6 +28,7 @@ static inline const cpumask_t *target_cpus(void) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) @@ -66,6 +67,15 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return cpus_addr(*cpumask)[0]; } +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + unsigned long mask1 = cpus_addr(*cpumask)[0]; + unsigned long mask2 = cpus_addr(*andmask)[0]; + + return (unsigned int)(mask1 & mask2); +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e430f47df66..48553e958ad 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,6 +24,7 @@ #define check_phys_apicid_present 
(genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 1df7ebe738e..abf668ced50 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -127,6 +127,12 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return (int) 0xF; } +static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + return (int) 0xF; +} + /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 437dc83725c..cbcc2c7eb1d 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -170,6 +170,45 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int num_bits_set; + int num_bits_set2; + int cpus_found = 0; + int cpu; + int apicid = 0; + + num_bits_set = cpus_weight(*cpumask); + num_bits_set2 = cpus_weight(*andmask); + num_bits_set = min_t(int, num_bits_set, num_bits_set2); + /* Return id to all */ + if (num_bits_set >= nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask) + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk(KERN_WARNING + "%s: Not a valid mask!\n", __func__); + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. 
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 50eebd0328f..1efecd206a7 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -158,6 +158,15 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask) return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; } +static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS; + + return (int)(mask1 & mask2); +} + static unsigned int phys_pkg_id(int index_msb) { return hard_smp_processor_id() >> index_msb; @@ -178,6 +187,7 @@ struct genapic apic_flat = { .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -254,6 +264,21 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } +static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask)) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -269,6 +294,7 @@ struct genapic apic_physflat = { .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f5fa9a91ad3..fd8047f4e45 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -123,6 +123,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. 
+ */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask)) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -172,6 +187,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 41c27b2f3d0..d5578bb8f16 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -122,6 +122,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask)) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -168,6 +183,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 010659415ae..53bd2570272 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -179,6 +179,21 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } +static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask, + const cpumask_t *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) + if (cpu_isset(cpu, *andmask)) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -229,6 +244,7 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, -- cgit v1.2.3-70-g09d2 From 6eeb7c5a99434596c5953a95baa17d2f085664e3 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:33:55 -0800 Subject: x86: update add-cpu_mask_to_apicid_and to use struct cpumask* Impact: use updated APIs Various API updates for x86:add-cpu_mask_to_apicid_and (Note: separate because previous patch has been "backported" to 2.6.27.) 
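In sketch form, this is the idiom the helpers are converted to (hypothetical function name example_apicid_and(); the real per-genapic implementations are in the hunks below):

	static unsigned int example_apicid_and(const struct cpumask *cpumask,
					       const struct cpumask *andmask)
	{
		/* Pick any CPU present in both masks, without a temporary mask. */
		int cpu = cpumask_any_and(cpumask, andmask);

		if (cpu < nr_cpu_ids)
			return per_cpu(x86_cpu_to_apicid, cpu);
		return BAD_APICID;
	}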
Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/bigsmp/apic.h | 10 +++++----- arch/x86/include/asm/es7000/apic.h | 19 ++++++++++--------- arch/x86/include/asm/genapic_32.h | 4 ++-- arch/x86/include/asm/genapic_64.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 8 ++++---- arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 18 +++++++++--------- arch/x86/kernel/genapic_flat_64.c | 21 +++++++++++---------- arch/x86/kernel/genx2apic_cluster.c | 10 +++++----- arch/x86/kernel/genx2apic_phys.c | 10 +++++----- arch/x86/kernel/genx2apic_uv_x.c | 10 +++++----- 11 files changed, 60 insertions(+), 58 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 99f9abacf6a..976399debb3 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -129,8 +129,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -138,9 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask)) - return cpu_to_logical_apicid(cpu); + cpu = cpumask_any_and(cpumask, andmask); + if (cpu < nr_cpu_ids) + return cpu_to_logical_apicid(cpu); return BAD_APICID; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index c2bed772af8..ba8423c5363 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -214,8 +214,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int num_bits_set; int num_bits_set2; @@ -223,9 +223,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, int cpu; int apicid = 0; - num_bits_set = cpus_weight(*cpumask); - num_bits_set2 = cpus_weight(*andmask); - num_bits_set = min_t(int, num_bits_set, num_bits_set2); + num_bits_set = cpumask_weight(cpumask); + num_bits_set2 = cpumask_weight(andmask); + num_bits_set = min(num_bits_set, num_bits_set2); /* Return id to all */ if (num_bits_set >= nr_cpu_ids) #if defined CONFIG_ES7000_CLUSTERED_APIC @@ -237,11 +237,12 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
*/ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask) - apicid = cpu_to_logical_apicid(cpu); + cpu = cpumask_first_and(cpumask, andmask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) { + if (cpumask_test_cpu(cpu, cpumask) && + cpumask_test_cpu(cpu, andmask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)) { diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 325298a8223..eed6e305291 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -58,8 +58,8 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask, - const cpumask_t *andmask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 301c7f41125..244b71729ec 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -31,8 +31,8 @@ struct genapic { void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask, - const cpumask_t *andmask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 229b605d104..df8e024c43c 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -67,11 +67,11 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return cpus_addr(*cpumask)[0]; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask, - const cpumask_t *andmask) +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - unsigned long mask1 = cpus_addr(*cpumask)[0]; - unsigned long mask2 = cpus_addr(*andmask)[0]; + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; return (unsigned int)(mask1 & mask2); } diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index abf668ced50..c80f00d2996 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -127,8 +127,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return (int) 0xF; } -static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { return (int) 0xF; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index cbcc2c7eb1d..651a9384934 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -170,8 +170,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int 
cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int num_bits_set; int num_bits_set2; @@ -179,9 +179,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, int cpu; int apicid = 0; - num_bits_set = cpus_weight(*cpumask); - num_bits_set2 = cpus_weight(*andmask); - num_bits_set = min_t(int, num_bits_set, num_bits_set2); + num_bits_set = cpumask_weight(cpumask); + num_bits_set2 = cpumask_weight(andmask); + num_bits_set = min(num_bits_set, num_bits_set2); /* Return id to all */ if (num_bits_set >= nr_cpu_ids) return 0xFF; @@ -189,11 +189,11 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask, * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask) - apicid = cpu_to_logical_apicid(cpu); + cpu = cpumask_first_and(cpumask, andmask); + apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) { + if (cpumask_test_cpu(cpu, cpumask) + && cpumask_test_cpu(cpu, andmask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)) { diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1efecd206a7..c772bb10b17 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -158,13 +158,13 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask) return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; } -static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS; + unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - return (int)(mask1 & mask2); + return mask1 & mask2; } static unsigned int phys_pkg_id(int index_msb) @@ -264,8 +264,9 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } -static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static unsigned int +physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -273,9 +274,9 @@ static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. 
*/ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask)) - return per_cpu(x86_cpu_to_apicid, cpu); + cpu = cpumask_any_and(cpumask, andmask); + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index fd8047f4e45..e7d16f53b9c 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -123,8 +123,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -132,9 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask)) - return per_cpu(x86_cpu_to_apicid, cpu); + cpu = cpumask_any_and(cpumask, andmask); + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d5578bb8f16..9d0386c7e79 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -122,8 +122,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -131,9 +131,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask)) - return per_cpu(x86_cpu_to_apicid, cpu); + cpu = cpumask_any_and(cpumask, andmask); + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 53bd2570272..22596ec94c8 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -179,8 +179,8 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask) return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask, - const cpumask_t *andmask) +static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -188,9 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids) - if (cpu_isset(cpu, *andmask)) - return per_cpu(x86_cpu_to_apicid, cpu); + cpu = cpumask_any_and(cpumask, andmask); + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; } -- cgit v1.2.3-70-g09d2 From d7b381bb7b1ad69ff008ea063d26e988b686c8de Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:33:58 -0800 Subject: x86: fixup_irqs() doesnt need an argument. Impact: cleanup, remove on-stack cpumask. The "map" arg is always cpu_online_mask. 
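In sketch form, the signature change (condensed from the arch/x86/include/asm/irq.h hunk below):

	/* Before: every caller passed cpu_online_map by value. */
	extern void fixup_irqs(cpumask_t map);

	/* After: no argument; affinity is checked against cpu_online_mask inside. */
	extern void fixup_irqs(void);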
Importantly, set_affinity always ands the argument with cpu_online_mask anyway, so we don't need to do it in fixup_irqs(), avoiding a temporary. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/irq_32.c | 13 +++++++------ arch/x86/kernel/irq_64.c | 15 ++++++++------- arch/x86/kernel/smpboot.c | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index bae0eda9548..8766d30fb74 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str); #ifdef CONFIG_HOTPLUG_CPU #include -extern void fixup_irqs(cpumask_t map); +extern void fixup_irqs(void); #endif extern unsigned int do_IRQ(struct pt_regs *regs); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 9cf9cbbf7a0..9dc5588f336 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; + const struct cpumask *affinity; if (!desc) continue; if (irq == 2) continue; - cpus_and(mask, desc->affinity, map); - if (any_online_cpu(mask) == NR_CPUS) { + affinity = &desc->affinity; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + affinity = cpu_all_mask; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, &mask); + desc->chip->set_affinity(irq, affinity); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 27f2307b0a3..fca2991443f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,16 +83,17 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; int break_affinity = 0; int set_affinity = 1; + const struct cpumask *affinity; if (!desc) continue; @@ -102,23 +103,23 @@ void fixup_irqs(cpumask_t map) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); + affinity = &desc->affinity; if (!irq_has_action(irq) || - cpus_equal(desc->affinity, map)) { + cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); continue; } - cpus_and(mask, desc->affinity, map); - if (cpus_empty(mask)) { + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; - mask = map; + affinity = cpu_all_mask; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, &mask); + desc->chip->set_affinity(irq, affinity); else if (!(warned++)) set_affinity = 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9d58134e023..8b6f675b363 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1346,7 +1346,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(cpu_online_map); + fixup_irqs(); } int native_cpu_disable(void) -- cgit v1.2.3-70-g09d2 From bcda016eddd7a8b374bb371473c821a91ff1d8cc Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:33:59 -0800 Subject: x86: cosmetic changes apic-related files. This patch simply changes cpumask_t to struct cpumask and similar trivial modernizations. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/bigsmp/ipi.h | 14 +++----- arch/x86/include/asm/es7000/ipi.h | 13 +++---- arch/x86/include/asm/genapic_32.h | 11 +++--- arch/x86/include/asm/genapic_64.h | 11 +++--- arch/x86/include/asm/ipi.h | 10 +++--- arch/x86/include/asm/mach-default/mach_apic.h | 12 +++---- arch/x86/include/asm/mach-default/mach_ipi.h | 12 +++---- arch/x86/include/asm/numaq/ipi.h | 14 +++----- arch/x86/include/asm/smp.h | 4 +-- arch/x86/kernel/genapic_flat_64.c | 50 ++++++++++++++------------- arch/x86/kernel/genx2apic_cluster.c | 25 +++++++------- arch/x86/kernel/genx2apic_phys.c | 25 +++++++------- arch/x86/kernel/genx2apic_uv_x.c | 24 ++++++------- arch/x86/kernel/ipi.c | 14 ++++---- arch/x86/kernel/smp.c | 6 ++-- arch/x86/xen/smp.c | 11 +++--- 16 files changed, 127 insertions(+), 129 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 63553e9f22b..27fcd01b3ae 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,26 +1,22 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + 
send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 1a8507265f9..7e8ed24d4b8 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,25 +1,22 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index eed6e305291..746f37a7963 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - const cpumask_t *(*target_cpus)(void); + const struct cpumask *(*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,15 +57,16 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(const cpumask_t *mask, int vector); - void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 244b71729ec..adf32fb56aa 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -20,17 +20,18 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - const cpumask_t *(*target_cpus)(void); - void (*vector_allocation_domain)(int cpu, cpumask_t *retmask); + const struct cpumask *(*target_cpus)(void); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(const cpumask_t *mask, int vector); - void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask); + unsigned int 
(*cpu_mask_to_apicid)(const struct cpumask *cpumask); unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); unsigned int (*phys_pkg_id)(int index_msb); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 24b6e613edf..c745a306f7d 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector) +static inline void send_IPI_mask_sequence(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned long query_cpu; @@ -128,14 +129,15 @@ static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) { + for_each_cpu(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +static inline void send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned int query_cpu; @@ -144,7 +146,7 @@ static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) /* See Hack comment above */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) + for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) __send_IPI_dest_field( per_cpu(x86_cpu_to_apicid, query_cpu), diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index df8e024c43c..8863d978cb9 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const cpumask_t *target_cpus(void) +static inline const struct cpumask *target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_mask; #else - return &cpumask_of_cpu(0); + return cpumask_of(0); #endif } @@ -62,9 +62,9 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) { - return cpus_addr(*cpumask)[0]; + return cpumask_bits(cpumask)[0]; } static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, @@ -98,7 +98,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline void vector_allocation_domain(int cpu, cpumask_t *retmask) +static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. 
Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index 9353ab854a1..191312d155d 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,8 +4,8 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -15,17 +15,17 @@ extern int no_broadcast; #define send_IPI_mask (genapic->send_IPI_mask) #define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(&cpu_online_map, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } @@ -33,7 +33,7 @@ static inline void __local_send_IPI_allbutself(int vector) static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index c734d7acc43..a8374c65277 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,26 +1,22 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c4a9aa52df6..830b9fcb642 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(const cpumask_t *mask); + void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void 
native_send_call_func_ipi(const cpumask_t *mask); +void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c772bb10b17..7fa5f49c2dd 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static const cpumask_t *flat_target_cpus(void) +static const struct cpumask *flat_target_cpus(void) { - return &cpu_online_map; + return cpu_online_mask; } -static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,7 +45,8 @@ static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } }; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } /* @@ -77,16 +78,17 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector) +static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - unsigned long mask = cpus_addr(*cpumask)[0]; + unsigned long mask = cpumask_bits(cpumask)[0]; _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector) +static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, + int vector) { - unsigned long mask = cpus_addr(*cpumask)[0]; + unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) @@ -103,8 +105,8 @@ static void flat_send_IPI_allbutself(int vector) int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) { - unsigned long mask = cpus_addr(cpu_online_map)[0]; + if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { + unsigned long mask = cpumask_bits(cpu_online_mask)[0]; if (cpu < BITS_PER_LONG) clear_bit(cpu, &mask); @@ -119,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(&cpu_online_map, vector); + flat_send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -153,9 +155,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) { - return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS; + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; } static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, @@ -217,23 +219,23 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const cpumask_t *physflat_target_cpus(void) +static const struct cpumask *physflat_target_cpus(void) { - return &cpu_online_map; + return cpu_online_mask; } -static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void physflat_vector_allocation_domain(int 
cpu, struct cpumask *retmask) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } -static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector) +static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask, +static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { send_IPI_mask_allbutself(cpumask, vector); @@ -241,15 +243,15 @@ static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask, static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(&cpu_online_map, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(&cpu_online_map, vector); + physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -257,7 +259,7 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(*cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index e7d16f53b9c..4716a0c9f93 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -22,18 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const cpumask_t *x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return &cpumask_of_cpu(0); + return cpumask_of(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -55,27 +55,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. 
*/ -static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) + for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) + for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), @@ -100,7 +101,7 @@ static void x2apic_send_IPI_allbutself(int vector) static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(&cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -108,7 +109,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -116,7 +117,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(*cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 9d0386c7e79..b255507884f 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -29,15 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static const cpumask_t *x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return &cpumask_of_cpu(0); + return cpumask_of(0); } -static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -53,27 +53,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) { + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) { + for_each_cpu(query_cpu, mask) { if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_apicid, query_cpu), @@ -99,7 +100,7 @@ static void x2apic_send_IPI_allbutself(int vector) static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(&cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -107,7 +108,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -115,7 +116,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(*cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 22596ec94c8..3984682cd84 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -75,15 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static const cpumask_t *uv_target_cpus(void) +static const struct cpumask *uv_target_cpus(void) { - return &cpumask_of_cpu(0); + return cpumask_of(0); } -static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask) +static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -122,20 +122,20 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(const cpumask_t *mask, int vector) +static void uv_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned int cpu; - for_each_cpu_mask_nr(cpu, *mask) + for_each_cpu(cpu, mask) uv_send_IPI_one(cpu, vector); } -static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { unsigned int cpu; unsigned int this_cpu = smp_processor_id(); - for_each_cpu_mask_nr(cpu, *mask) + for_each_cpu(cpu, mask) if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); } @@ -152,7 +152,7 @@ static void uv_send_IPI_allbutself(int vector) static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(&cpu_online_map, vector); + uv_send_IPI_mask(cpu_online_mask, vector); } static int uv_apic_id_registered(void) @@ -164,7 +164,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -172,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(*cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 86aa50fc65a..285bbf8831f 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. 
*/ -void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector) +void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { - unsigned long mask = cpus_addr(*cpumask)[0]; + unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const cpumask_t *mask, int vector) +void send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,12 +139,12 @@ void send_IPI_mask_sequence(const cpumask_t *mask, int vector) */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) + for_each_cpu(query_cpu, mask) __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -153,7 +153,7 @@ void send_IPI_mask_allbutself(const cpumask_t *mask, int vector) /* See Hack comment above */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, *mask) + for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 341df946f9a..49ed667b06f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,15 +118,15 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(const cpumask_t *mask) +void native_send_call_func_ipi(const struct cpumask *mask) { cpumask_t allbutself; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b3a95868839..c44e2069c7c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -411,22 +411,23 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector) +static void xen_send_IPI_mask(const struct cpumask *mask, + enum ipi_vector vector) { unsigned cpu; - for_each_cpu_and(cpu, mask, &cpu_online_map) + for_each_cpu_and(cpu, mask, cpu_online_mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(const cpumask_t *mask) +static void xen_smp_send_call_function_ipi(const struct cpumask *mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. 
*/ - for_each_cpu_mask_nr(cpu, *mask) { + for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -436,7 +437,7 @@ static void xen_smp_send_call_function_ipi(const cpumask_t *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(&cpumask_of_cpu(cpu), + xen_send_IPI_mask(cpumask_of(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } -- cgit v1.2.3-70-g09d2 From 83b19597f793fd5f91533bda0dc2eb3d21936798 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 16 Dec 2008 17:34:06 -0800 Subject: x86: Introduce topology_core_cpumask()/topology_thread_cpumask() Impact: new API The old topology_core_siblings() and topology_thread_siblings() return a cpumask_t; these new ones return a (const) struct cpumask *. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/topology.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed..79e31e9dcdd 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes -- cgit v1.2.3-70-g09d2 From cfb80c9eae8c7ed8f2ee81090062d15ead51cbe8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:17:36 -0800 Subject: x86: unify pci iommu setup and allow swiotlb to compile for 32 bit swiotlb on 32 bit will be used by Xen domain 0 support. 
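For reference, a rough sketch of how the now-shared pci_iommu_alloc() reads once the hunks below are applied; the IOMMU detection/init calls sitting between the two pci-dma.c hunks are unchanged and are elided here:

    void __init pci_iommu_alloc(void)
    {
    #ifdef CONFIG_X86_64
            /* free the range so iommu could get some range less than 4G */
            dma32_free_bootmem();
    #endif
            /*
             * The order of these functions is important for
             * fall-back/fail-over reasons
             */
            /* ... unchanged IOMMU detection/init calls elided ... */
            pci_swiotlb_init();
    }

On 32-bit there is no DMA32 bootmem range to free, so that call is compiled out and mem_init() simply gains a pci_iommu_alloc() call (see the init_32.c hunk below).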
Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 2 +- arch/x86/include/asm/pci.h | 2 ++ arch/x86/include/asm/pci_64.h | 1 - arch/x86/kernel/Makefile | 3 ++- arch/x86/kernel/pci-dma.c | 6 ++++-- arch/x86/kernel/pci-swiotlb_64.c | 2 ++ arch/x86/mm/init_32.c | 3 +++ 7 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 097794ff6b7..3b43a65894c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -65,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) return dma_ops; else return dev->archdata.dma_ops; -#endif /* _ASM_X86_DMA_MAPPING_H */ +#endif } /* Make sure we keep the same behaviour */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 875b38edf19..50ac542c938 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -82,6 +82,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, static inline void early_quirks(void) { } #endif +extern void pci_iommu_alloc(void); + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index d02d936840a..4da20798277 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); extern void dma32_reserve_bootmem(void); -extern void pci_iommu_alloc(void); /* The PCI address space does equal the physical memory * address space. The networking and block device layers use diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828..a9c656f2d66 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -105,6 +105,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) @@ -118,7 +120,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o - obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o endif diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index e150ad4f0cc..00e07447a5b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -105,11 +105,15 @@ static void __init dma32_free_bootmem(void) dma32_bootmem_ptr = NULL; dma32_bootmem_size = 0; } +#endif void __init pci_iommu_alloc(void) { +#ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); +#endif + /* * The order of these functions is important for * fall-back/fail-over reasons @@ -125,8 +129,6 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } -#endif - void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index f47a097a135..a991afea670 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -62,8 +62,10 @@ struct dma_mapping_ops swiotlb_dma_ops = { void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if 
iommu=off (no_iommu=1) */ +#ifdef CONFIG_X86_64 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; +#endif if (swiotlb_force) swiotlb = 1; if (swiotlb) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c483f424207..2b4b14fc0c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -971,6 +972,8 @@ void __init mem_init(void) start_periodic_check_for_corruption(); + pci_iommu_alloc(); + #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif -- cgit v1.2.3-70-g09d2 From a775a38b1353161a6d7af86b667d6523c12c1a37 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 17 Dec 2008 15:21:39 -0800 Subject: x86: fix cpu_mask_to_apicid_and to include cpu_online_mask Impact: fix potential APIC crash In determining the destination apicid, there are usually three cpumasks that are considered: the incoming cpumask arg, cfg->domain and the cpu_online_mask. Since we are just introducing the cpu_mask_to_apicid_and function, make sure it includes the cpu_online_mask in it's evaluation. [Added with this patch.] There are two io_apic.c functions that did not previously use the cpu_online_mask: setup_IO_APIC_irq and msi_compose_msg. Both of these simply used cpu_mask_to_apicid(cfg->domain & TARGET_CPUS), and all but one arch (NUMAQ[*]) returns only online cpus in the TARGET_CPUS mask, so the behavior is identical for all cases. [*: NUMAQ bug?] Note that alloc_cpumask_var is only used for the 32-bit cases where it's highly likely that the cpumask set size will be small and therefore CPUMASK_OFFSTACK=n. But if that's not the case, failing the allocate will cause the same return value as the default. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 ++- arch/x86/include/asm/es7000/apic.h | 40 ++++++++++++--------------- arch/x86/include/asm/mach-default/mach_apic.h | 3 +- arch/x86/include/asm/summit/apic.h | 30 +++++++++++--------- arch/x86/kernel/genapic_flat_64.c | 4 ++- arch/x86/kernel/genx2apic_cluster.c | 4 ++- arch/x86/kernel/genx2apic_phys.c | 4 ++- arch/x86/kernel/genx2apic_uv_x.c | 4 ++- 8 files changed, 52 insertions(+), 41 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 976399debb3..d8dd9f53791 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -138,7 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. 
*/ - cpu = cpumask_any_and(cpumask, andmask); + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; if (cpu < nr_cpu_ids) return cpu_to_logical_apicid(cpu); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index ba8423c5363..51ac1230294 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -214,51 +214,47 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { int num_bits_set; - int num_bits_set2; int cpus_found = 0; int cpu; - int apicid = 0; + int apicid = cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); num_bits_set = cpumask_weight(cpumask); - num_bits_set2 = cpumask_weight(andmask); - num_bits_set = min(num_bits_set, num_bits_set2); /* Return id to all */ - if (num_bits_set >= nr_cpu_ids) -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif + if (num_bits_set == NR_CPUS) + goto exit; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = cpumask_first_and(cpumask, andmask); + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask) && - cpumask_test_cpu(cpu, andmask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk(KERN_WARNING - "%s: Not a valid mask!\n", __func__); -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); return cpu_to_logical_apicid(0); -#endif } apicid = new_apicid; cpus_found++; } cpu++; } +exit: + free_cpumask_var(cpumask); return apicid; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8863d978cb9..cc09cbbee27 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -72,8 +72,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - return (unsigned int)(mask1 & mask2); + return (unsigned int)(mask1 & mask2 & mask3); } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 651a9384934..99327d1be49 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -170,35 +170,37 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { int num_bits_set; - int num_bits_set2; int cpus_found = 0; int cpu; - int apicid = 0; + int apicid = 0xFF; + cpumask_var_t cpumask; + + if 
(!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return (int) 0xFF; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); num_bits_set = cpumask_weight(cpumask); - num_bits_set2 = cpumask_weight(andmask); - num_bits_set = min(num_bits_set, num_bits_set2); /* Return id to all */ - if (num_bits_set >= nr_cpu_ids) - return 0xFF; + if (num_bits_set == nr_cpu_ids) + goto exit; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = cpumask_first_and(cpumask, andmask); + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask) - && cpumask_test_cpu(cpu, andmask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk(KERN_WARNING - "%s: Not a valid mask!\n", __func__); + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); return 0xFF; } apicid = apicid | new_apicid; @@ -206,6 +208,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, } cpu++; } +exit: + free_cpumask_var(cpumask); return apicid; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7fa5f49c2dd..34185488e4f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -276,7 +276,9 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_any_and(cpumask, andmask); + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 4716a0c9f93..d451c9b9fdf 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -133,7 +133,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_any_and(cpumask, andmask); + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index b255507884f..62895cf315f 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -132,7 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_any_and(cpumask, andmask); + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 3984682cd84..0e88be11227 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -188,7 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. 
*/ - cpu = cpumask_any_and(cpumask, andmask); + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); return BAD_APICID; -- cgit v1.2.3-70-g09d2 From b77b881f21b29aa7efa668fde69ee3dc0372ae3f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Dec 2008 15:23:44 -0800 Subject: x86: fix lguest used_vectors breakage, -v2 Impact: fix lguest, clean up 32-bit lguest used used_vectors to record vectors, but that model of allocating vectors changed and got broken, after we changed vector allocation to a per_cpu array. Try enable that for 64bit, and the array is used for all vectors that are not managed by vector_irq per_cpu array. Also kill system_vectors[], that is now a duplication of the used_vectors bitmap. [ merged in cpus4096 due to io_apic.c cpumask changes. ] [ -v2, fix build failure ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 10 ++++------ arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/apic.c | 2 -- arch/x86/kernel/io_apic.c | 9 +++------ arch/x86/kernel/irqinit_32.c | 16 +++++++++++++++- arch/x86/kernel/irqinit_64.c | 13 +++++++++++++ arch/x86/kernel/traps.c | 12 +++++++----- drivers/lguest/interrupts_and_traps.c | 13 +++++++++---- 8 files changed, 52 insertions(+), 24 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b07..dc27705f544 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } -#define SYS_VECTOR_FREE 0 -#define SYS_VECTOR_ALLOCED 1 - extern int first_system_vector; -extern char system_vectors[]; +/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ +extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) { - if (system_vectors[vector] == SYS_VECTOR_FREE) { - system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 8766d30fb74..4bb732e45a8 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -46,5 +46,6 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index f7a32a3beb2..b9019271af6 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -118,8 +118,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; -char system_vectors[NR_VECTORS] = { [0 ... 
NR_VECTORS-1] = SYS_VECTOR_FREE}; - /* * Debug level, exported for io_apic.c */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 908c1d00a5c..1cbf7c8d46e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1326,13 +1326,10 @@ next: } if (unlikely(current_vector == vector)) continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) + + if (test_bit(vector, used_vectors)) goto next; -#endif + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 6a92f47c52e..61aa2a1004b 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -146,10 +158,12 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 40c1e62ec78..1020919efe1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -135,6 +135,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + void __init init_ISA_irqs(void) { int i; @@ -187,6 +199,7 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab016..4a6dff39a47 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -72,9 +72,6 @@ #include "cpu/mcheck/mce.h" -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ @@ -89,6 +86,9 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -949,9 +949,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { -#ifdef CONFIG_X86_32 int i; -#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1008,11 +1006,15 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); +#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); +#ifdef CONFIG_X86_64 + set_bit(IA32_SYSCALL_VECTOR, used_vectors); +#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index a1039068f95..415fab0125a 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -222,11 +222,16 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR - && test_and_set_bit(syscall_vector, used_vectors)) { - printk("lg: couldn't reserve syscall %u\n", syscall_vector); - return -EBUSY; + if (syscall_vector != SYSCALL_VECTOR) { + if (test_bit(syscall_vector, used_vectors) || + vector_used_by_percpu_irq(syscall_vector)) { + printk(KERN_ERR "lg: couldn't reserve syscall %u\n", + syscall_vector); + return -EBUSY; + } + set_bit(syscall_vector, used_vectors); } + return 0; } -- cgit v1.2.3-70-g09d2 From 33f089ca5a61f7aead26e8e1866dfc961dd88a9e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 26 Sep 2008 09:30:49 +0200 Subject: KVM: VMX: refactor/fix IRQ and NMI injectability determination There are currently two ways in VMX to check if an IRQ or NMI can be injected: - vmx_{nmi|irq}_enabled and - vcpu.arch.{nmi|interrupt}_window_open. Even worse, one test (at the end of vmx_vcpu_run) uses an inconsistent, likely incorrect logic. This patch consolidates and unifies the tests over {nmi|interrupt}_window_open as cache + vmx_update_window_states for updating the cache content. 
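In effect, the cached state boils down to the following sketch (mirroring the vmx_update_window_states() helper added in the vmx.c hunk below), which the injection paths then consult instead of re-reading the VMCS with their own ad-hoc tests:

    u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);

    vcpu->arch.nmi_window_open =
            !(guest_intr & (GUEST_INTR_STATE_STI |
                            GUEST_INTR_STATE_MOV_SS |
                            GUEST_INTR_STATE_NMI));

    vcpu->arch.interrupt_window_open =
            ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
             !(guest_intr & (GUEST_INTR_STATE_STI |
                             GUEST_INTR_STATE_MOV_SS)));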
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 46 +++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 25 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa..bfbbdea869b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -327,6 +327,7 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; + bool nmi_window_open; u64 mtrr[0x100]; }; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8d0fc68fd4e..f0866e1d20e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2362,6 +2362,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } +static void vmx_update_window_states(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + + vcpu->arch.nmi_window_open = + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_NMI)); + + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS))); +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2374,15 +2389,12 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) kvm_queue_interrupt(vcpu, irq); } - static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 cpu_based_vm_exec_control; - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + vmx_update_window_states(vcpu); if (vcpu->arch.interrupt_window_open && vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) @@ -3075,22 +3087,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } -static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return !(guest_intr & (GUEST_INTR_STATE_NMI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)); -} - -static int vmx_irq_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)) && - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); -} - static void enable_intr_window(struct kvm_vcpu *vcpu) { if (vcpu->arch.nmi_pending) @@ -3159,11 +3155,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { update_tpr_threshold(vcpu); + vmx_update_window_states(vcpu); + if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); - } else if (vmx_nmi_enabled(vcpu)) { + } else if (vcpu->arch.nmi_window_open) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { @@ -3178,7 +3176,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vmx_irq_enabled(vcpu)) + if (vcpu->arch.interrupt_window_open) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -3339,9 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | 
GUEST_INTR_STATE_MOV_SS)) == 0; + vmx_update_window_states(vcpu); asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; -- cgit v1.2.3-70-g09d2 From c4abb7c9cde24b7351a47328ef866e6a2bbb1ad0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 26 Sep 2008 09:30:55 +0200 Subject: KVM: x86: Support for user space injected NMIs Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for injecting NMIs from user space and also extends the statistic report accordingly. Based on the original patch by Sheng Yang. Signed-off-by: Jan Kiszka Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++++++++++++++-- include/linux/kvm.h | 11 ++++++++-- 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bfbbdea869b..a40fa847892 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -398,6 +398,7 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; + u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -406,6 +407,7 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; + u32 nmi_injections; }; struct descriptor_table { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1fa9a6db633..07971451b94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, + { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -93,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, + { "nmi_injections", VCPU_STAT(nmi_injections) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -1318,6 +1320,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_inject_nmi(vcpu); + vcpu_put(vcpu); + + return 0; +} + static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -1377,6 +1388,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } + case KVM_NMI: { + r = kvm_vcpu_ioctl_nmi(vcpu); + if (r) + goto out; + r = 0; + break; + } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -2812,18 +2830,37 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); } +/* + * Check if userspace requested a NMI window, and that the NMI window + * is open. + * + * No need to exit to userspace if we already have a NMI queued. 
+ */ +static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return (!vcpu->arch.nmi_pending && + kvm_run->request_nmi_window && + vcpu->arch.nmi_window_open); +} + static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) + if (irqchip_in_kernel(vcpu->kvm)) { kvm_run->ready_for_interrupt_injection = 1; - else + kvm_run->ready_for_nmi_injection = 1; + } else { kvm_run->ready_for_interrupt_injection = (vcpu->arch.interrupt_window_open && vcpu->arch.irq_summary == 0); + kvm_run->ready_for_nmi_injection = + (vcpu->arch.nmi_window_open && + vcpu->arch.nmi_pending == 0); + } } static void vapic_enter(struct kvm_vcpu *vcpu) @@ -2999,6 +3036,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (r > 0) { + if (dm_request_for_nmi_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_NMI; + ++vcpu->stat.request_nmi_exits; + } if (dm_request_for_irq_injection(vcpu, kvm_run)) { r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f18b86fa865..44fd7fa0af2 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -83,18 +83,22 @@ struct kvm_irqchip { #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 +#define KVM_EXIT_NMI 16 +#define KVM_EXIT_NMI_WINDOW_OPEN 17 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 request_nmi_window; + __u8 padding1[6]; /* out */ __u32 exit_reason; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 padding2[2]; + __u8 ready_for_nmi_injection; + __u8 padding2; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -387,6 +391,7 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 +#define KVM_CAP_NMI 19 /* * ioctls for VM fds @@ -458,6 +463,8 @@ struct kvm_trace_rec { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +/* Available with KVM_CAP_NMI */ +#define KVM_NMI _IO(KVMIO, 0x9a) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -- cgit v1.2.3-70-g09d2 From 932d27a7913fc6b3c64c6e6082628b0a1561dec9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Oct 2008 16:01:53 +0800 Subject: x86: Export some definition of MTRR For KVM can reuse the type define, and need them to support shadow MTRR. 
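With the type definitions moved into asm/mtrr.h and mtrr_state exported, KVM code can pick them up directly; a minimal sketch of the intended reuse, along the lines of what the follow-up "KVM: Improve MTRR structure" patch further below does in kvm_host.h (the exact include line is an assumption here):

    #include <asm/mtrr.h>    /* presumed include for the shared MTRR types */

    struct kvm_vcpu_arch {
            /* ... */
            struct mtrr_state_type mtrr_state;
            u32 pat;
    };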
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/mtrr.h | 25 +++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/generic.c | 12 +++--------- arch/x86/kernel/cpu/mtrr/mtrr.h | 17 ----------------- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31..cb988aab716 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -57,6 +57,31 @@ struct mtrr_gentry { }; #endif /* !__i386__ */ +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 90db91e1593..b59ddcc88cd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,14 +14,6 @@ #include #include "mtrr.h" -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; -static struct mtrr_state_type mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; +struct mtrr_state_type mtrr_state = {}; +EXPORT_SYMBOL_GPL(mtrr_state); + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 988538202dd..ffd60409cc6 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,12 +8,6 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define MTRR_NUM_FIXED_RANGES 88 -#define MTRR_MAX_VAR_RANGES 256 - #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -30,10 +24,6 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { @@ -71,13 +61,6 @@ struct set_mtrr_context { u32 ccr3; }; -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); -- cgit v1.2.3-70-g09d2 From 0bed3b568b68e5835ef5da888a372b9beabf7544 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Oct 2008 16:01:54 +0800 Subject: KVM: Improve MTRR structure As well as reset mmu context when set MTRR. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 5 +++- arch/x86/kvm/x86.c | 61 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a40fa847892..8082e87f628 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include #include +#include #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -86,6 +87,7 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern spinlock_t kvm_lock; @@ -329,7 +331,8 @@ struct kvm_vcpu_arch { bool nmi_injected; bool nmi_window_open; - u64 mtrr[0x100]; + struct mtrr_state_type mtrr_state; + u32 pat; }; struct kvm_mem_alias { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a2c4b559455..f5b2334c6bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -39,6 +39,7 @@ #include #include #include +#include #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -650,10 +651,38 @@ static bool msr_mtrr_valid(unsigned msr) static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - vcpu->arch.mtrr[msr - 0x200] = data; + if (msr == MSR_MTRRdefType) { + vcpu->arch.mtrr_state.def_type = data; + vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; + } else if (msr == MSR_MTRRfix64K_00000) + p[0] = data; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + p[1 + msr - MSR_MTRRfix16K_80000] = data; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + p[3 + msr - MSR_MTRRfix4K_C0000] = data; + else if (msr == MSR_IA32_CR_PAT) + vcpu->arch.pat = data; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 
*)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pt = data; + } + + kvm_mmu_reset_context(vcpu); return 0; } @@ -749,10 +778,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - *pdata = vcpu->arch.mtrr[msr - 0x200]; + if (msr == MSR_MTRRdefType) + *pdata = vcpu->arch.mtrr_state.def_type + + (vcpu->arch.mtrr_state.enabled << 10); + else if (msr == MSR_MTRRfix64K_00000) + *pdata = p[0]; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; + else if (msr == MSR_IA32_CR_PAT) + *pdata = vcpu->arch.pat; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pdata = *pt; + } + return 0; } @@ -3942,6 +3998,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* We do fxsave: this must be aligned. */ BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); + vcpu->arch.mtrr_state.have_fixed = 1; vcpu_load(vcpu); r = kvm_arch_vcpu_reset(vcpu); if (r == 0) -- cgit v1.2.3-70-g09d2 From 64d4d521757117aa5c1cfe79d3baa6cf57703f81 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Oct 2008 16:01:57 +0800 Subject: KVM: Enable MTRR for EPT The effective memory type of EPT is the mixture of MSR_IA32_CR_PAT and memory type field of EPT entry. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/mmu.c | 11 ++++++++++- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx.c | 10 ++++++++-- arch/x86/kvm/x86.c | 2 +- 5 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8082e87f628..93040b5eed9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -483,6 +483,7 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); + int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -496,7 +497,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ac2304fd173..09d05f57bf6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; +static u64 __read_mostly shadow_mt_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 
dirty_mask, u64 nx_mask, u64 x_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; + shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -1546,6 +1548,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, { u64 spte; int ret = 0; + u64 mt_mask = shadow_mt_mask; + /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -1564,6 +1568,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; + if (mt_mask) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + spte |= mt_mask; + } spte |= (u64)pfn << PAGE_SHIFT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c4ce657d96..05efc4ef75a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1912,6 +1912,11 @@ static int get_npt_level(void) #endif } +static int svm_get_mt_mask_shift(void) +{ + return 0; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1967,6 +1972,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, + .get_mt_mask_shift = svm_get_mt_mask_shift, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b4c95a501cc..dae134fa09e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3574,6 +3574,11 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } +static int vmx_get_mt_mask_shift(void) +{ + return VMX_EPT_MT_EPTE_SHIFT; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3629,6 +3634,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, + .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3685,10 +3691,10 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK); + VMX_EPT_EXECUTABLE_MASK, + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); kvm_enable_tdp(); } else kvm_disable_tdp(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0edf75339f3..f175b796c2a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2615,7 +2615,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); return 0; out: -- cgit v1.2.3-70-g09d2 From d73fa29a9b75b2af7f69dae276d2c602a23b329b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 14 Oct 2008 15:59:10 +0800 Subject: KVM: Clean up kvm_x86_emulate.h Remove one left improper comment of removed CR2. 
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_x86_emulate.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f20..16a002655f3 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -146,22 +146,18 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; - /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* decode cache */ - struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -170,7 +166,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. */ -#if defined(__i386__) +#if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 #elif defined(CONFIG_X86_64) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -- cgit v1.2.3-70-g09d2 From 291f26bc0f89518ad7ee3207c09eb8a743ac8fcc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 16 Oct 2008 17:30:57 +0800 Subject: KVM: MMU: Extend kvm_mmu_page->slot_bitmap size Otherwise, set_bit() for a private memory slot (above KVM_MEMORY_SLOTS) would corrupt memory on a 32-bit host. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 8 +++++--- arch/x86/kvm/mmu.c | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 93040b5eed9..59c3ae10de6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -192,9 +192,11 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - unsigned long slot_bitmap; /* One bit set per slot which has memory - * in this shadow page. - */ + /* + * One bit set per slot which has memory + * in this shadow page. + */ + DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); int multimapped; /* More than one parent_pte? 
*/ int root_count; /* Currently serving as active root */ bool unsync; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 09d05f57bf6..8687758b529 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -789,7 +789,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); ASSERT(is_empty_shadow_page(sp->spt)); - sp->slot_bitmap = 0; + bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; @@ -1364,7 +1364,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); struct kvm_mmu_page *sp = page_header(__pa(pte)); - __set_bit(slot, &sp->slot_bitmap); + __set_bit(slot, sp->slot_bitmap); } static void mmu_convert_notrap(struct kvm_mmu_page *sp) @@ -2564,7 +2564,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) int i; u64 *pt; - if (!test_bit(slot, &sp->slot_bitmap)) + if (!test_bit(slot, sp->slot_bitmap)) continue; pt = sp->spt; -- cgit v1.2.3-70-g09d2 From cc6e462cd54e64858ea25816df87d033229efe56 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 20 Oct 2008 10:20:03 +0200 Subject: KVM: x86: Optimize NMI watchdog delivery As suggested by Avi, this patch introduces a counter of VCPUs that have LVT0 set to NMI mode. Only if the counter > 0, we push the PIT ticks via all LAPIC LVT0 lines to enable NMI watchdog support. Signed-off-by: Jan Kiszka Acked-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/i8254.c | 11 ++++++----- arch/x86/kvm/lapic.c | 23 ++++++++++++++++++++--- 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 59c3ae10de6..09e6c56572c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -361,6 +361,7 @@ struct kvm_arch{ struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; + int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b6fcf5a9e50..e665d1c623c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -620,11 +620,12 @@ static void __inject_pit_timer_intr(struct kvm *kvm) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. 
*/ - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (vcpu) - kvm_apic_nmi_wd_deliver(vcpu); - } + if (kvm->arch.vapics_in_nmi_mode > 0) + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (vcpu) + kvm_apic_nmi_wd_deliver(vcpu); + } } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0b0d413f0af..afac68c0815 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; } +static inline int apic_lvt_nmi_mode(u32 lvt_val) +{ + return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; +} + static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -672,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period))); } +static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) +{ + int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + + if (apic_lvt_nmi_mode(lvt0_val)) { + if (!nmi_wd_enabled) { + apic_debug("Receive NMI setting on APIC_LVT0 " + "for cpu %d\n", apic->vcpu->vcpu_id); + apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + } + } else if (nmi_wd_enabled) + apic->vcpu->kvm->arch.vapics_in_nmi_mode--; +} + static void apic_mmio_write(struct kvm_io_device *this, gpa_t address, int len, const void *data) { @@ -753,9 +772,7 @@ static void apic_mmio_write(struct kvm_io_device *this, break; case APIC_LVT0: - if (val == APIC_DM_NMI) - apic_debug("Receive NMI setting on APIC_LVT0 " - "for cpu %d\n", apic->vcpu->vcpu_id) + apic_manage_nmi_watchdog(apic, val); case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: -- cgit v1.2.3-70-g09d2 From 2843099fee32a6020e1caa95c6026f28b5d43bff Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Fri, 3 Oct 2008 17:40:32 +0300 Subject: KVM: MMU: Fix aliased gfns treated as unaliased Some areas of the kvm x86 mmu use a gfn offset inside a slot without unaliasing the gfn first. This patch makes sure that the gfn is unaliased, and adds gfn_to_memslot_unaliased() to avoid recomputing the unaliased gfn when the caller already has it. 
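As a minimal sketch of the intended call discipline (an illustration only, not the hunks of this patch, which follow below): a gfn is unaliased exactly once, and that same value is then used both to look up the memslot and to index the slot's per-gfn arrays.

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);			/* resolve aliases once */
	return gfn_to_memslot_unaliased(kvm, gfn);	/* plain slot scan, no unaliasing */
}

static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
	int *write_count;

	gfn = unalias_gfn(kvm, gfn);
	/* the same unaliased gfn feeds both the slot lookup and the offset */
	write_count = slot_largepage_idx(gfn, gfn_to_memslot_unaliased(kvm, gfn));
	*write_count += 1;
}
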
Signed-off-by: Izik Eidus Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/mmu.c | 14 ++++++++++---- virt/kvm/kvm_main.c | 9 +++++---- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09e6c56572c..99e3cc149d2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -617,6 +617,8 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); + static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8687758b529..8904e8ada97 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -386,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count += 1; } @@ -394,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count -= 1; WARN_ON(*write_count < 0); } static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + struct kvm_memory_slot *slot; int *largepage_idx; + gfn = unalias_gfn(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); if (slot) { largepage_idx = slot_largepage_idx(gfn, slot); return *largepage_idx; @@ -2973,8 +2979,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (sp->role.metaphysical) continue; - slot = gfn_to_memslot(vcpu->kvm, sp->gfn); gfn = unalias_gfn(vcpu->kvm, sp->gfn); + slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1838052f3c9..a65baa9039d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -923,7 +923,7 @@ int kvm_is_error_hva(unsigned long addr) } EXPORT_SYMBOL_GPL(kvm_is_error_hva); -static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; @@ -936,11 +936,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) } return NULL; } +EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { gfn = unalias_gfn(kvm, gfn); - return __gfn_to_memslot(kvm, gfn); + return gfn_to_memslot_unaliased(kvm, gfn); } int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) @@ -964,7 +965,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *slot; gfn = unalias_gfn(kvm, gfn); - slot = __gfn_to_memslot(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); if (!slot) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); @@ -1215,7 +1216,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *memslot; gfn = unalias_gfn(kvm, gfn); - memslot = 
__gfn_to_memslot(kvm, gfn); + memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; -- cgit v1.2.3-70-g09d2 From 13673a90f1cf88296f726265cc7cf3ec76ecba30 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:13 -0200 Subject: KVM: VMX: move vmx.h to include/asm vmx.h will be used by core code that is independent of KVM, so I am moving it outside the arch/x86/kvm directory. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 367 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/vmx.h | 367 --------------------------------------------- 4 files changed, 369 insertions(+), 369 deletions(-) create mode 100644 arch/x86/include/asm/vmx.h delete mode 100644 arch/x86/kvm/vmx.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h new file mode 100644 index 00000000000..3db236c26fa --- /dev/null +++ b/arch/x86/include/asm/vmx.h @@ -0,0 +1,367 @@ +#ifndef VMX_H +#define VMX_H + +/* + * vmx.h: VMX Architecture related definitions + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * A few random additions are: + * Copyright (C) 2006 Qumranet + * Avi Kivity + * Yaniv Kamay + * + */ + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. 
+ */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 + + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 + +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 + +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 + +/* VMCS Encodings */ +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + 
GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 + +/* + * Interruption-information format + */ +#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ +#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ +#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ +#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ +#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 + +#define VECTORING_INFO_VECTOR_MASK 
INTR_INFO_VECTOR_MASK +#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK +#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK +#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK + +#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ +#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ +#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ + +/* GUEST_INTERRUPTIBILITY_INFO flags. */ +#define GUEST_INTR_STATE_STI 0x00000001 +#define GUEST_INTR_STATE_MOV_SS 0x00000002 +#define GUEST_INTR_STATE_SMI 0x00000004 +#define GUEST_INTR_STATE_NMI 0x00000008 + +/* + * Exit Qualifications for MOV for Control Register Access + */ +#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/ +#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ +#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */ +#define LMSW_SOURCE_DATA_SHIFT 16 +#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ +#define REG_EAX (0 << 8) +#define REG_ECX (1 << 8) +#define REG_EDX (2 << 8) +#define REG_EBX (3 << 8) +#define REG_ESP (4 << 8) +#define REG_EBP (5 << 8) +#define REG_ESI (6 << 8) +#define REG_EDI (7 << 8) +#define REG_R8 (8 << 8) +#define REG_R9 (9 << 8) +#define REG_R10 (10 << 8) +#define REG_R11 (11 << 8) +#define REG_R12 (12 << 8) +#define REG_R13 (13 << 8) +#define REG_R14 (14 << 8) +#define REG_R15 (15 << 8) + +/* + * Exit Qualifications for MOV for Debug Register Access + */ +#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */ +#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ +#define TYPE_MOV_TO_DR (0 << 4) +#define TYPE_MOV_FROM_DR (1 << 4) +#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. 
*/ + + +/* segment AR */ +#define SEGMENT_AR_L_MASK (1 << 13) + +#define AR_TYPE_ACCESSES_MASK 1 +#define AR_TYPE_READABLE_MASK (1 << 1) +#define AR_TYPE_WRITEABLE_MASK (1 << 2) +#define AR_TYPE_CODE_MASK (1 << 3) +#define AR_TYPE_MASK 0x0f +#define AR_TYPE_BUSY_64_TSS 11 +#define AR_TYPE_BUSY_32_TSS 11 +#define AR_TYPE_BUSY_16_TSS 3 +#define AR_TYPE_LDT 2 + +#define AR_UNUSABLE_MASK (1 << 16) +#define AR_S_MASK (1 << 4) +#define AR_P_MASK (1 << 7) +#define AR_L_MASK (1 << 13) +#define AR_DB_MASK (1 << 14) +#define AR_G_MASK (1 << 15) +#define AR_DPL_SHIFT 5 +#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) + +#define AR_RESERVD_MASK 0xfffe0f00 + +#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) + +#define VMX_NR_VPIDS (1 << 16) +#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 +#define VMX_VPID_EXTENT_ALL_CONTEXT 2 + +#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 +#define VMX_EPT_EXTENT_CONTEXT 1 +#define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) +#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) +#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_MAX_GAW 0x4 +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPT_GAW_EPTP_SHIFT 3 +#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) + +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + +#endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8904e8ada97..fa3486d6407 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -17,7 +17,6 @@ * */ -#include "vmx.h" #include "mmu.h" #include @@ -33,6 +32,7 @@ #include #include #include +#include /* * When setting this variable to true it enables Two-Dimensional-Paging diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 427dbc14fae..ec71f6464cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -16,7 +16,6 @@ */ #include "irq.h" -#include "vmx.h" #include "mmu.h" #include @@ -31,6 +30,7 @@ #include #include +#include #define __ex(x) __kvm_handle_fault_on_reboot(x) diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h deleted file mode 100644 index 3db236c26fa..00000000000 --- a/arch/x86/kvm/vmx.h +++ /dev/null @@ -1,367 +0,0 @@ -#ifndef VMX_H -#define VMX_H - -/* - * vmx.h: VMX Architecture related definitions - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * A few random additions are: - * Copyright (C) 2006 Qumranet - * Avi Kivity - * Yaniv Kamay - * - */ - -/* - * Definitions of Primary Processor-Based VM-Execution Controls. 
- */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 -/* - * Definitions of Secondary Processor-Based VM-Execution Controls. - */ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 - - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 - -#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 -#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_SAVE_IA32_PAT 0x00040000 -#define VM_EXIT_LOAD_IA32_PAT 0x00080000 - -#define VM_ENTRY_IA32E_MODE 0x00000200 -#define VM_ENTRY_SMM 0x00000400 -#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 - -/* VMCS Encodings */ -enum vmcs_field { - VIRTUAL_PROCESSOR_ID = 0x00000000, - GUEST_ES_SELECTOR = 0x00000800, - GUEST_CS_SELECTOR = 0x00000802, - GUEST_SS_SELECTOR = 0x00000804, - GUEST_DS_SELECTOR = 0x00000806, - GUEST_FS_SELECTOR = 0x00000808, - GUEST_GS_SELECTOR = 0x0000080a, - GUEST_LDTR_SELECTOR = 0x0000080c, - GUEST_TR_SELECTOR = 0x0000080e, - HOST_ES_SELECTOR = 0x00000c00, - HOST_CS_SELECTOR = 0x00000c02, - HOST_SS_SELECTOR = 0x00000c04, - HOST_DS_SELECTOR = 0x00000c06, - HOST_FS_SELECTOR = 0x00000c08, - HOST_GS_SELECTOR = 0x00000c0a, - HOST_TR_SELECTOR = 0x00000c0c, - IO_BITMAP_A = 0x00002000, - IO_BITMAP_A_HIGH = 0x00002001, - IO_BITMAP_B = 0x00002002, - IO_BITMAP_B_HIGH = 0x00002003, - MSR_BITMAP = 0x00002004, - MSR_BITMAP_HIGH = 0x00002005, - VM_EXIT_MSR_STORE_ADDR = 0x00002006, - VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, - VM_EXIT_MSR_LOAD_ADDR = 0x00002008, - VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, - VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, - VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, - TSC_OFFSET = 0x00002010, - TSC_OFFSET_HIGH = 0x00002011, - VIRTUAL_APIC_PAGE_ADDR = 0x00002012, - VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, - APIC_ACCESS_ADDR = 0x00002014, - APIC_ACCESS_ADDR_HIGH = 0x00002015, - EPT_POINTER = 0x0000201a, - EPT_POINTER_HIGH = 0x0000201b, - GUEST_PHYSICAL_ADDRESS = 0x00002400, - GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, - VMCS_LINK_POINTER = 0x00002800, - VMCS_LINK_POINTER_HIGH = 0x00002801, - GUEST_IA32_DEBUGCTL = 0x00002802, - GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, - GUEST_IA32_PAT = 0x00002804, - GUEST_IA32_PAT_HIGH = 0x00002805, - GUEST_PDPTR0 = 0x0000280a, - GUEST_PDPTR0_HIGH = 0x0000280b, - GUEST_PDPTR1 = 0x0000280c, - GUEST_PDPTR1_HIGH = 0x0000280d, - GUEST_PDPTR2 = 0x0000280e, - GUEST_PDPTR2_HIGH = 0x0000280f, - GUEST_PDPTR3 = 0x00002810, - GUEST_PDPTR3_HIGH = 0x00002811, - HOST_IA32_PAT = 
0x00002c00, - HOST_IA32_PAT_HIGH = 0x00002c01, - PIN_BASED_VM_EXEC_CONTROL = 0x00004000, - CPU_BASED_VM_EXEC_CONTROL = 0x00004002, - EXCEPTION_BITMAP = 0x00004004, - PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, - PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, - CR3_TARGET_COUNT = 0x0000400a, - VM_EXIT_CONTROLS = 0x0000400c, - VM_EXIT_MSR_STORE_COUNT = 0x0000400e, - VM_EXIT_MSR_LOAD_COUNT = 0x00004010, - VM_ENTRY_CONTROLS = 0x00004012, - VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, - VM_ENTRY_INTR_INFO_FIELD = 0x00004016, - VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, - VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, - TPR_THRESHOLD = 0x0000401c, - SECONDARY_VM_EXEC_CONTROL = 0x0000401e, - VM_INSTRUCTION_ERROR = 0x00004400, - VM_EXIT_REASON = 0x00004402, - VM_EXIT_INTR_INFO = 0x00004404, - VM_EXIT_INTR_ERROR_CODE = 0x00004406, - IDT_VECTORING_INFO_FIELD = 0x00004408, - IDT_VECTORING_ERROR_CODE = 0x0000440a, - VM_EXIT_INSTRUCTION_LEN = 0x0000440c, - VMX_INSTRUCTION_INFO = 0x0000440e, - GUEST_ES_LIMIT = 0x00004800, - GUEST_CS_LIMIT = 0x00004802, - GUEST_SS_LIMIT = 0x00004804, - GUEST_DS_LIMIT = 0x00004806, - GUEST_FS_LIMIT = 0x00004808, - GUEST_GS_LIMIT = 0x0000480a, - GUEST_LDTR_LIMIT = 0x0000480c, - GUEST_TR_LIMIT = 0x0000480e, - GUEST_GDTR_LIMIT = 0x00004810, - GUEST_IDTR_LIMIT = 0x00004812, - GUEST_ES_AR_BYTES = 0x00004814, - GUEST_CS_AR_BYTES = 0x00004816, - GUEST_SS_AR_BYTES = 0x00004818, - GUEST_DS_AR_BYTES = 0x0000481a, - GUEST_FS_AR_BYTES = 0x0000481c, - GUEST_GS_AR_BYTES = 0x0000481e, - GUEST_LDTR_AR_BYTES = 0x00004820, - GUEST_TR_AR_BYTES = 0x00004822, - GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, - GUEST_SYSENTER_CS = 0x0000482A, - HOST_IA32_SYSENTER_CS = 0x00004c00, - CR0_GUEST_HOST_MASK = 0x00006000, - CR4_GUEST_HOST_MASK = 0x00006002, - CR0_READ_SHADOW = 0x00006004, - CR4_READ_SHADOW = 0x00006006, - CR3_TARGET_VALUE0 = 0x00006008, - CR3_TARGET_VALUE1 = 0x0000600a, - CR3_TARGET_VALUE2 = 0x0000600c, - CR3_TARGET_VALUE3 = 0x0000600e, - EXIT_QUALIFICATION = 0x00006400, - GUEST_LINEAR_ADDRESS = 0x0000640a, - GUEST_CR0 = 0x00006800, - GUEST_CR3 = 0x00006802, - GUEST_CR4 = 0x00006804, - GUEST_ES_BASE = 0x00006806, - GUEST_CS_BASE = 0x00006808, - GUEST_SS_BASE = 0x0000680a, - GUEST_DS_BASE = 0x0000680c, - GUEST_FS_BASE = 0x0000680e, - GUEST_GS_BASE = 0x00006810, - GUEST_LDTR_BASE = 0x00006812, - GUEST_TR_BASE = 0x00006814, - GUEST_GDTR_BASE = 0x00006816, - GUEST_IDTR_BASE = 0x00006818, - GUEST_DR7 = 0x0000681a, - GUEST_RSP = 0x0000681c, - GUEST_RIP = 0x0000681e, - GUEST_RFLAGS = 0x00006820, - GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, - GUEST_SYSENTER_ESP = 0x00006824, - GUEST_SYSENTER_EIP = 0x00006826, - HOST_CR0 = 0x00006c00, - HOST_CR3 = 0x00006c02, - HOST_CR4 = 0x00006c04, - HOST_FS_BASE = 0x00006c06, - HOST_GS_BASE = 0x00006c08, - HOST_TR_BASE = 0x00006c0a, - HOST_GDTR_BASE = 0x00006c0c, - HOST_IDTR_BASE = 0x00006c0e, - HOST_IA32_SYSENTER_ESP = 0x00006c10, - HOST_IA32_SYSENTER_EIP = 0x00006c12, - HOST_RSP = 0x00006c14, - HOST_RIP = 0x00006c16, -}; - -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 
-#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 - -/* - * Interruption-information format - */ -#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ -#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ -#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ -#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ -#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ -#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 - -#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK -#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK -#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK -#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK - -#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ -#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ -#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ -#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ - -/* GUEST_INTERRUPTIBILITY_INFO flags. */ -#define GUEST_INTR_STATE_STI 0x00000001 -#define GUEST_INTR_STATE_MOV_SS 0x00000002 -#define GUEST_INTR_STATE_SMI 0x00000004 -#define GUEST_INTR_STATE_NMI 0x00000008 - -/* - * Exit Qualifications for MOV for Control Register Access - */ -#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/ -#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ -#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */ -#define LMSW_SOURCE_DATA_SHIFT 16 -#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ -#define REG_EAX (0 << 8) -#define REG_ECX (1 << 8) -#define REG_EDX (2 << 8) -#define REG_EBX (3 << 8) -#define REG_ESP (4 << 8) -#define REG_EBP (5 << 8) -#define REG_ESI (6 << 8) -#define REG_EDI (7 << 8) -#define REG_R8 (8 << 8) -#define REG_R9 (9 << 8) -#define REG_R10 (10 << 8) -#define REG_R11 (11 << 8) -#define REG_R12 (12 << 8) -#define REG_R13 (13 << 8) -#define REG_R14 (14 << 8) -#define REG_R15 (15 << 8) - -/* - * Exit Qualifications for MOV for Debug Register Access - */ -#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */ -#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ -#define TYPE_MOV_TO_DR (0 << 4) -#define TYPE_MOV_FROM_DR (1 << 4) -#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. 
*/ - - -/* segment AR */ -#define SEGMENT_AR_L_MASK (1 << 13) - -#define AR_TYPE_ACCESSES_MASK 1 -#define AR_TYPE_READABLE_MASK (1 << 1) -#define AR_TYPE_WRITEABLE_MASK (1 << 2) -#define AR_TYPE_CODE_MASK (1 << 3) -#define AR_TYPE_MASK 0x0f -#define AR_TYPE_BUSY_64_TSS 11 -#define AR_TYPE_BUSY_32_TSS 11 -#define AR_TYPE_BUSY_16_TSS 3 -#define AR_TYPE_LDT 2 - -#define AR_UNUSABLE_MASK (1 << 16) -#define AR_S_MASK (1 << 4) -#define AR_P_MASK (1 << 7) -#define AR_L_MASK (1 << 13) -#define AR_DB_MASK (1 << 14) -#define AR_G_MASK (1 << 15) -#define AR_DPL_SHIFT 5 -#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) - -#define AR_RESERVD_MASK 0xfffe0f00 - -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) - -#define VMX_NR_VPIDS (1 << 16) -#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 -#define VMX_VPID_EXTENT_ALL_CONTEXT 2 - -#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 -#define VMX_EPT_EXTENT_CONTEXT 1 -#define VMX_EPT_EXTENT_GLOBAL 2 -#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) -#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) -#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) -#define VMX_EPT_DEFAULT_GAW 3 -#define VMX_EPT_MAX_GAW 0x4 -#define VMX_EPT_MT_EPTE_SHIFT 3 -#define VMX_EPT_GAW_EPTP_SHIFT 3 -#define VMX_EPT_DEFAULT_MT 0x6ull -#define VMX_EPT_READABLE_MASK 0x1ull -#define VMX_EPT_WRITABLE_MASK 0x2ull -#define VMX_EPT_EXECUTABLE_MASK 0x4ull -#define VMX_EPT_IGMT_BIT (1ull << 6) - -#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul - -#endif -- cgit v1.2.3-70-g09d2 From c2cedf7be2017e3264c93a4c0d75b1d96d0d7104 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:14 -0200 Subject: KVM: SVM: move svm.h to include/asm svm.h will be used by core code that is independent of KVM, so I am moving it outside the arch/x86/kvm directory. 
Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/svm.h | 328 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/kvm_svm.h | 2 +- arch/x86/kvm/svm.h | 328 --------------------------------------------- 3 files changed, 329 insertions(+), 329 deletions(-) create mode 100644 arch/x86/include/asm/svm.h delete mode 100644 arch/x86/kvm/svm.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h new file mode 100644 index 00000000000..1b8afa78e86 --- /dev/null +++ b/arch/x86/include/asm/svm.h @@ -0,0 +1,328 @@ +#ifndef __SVM_H +#define __SVM_H + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u16 intercept_cr_read; + u16 intercept_cr_write; + u16 intercept_dr_read; + u16 intercept_dr_write; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[44]; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u8 reserved_4[16]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 lbr_ctl; + u8 reserved_5[832]; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 
cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_CPUID_FEATURE_SHIFT 2 +#define SVM_CPUID_FUNC 0x8000000a + +#define MSR_EFER_SVME_MASK (1ULL << 12) +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_HSAVE_PA 0xc0010117ULL + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_MASK 1 +#define INTERCEPT_CR3_MASK (1 << 3) +#define INTERCEPT_CR4_MASK (1 << 4) +#define INTERCEPT_CR8_MASK (1 << 8) + +#define INTERCEPT_DR0_MASK 1 +#define INTERCEPT_DR1_MASK (1 << 1) +#define INTERCEPT_DR2_MASK (1 << 2) +#define INTERCEPT_DR3_MASK (1 << 3) +#define INTERCEPT_DR4_MASK (1 << 4) +#define INTERCEPT_DR5_MASK (1 << 5) +#define INTERCEPT_DR6_MASK (1 << 6) +#define INTERCEPT_DR7_MASK (1 << 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define 
SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ + +#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" +#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" +#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb" +#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd" +#define SVM_STGI ".byte 0x0f, 0x01, 0xdc" +#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" + +#endif + diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 65ef0fc2c03..8e5ee99551f 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -7,7 +7,7 @@ #include #include -#include "svm.h" +#include static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h deleted file mode 100644 index 1b8afa78e86..00000000000 --- a/arch/x86/kvm/svm.h +++ /dev/null @@ -1,328 +0,0 @@ -#ifndef __SVM_H -#define __SVM_H - -enum { - INTERCEPT_INTR, - INTERCEPT_NMI, - INTERCEPT_SMI, - INTERCEPT_INIT, - INTERCEPT_VINTR, - INTERCEPT_SELECTIVE_CR0, - INTERCEPT_STORE_IDTR, - INTERCEPT_STORE_GDTR, - INTERCEPT_STORE_LDTR, - INTERCEPT_STORE_TR, - INTERCEPT_LOAD_IDTR, - INTERCEPT_LOAD_GDTR, - INTERCEPT_LOAD_LDTR, - INTERCEPT_LOAD_TR, - INTERCEPT_RDTSC, - INTERCEPT_RDPMC, - INTERCEPT_PUSHF, - INTERCEPT_POPF, - INTERCEPT_CPUID, - INTERCEPT_RSM, - INTERCEPT_IRET, - INTERCEPT_INTn, - INTERCEPT_INVD, - INTERCEPT_PAUSE, - INTERCEPT_HLT, - INTERCEPT_INVLPG, - INTERCEPT_INVLPGA, - INTERCEPT_IOIO_PROT, - INTERCEPT_MSR_PROT, - INTERCEPT_TASK_SWITCH, - INTERCEPT_FERR_FREEZE, - INTERCEPT_SHUTDOWN, - INTERCEPT_VMRUN, - INTERCEPT_VMMCALL, - INTERCEPT_VMLOAD, - INTERCEPT_VMSAVE, - INTERCEPT_STGI, - INTERCEPT_CLGI, - INTERCEPT_SKINIT, - INTERCEPT_RDTSCP, - INTERCEPT_ICEBP, - INTERCEPT_WBINVD, - INTERCEPT_MONITOR, - INTERCEPT_MWAIT, - INTERCEPT_MWAIT_COND, -}; - - -struct __attribute__ ((__packed__)) vmcb_control_area { - u16 intercept_cr_read; - u16 intercept_cr_write; - u16 intercept_dr_read; - u16 
intercept_dr_write; - u32 intercept_exceptions; - u64 intercept; - u8 reserved_1[44]; - u64 iopm_base_pa; - u64 msrpm_base_pa; - u64 tsc_offset; - u32 asid; - u8 tlb_ctl; - u8 reserved_2[3]; - u32 int_ctl; - u32 int_vector; - u32 int_state; - u8 reserved_3[4]; - u32 exit_code; - u32 exit_code_hi; - u64 exit_info_1; - u64 exit_info_2; - u32 exit_int_info; - u32 exit_int_info_err; - u64 nested_ctl; - u8 reserved_4[16]; - u32 event_inj; - u32 event_inj_err; - u64 nested_cr3; - u64 lbr_ctl; - u8 reserved_5[832]; -}; - - -#define TLB_CONTROL_DO_NOTHING 0 -#define TLB_CONTROL_FLUSH_ALL_ASID 1 - -#define V_TPR_MASK 0x0f - -#define V_IRQ_SHIFT 8 -#define V_IRQ_MASK (1 << V_IRQ_SHIFT) - -#define V_INTR_PRIO_SHIFT 16 -#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) - -#define V_IGN_TPR_SHIFT 20 -#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) - -#define V_INTR_MASKING_SHIFT 24 -#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) - -#define SVM_INTERRUPT_SHADOW_MASK 1 - -#define SVM_IOIO_STR_SHIFT 2 -#define SVM_IOIO_REP_SHIFT 3 -#define SVM_IOIO_SIZE_SHIFT 4 -#define SVM_IOIO_ASIZE_SHIFT 7 - -#define SVM_IOIO_TYPE_MASK 1 -#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) -#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) -#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) -#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) - -struct __attribute__ ((__packed__)) vmcb_seg { - u16 selector; - u16 attrib; - u32 limit; - u64 base; -}; - -struct __attribute__ ((__packed__)) vmcb_save_area { - struct vmcb_seg es; - struct vmcb_seg cs; - struct vmcb_seg ss; - struct vmcb_seg ds; - struct vmcb_seg fs; - struct vmcb_seg gs; - struct vmcb_seg gdtr; - struct vmcb_seg ldtr; - struct vmcb_seg idtr; - struct vmcb_seg tr; - u8 reserved_1[43]; - u8 cpl; - u8 reserved_2[4]; - u64 efer; - u8 reserved_3[112]; - u64 cr4; - u64 cr3; - u64 cr0; - u64 dr7; - u64 dr6; - u64 rflags; - u64 rip; - u8 reserved_4[88]; - u64 rsp; - u8 reserved_5[24]; - u64 rax; - u64 star; - u64 lstar; - u64 cstar; - u64 sfmask; - u64 kernel_gs_base; - u64 sysenter_cs; - u64 sysenter_esp; - u64 sysenter_eip; - u64 cr2; - u8 reserved_6[32]; - u64 g_pat; - u64 dbgctl; - u64 br_from; - u64 br_to; - u64 last_excp_from; - u64 last_excp_to; -}; - -struct __attribute__ ((__packed__)) vmcb { - struct vmcb_control_area control; - struct vmcb_save_area save; -}; - -#define SVM_CPUID_FEATURE_SHIFT 2 -#define SVM_CPUID_FUNC 0x8000000a - -#define MSR_EFER_SVME_MASK (1ULL << 12) -#define MSR_VM_CR 0xc0010114 -#define MSR_VM_HSAVE_PA 0xc0010117ULL - -#define SVM_VM_CR_SVM_DISABLE 4 - -#define SVM_SELECTOR_S_SHIFT 4 -#define SVM_SELECTOR_DPL_SHIFT 5 -#define SVM_SELECTOR_P_SHIFT 7 -#define SVM_SELECTOR_AVL_SHIFT 8 -#define SVM_SELECTOR_L_SHIFT 9 -#define SVM_SELECTOR_DB_SHIFT 10 -#define SVM_SELECTOR_G_SHIFT 11 - -#define SVM_SELECTOR_TYPE_MASK (0xf) -#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) -#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) -#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) -#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) -#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) -#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) -#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) - -#define SVM_SELECTOR_WRITE_MASK (1 << 1) -#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK -#define SVM_SELECTOR_CODE_MASK (1 << 3) - -#define INTERCEPT_CR0_MASK 1 -#define INTERCEPT_CR3_MASK (1 << 3) -#define INTERCEPT_CR4_MASK (1 << 4) -#define INTERCEPT_CR8_MASK (1 << 8) - -#define 
INTERCEPT_DR0_MASK 1 -#define INTERCEPT_DR1_MASK (1 << 1) -#define INTERCEPT_DR2_MASK (1 << 2) -#define INTERCEPT_DR3_MASK (1 << 3) -#define INTERCEPT_DR4_MASK (1 << 4) -#define INTERCEPT_DR5_MASK (1 << 5) -#define INTERCEPT_DR6_MASK (1 << 6) -#define INTERCEPT_DR7_MASK (1 << 7) - -#define SVM_EVTINJ_VEC_MASK 0xff - -#define SVM_EVTINJ_TYPE_SHIFT 8 -#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) - -#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) -#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) - -#define SVM_EVTINJ_VALID (1 << 31) -#define SVM_EVTINJ_VALID_ERR (1 << 11) - -#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK - -#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR -#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI -#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT -#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT - -#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID -#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR - -#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 -#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 - -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - -#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* 
TS and MP */ - -#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" -#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" -#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb" -#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd" -#define SVM_STGI ".byte 0x0f, 0x01, 0xdc" -#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" - -#endif - -- cgit v1.2.3-70-g09d2 From eca70fc5671b226966dfb7ee9953d59199288566 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:15 -0200 Subject: KVM: VMX: move ASM_VMX_* definitions from asm/kvm_host.h to asm/vmx.h Those definitions will be used by code outside KVM, so move it outside of a KVM-specific source file. Those definitions are used only on kvm/vmx.c, that already includes asm/vmx.h, so they can be moved safely. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 12 ------------ arch/x86/include/asm/vmx.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 99e3cc149d2..f58f7ebdea8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -714,18 +714,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 3db236c26fa..d0238e6151d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -364,4 +364,19 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + + + #endif -- cgit v1.2.3-70-g09d2 From 6210e37b122583643da335c0389f74098713e5ca Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:16 -0200 Subject: KVM: VMX: move cpu_has_kvm_support() to an inline on asm/virtext.h It will be used by core code on kdump and reboot, to disable vmx if needed. 
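As a rough illustration of the intended use (not part of this patch), core code outside KVM can now test for VMX support without touching any KVM source. The caller below is hypothetical:

#include <linux/kernel.h>
#include <asm/virtext.h>

/* Hypothetical core-kernel caller: report whether this CPU supports VMX. */
static void report_vmx_support(void)
{
        if (cpu_has_vmx())
                printk(KERN_INFO "VMX supported on this CPU\n");
        else
                printk(KERN_INFO "VMX not supported on this CPU\n");
}
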
Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 31 +++++++++++++++++++++++++++++++ arch/x86/kvm/vmx.c | 4 ++-- 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/virtext.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 00000000000..298b6a06110 --- /dev/null +++ b/arch/x86/include/asm/virtext.h @@ -0,0 +1,31 @@ +/* CPU virtualization extensions handling + * + * This should carry the code for handling CPU virtualization extensions + * that needs to live in the kernel core. + * + * Author: Eduardo Habkost + * + * Copyright (C) 2008, Red Hat Inc. + * + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef _ASM_X86_VIRTEX_H +#define _ASM_X86_VIRTEX_H + +#include +#include + +/* + * VMX functions: + */ + +static inline int cpu_has_vmx(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + +#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec71f6464cf..defaeeb3f75 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -31,6 +31,7 @@ #include #include #include +#include #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -1044,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ + return cpu_has_vmx(); } static __init int vmx_disabled_by_bios(void) -- cgit v1.2.3-70-g09d2 From 1e9931146c748420343aeefadb3bb17bd1c14a37 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:17 -0200 Subject: x86: asm/virtext.h: add cpu_vmxoff() inline function Unfortunately we can't use exactly the same code from vmx hardware_disable(), because the KVM function uses the __kvm_handle_fault_on_reboot() tricks. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 298b6a06110..7dee5b59930 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -18,6 +18,8 @@ #include #include +#include + /* * VMX functions: */ @@ -28,4 +30,17 @@ static inline int cpu_has_vmx(void) return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ } + +/** Disable VMX on the current CPU + * + * vmxoff causes a undefined-opcode exception if vmxon was not run + * on the CPU previously. Only call this function if you know VMX + * is enabled. + */ +static inline void cpu_vmxoff(void) +{ + asm volatile (ASM_VMX_VMXOFF : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); +} + #endif /* _ASM_X86_VIRTEX_H */ -- cgit v1.2.3-70-g09d2 From 6aa07a0d77f6aafbe69e4e8609ffaf2b7ee1b591 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:19 -0200 Subject: x86: cpu_emergency_vmxoff() function Add cpu_emergency_vmxoff() and its friends: cpu_vmx_enabled() and __cpu_emergency_vmxoff(). 
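For clarity, the layering of these helpers amounts to the sketch below (illustrative only; the wrapper name is made up and the real definitions are in the diff that follows): cpu_emergency_vmxoff() first checks CPUID for VMX support, then checks CR4.VMXE, and only then executes VMXOFF.

#include <asm/virtext.h>

/* Equivalent expansion of cpu_emergency_vmxoff(), spelled out step by step. */
static inline void emergency_vmxoff_expanded(void)
{
        if (cpu_has_vmx() && cpu_vmx_enabled())
                cpu_vmxoff();
}
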
Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 7dee5b59930..6bcf0acb4ef 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -43,4 +43,27 @@ static inline void cpu_vmxoff(void) write_cr4(read_cr4() & ~X86_CR4_VMXE); } +static inline int cpu_vmx_enabled(void) +{ + return read_cr4() & X86_CR4_VMXE; +} + +/** Disable VMX if it is enabled on the current CPU + * + * You shouldn't call this if cpu_has_vmx() returns 0. + */ +static inline void __cpu_emergency_vmxoff(void) +{ + if (cpu_vmx_enabled()) + cpu_vmxoff(); +} + +/** Disable VMX if it is supported and enabled on the current CPU + */ +static inline void cpu_emergency_vmxoff(void) +{ + if (cpu_has_vmx()) + __cpu_emergency_vmxoff(); +} + #endif /* _ASM_X86_VIRTEX_H */ -- cgit v1.2.3-70-g09d2 From 63d1142f8f69e39468bc6079ab2239e902828134 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:20 -0200 Subject: KVM: SVM: move has_svm() code to asm/virtext.h Use a trick to keep the printk()s on has_svm() working as before. gcc will take care of not generating code for the 'msg' stuff when the function is called with a NULL msg argument. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 41 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm.c | 19 +++++-------------- 2 files changed, 46 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6bcf0acb4ef..6f0d409c368 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -19,6 +19,7 @@ #include #include +#include /* * VMX functions: @@ -66,4 +67,44 @@ static inline void cpu_emergency_vmxoff(void) __cpu_emergency_vmxoff(); } + + + +/* + * SVM functions: + */ + +/** Check if the CPU has SVM support + * + * You can use the 'msg' arg to get a message describing the problem, + * if the function returns zero. Simply pass NULL if you are not interested + * on the messages; gcc should take care of not generating code for + * the messages on this case. 
+ */ +static inline int cpu_has_svm(const char **msg) +{ + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + if (msg) + *msg = "not amd"; + return 0; + } + + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + if (msg) + *msg = "can't execute cpuid_8000000a"; + return 0; + } + + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + if (msg) + *msg = "svm not available"; + return 0; + } + return 1; +} + #endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f0ad4d4217e..0667c6d13d3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -28,6 +28,8 @@ #include +#include + #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -245,24 +247,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static int has_svm(void) { - uint32_t eax, ebx, ecx, edx; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - printk(KERN_INFO "has_svm: not amd\n"); - return 0; - } + const char *msg; - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { - printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); + if (!cpu_has_svm(&msg)) { + printk(KERN_INFO "has_svn: %s\n", msg); return 0; } - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { - printk(KERN_DEBUG "has_svm: svm not available\n"); - return 0; - } return 1; } -- cgit v1.2.3-70-g09d2 From 2c8dceebb238680d5577500f8283397d41ca5590 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:21 -0200 Subject: KVM: SVM: move svm_hardware_disable() code to asm/virtext.h Create cpu_svm_disable() function. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 14 ++++++++++++++ arch/x86/kvm/svm.c | 6 +----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6f0d409c368..2cfe363729c 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -107,4 +107,18 @@ static inline int cpu_has_svm(const char **msg) return 1; } + +/** Disable SVM on the current CPU + * + * You should call this only if cpu_has_svm() returned true. + */ +static inline void cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); +} + #endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0667c6d13d3..1452851ae25 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -259,11 +259,7 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + cpu_svm_disable(); } static void svm_hardware_enable(void *garbage) -- cgit v1.2.3-70-g09d2 From 0f3e9eeba0ea212bbea88790729d054b700ab91e Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:22 -0200 Subject: x86: cpu_emergency_svm_disable() function This function can be used by the reboot or kdump code to forcibly disable SVM on the CPU. 
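A minimal sketch of the kind of caller these helpers are meant for (the function name is hypothetical, and the actual wiring into the reboot/kdump paths is done by separate patches not shown here): an emergency handler can simply try to shut down both VMX and SVM, since each helper checks for support, and the VMX one also checks the enabled state, before acting.

#include <asm/virtext.h>

/* Hypothetical emergency path: disable whichever virtualization extension is active. */
static void emergency_disable_virtualization(void)
{
        cpu_emergency_vmxoff();
        cpu_emergency_svm_disable();
}
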
Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/include/asm/virtext.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 2cfe363729c..59363627523 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -121,4 +121,12 @@ static inline void cpu_svm_disable(void) wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); } +/** Makes sure SVM is disabled, if it is supported on the CPU + */ +static inline void cpu_emergency_svm_disable(void) +{ + if (cpu_has_svm(NULL)) + cpu_svm_disable(); +} + #endif /* _ASM_X86_VIRTEX_H */ -- cgit v1.2.3-70-g09d2 From 0dc8d10f7d848b63c8d32cf6fd31ba7def792ac9 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Thu, 4 Dec 2008 14:26:42 +0100 Subject: KVM: x86 emulator: add Src2 decode set Instruction like shld has three operands, so we need to add a Src2 decode set. We start with Src2None, Src2CL, and Src2ImmByte, Src2One to support shld/shrd and we will expand it later. Signed-off-by: Guillaume Thouvenin Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_x86_emulate.h | 1 + arch/x86/kvm/x86_emulate.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 16a002655f3..6a159732881 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,6 +123,7 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; + struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 7a07ca46c8a..7f5cd62362c 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -70,6 +70,12 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ +/* Source 2 operand type */ +#define Src2None (0<<29) +#define Src2CL (1<<29) +#define Src2ImmByte (2<<29) +#define Src2One (3<<29) +#define Src2Mask (7<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, @@ -1000,6 +1006,29 @@ done_prefixes: break; } + /* + * Decode and fetch the second source operand: register, memory + * or immediate. + */ + switch (c->d & Src2Mask) { + case Src2None: + break; + case Src2CL: + c->src2.bytes = 1; + c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; + break; + case Src2ImmByte: + c->src2.type = OP_IMM; + c->src2.ptr = (unsigned long *)c->eip; + c->src2.bytes = 1; + c->src2.val = insn_fetch(u8, 1, c->eip); + break; + case Src2One: + c->src2.bytes = 1; + c->src2.val = 1; + break; + } + /* Decode and fetch the destination operand: register or memory. */ switch (c->d & DstMask) { case ImplicitOps: -- cgit v1.2.3-70-g09d2 From 60c8aec6e2c9923492dabbd6b67e34692bd26c20 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 1 Dec 2008 22:32:02 -0200 Subject: KVM: MMU: use page array in unsync walk Instead of invoking the handler directly collect pages into an array so the caller can work with it. Simplifies TLB flush collapsing. 
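The caller-side pattern this introduces looks roughly as follows (condensed from the shape of mmu_sync_children() in the diff below; it relies on KVM-internal types, so treat it as a sketch rather than standalone code):

static void walk_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_mmu_page *parent)
{
        struct kvm_mmu_pages pages;
        struct mmu_page_path parents;
        struct kvm_mmu_page *sp;
        int i;

        kvm_mmu_pages_init(parent, &parents, &pages);
        while (mmu_unsync_walk(parent, &pages)) {       /* fill the page array */
                for_each_sp(pages, sp, parents, i) {    /* caller works on it  */
                        kvm_sync_page(vcpu, sp);        /* or zap, etc.        */
                        mmu_pages_clear_parents(&parents);
                }
                kvm_mmu_pages_init(parent, &parents, &pages);
        }
}
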
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 195 ++++++++++++++++++++++++++++------------ 2 files changed, 141 insertions(+), 56 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f58f7ebdea8..93d0aed3588 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -200,7 +200,7 @@ struct kvm_mmu_page { int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool unsync_children; + unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dd20b199a7c..7ce92f78f33 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -908,8 +908,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) struct kvm_mmu_page *sp = page_header(__pa(spte)); index = spte - sp->spt; - __set_bit(index, sp->unsync_child_bitmap); - sp->unsync_children = 1; + if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) + sp->unsync_children++; + WARN_ON(!sp->unsync_children); } static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) @@ -936,7 +937,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - sp->unsync_children = 1; kvm_mmu_update_parents_unsync(sp); return 1; } @@ -967,18 +967,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { } +#define KVM_PAGE_ARRAY_NR 16 + +struct kvm_mmu_pages { + struct mmu_page_and_offset { + struct kvm_mmu_page *sp; + unsigned int idx; + } page[KVM_PAGE_ARRAY_NR]; + unsigned int nr; +}; + #define for_each_unsync_children(bitmap, idx) \ for (idx = find_first_bit(bitmap, 512); \ idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_unsync_walk *walker) +int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) { - int i, ret; + int i; - if (!sp->unsync_children) - return 0; + if (sp->unsync) + for (i=0; i < pvec->nr; i++) + if (pvec->page[i].sp == sp) + return 0; + + pvec->page[pvec->nr].sp = sp; + pvec->page[pvec->nr].idx = idx; + pvec->nr++; + return (pvec->nr == KVM_PAGE_ARRAY_NR); +} + +static int __mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + int i, ret, nr_unsync_leaf = 0; for_each_unsync_children(sp->unsync_child_bitmap, i) { u64 ent = sp->spt[i]; @@ -988,17 +1011,22 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, child = page_header(ent & PT64_BASE_ADDR_MASK); if (child->unsync_children) { - ret = mmu_unsync_walk(child, walker); - if (ret) + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + + ret = __mmu_unsync_walk(child, pvec); + if (!ret) + __clear_bit(i, sp->unsync_child_bitmap); + else if (ret > 0) + nr_unsync_leaf += ret; + else return ret; - __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { - ret = walker->entry(child, walker); - __clear_bit(i, sp->unsync_child_bitmap); - if (ret) - return ret; + nr_unsync_leaf++; + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; } } } @@ -1006,7 +1034,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) sp->unsync_children = 0; - return 0; + return nr_unsync_leaf; +} + +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + 
struct kvm_mmu_pages *pvec) +{ + if (!sp->unsync_children) + return 0; + + mmu_pages_add(pvec, sp, 0); + return __mmu_unsync_walk(sp, pvec); } static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) @@ -1056,30 +1094,81 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 0; } -struct sync_walker { - struct kvm_vcpu *vcpu; - struct kvm_unsync_walk walker; +struct mmu_page_path { + struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; + unsigned int idx[PT64_ROOT_LEVEL-1]; }; -static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +#define for_each_sp(pvec, sp, parents, i) \ + for (i = mmu_pages_next(&pvec, &parents, -1), \ + sp = pvec.page[i].sp; \ + i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ + i = mmu_pages_next(&pvec, &parents, i)) + +int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, + int i) { - struct sync_walker *sync_walk = container_of(walk, struct sync_walker, - walker); - struct kvm_vcpu *vcpu = sync_walk->vcpu; + int n; + + for (n = i+1; n < pvec->nr; n++) { + struct kvm_mmu_page *sp = pvec->page[n].sp; + + if (sp->role.level == PT_PAGE_TABLE_LEVEL) { + parents->idx[0] = pvec->page[n].idx; + return n; + } - kvm_sync_page(vcpu, sp); - return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); + parents->parent[sp->role.level-2] = sp; + parents->idx[sp->role.level-1] = pvec->page[n].idx; + } + + return n; } -static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +void mmu_pages_clear_parents(struct mmu_page_path *parents) { - struct sync_walker walker = { - .walker = { .entry = mmu_sync_fn, }, - .vcpu = vcpu, - }; + struct kvm_mmu_page *sp; + unsigned int level = 0; + + do { + unsigned int idx = parents->idx[level]; + + sp = parents->parent[level]; + if (!sp) + return; + + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); + level++; + } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); +} + +static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, + struct mmu_page_path *parents, + struct kvm_mmu_pages *pvec) +{ + parents->parent[parent->role.level-1] = NULL; + pvec->nr = 0; +} - while (mmu_unsync_walk(sp, &walker.walker)) +static void mmu_sync_children(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *parent) +{ + int i; + struct kvm_mmu_page *sp; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + for_each_sp(pages, sp, parents, i) { + kvm_sync_page(vcpu, sp); + mmu_pages_clear_parents(&parents); + } cond_resched_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_pages_init(parent, &parents, &pages); + } } static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, @@ -1245,33 +1334,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) } } -struct zap_walker { - struct kvm_unsync_walk walker; - struct kvm *kvm; - int zapped; -}; - -static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) -{ - struct zap_walker *zap_walk = container_of(walk, struct zap_walker, - walker); - kvm_mmu_zap_page(zap_walk->kvm, sp); - zap_walk->zapped = 1; - return 0; -} - -static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) +static int mmu_zap_unsync_children(struct kvm *kvm, + struct kvm_mmu_page *parent) { - struct zap_walker walker = { - .walker = { .entry = mmu_zap_fn, }, - .kvm = kvm, - .zapped = 0, - }; + int i, zapped = 0; + struct 
mmu_page_path parents; + struct kvm_mmu_pages pages; - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (parent->role.level == PT_PAGE_TABLE_LEVEL) return 0; - mmu_unsync_walk(sp, &walker.walker); - return walker.zapped; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + struct kvm_mmu_page *sp; + + for_each_sp(pages, sp, parents, i) { + kvm_mmu_zap_page(kvm, sp); + mmu_pages_clear_parents(&parents); + } + zapped += pages.nr; + kvm_mmu_pages_init(parent, &parents, &pages); + } + + return zapped; } static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) -- cgit v1.2.3-70-g09d2 From 6cffe8ca4a2adf1ac5003d9cad08fe4434d6eee0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 1 Dec 2008 22:32:04 -0200 Subject: KVM: MMU: skip global pgtables on sync due to cr3 switch Skip syncing global pages on cr3 switch (but not on cr4/cr0). This is important for Linux 32-bit guests with PAE, where the kmap page is marked as global. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 6 +++++ arch/x86/kvm/mmu.c | 53 ++++++++++++++++++++++++++++++++++++----- arch/x86/kvm/paging_tmpl.h | 10 ++++---- arch/x86/kvm/x86.c | 4 ++++ 4 files changed, 63 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 93d0aed3588..65b1ed29569 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -182,6 +182,8 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head oos_link; + /* * The following two entries are used to key the shadow page in the * hash table. @@ -200,6 +202,7 @@ struct kvm_mmu_page { int multimapped; /* More than one parent_pte? 
*/ int root_count; /* Currently serving as active root */ bool unsync; + bool global; unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ @@ -356,6 +359,7 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; + struct list_head oos_global_pages; struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; @@ -385,6 +389,7 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; + u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -603,6 +608,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 58c35dead32..cbac9e4b156 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -793,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&sp->oos_link); ASSERT(is_empty_shadow_page(sp->spt)); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; + sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -1066,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } +static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + list_del(&sp->oos_link); + --kvm->stat.mmu_unsync_global; +} + static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; + if (sp->global) + kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1615,9 +1625,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } - kvm_mmu_mark_parents_unsync(vcpu, sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; + + if (sp->global) { + list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); + ++vcpu->kvm->stat.mmu_unsync_global; + } else + kvm_mmu_mark_parents_unsync(vcpu, sp); + mmu_convert_notrap(sp); return 0; } @@ -1643,12 +1659,21 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - gfn_t gfn, pfn_t pfn, bool speculative, + int global, gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; u64 mt_mask = shadow_mt_mask; + struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); + + if (!global && sp->global) { + sp->global = 0; + if (sp->unsync) { + kvm_unlink_unsync_global(vcpu->kvm, sp); + kvm_mmu_mark_parents_unsync(vcpu, sp); + } + } /* * We don't set the accessed bit, since we sometimes want to see @@ -1717,8 +1742,8 @@ set_pte: static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + int *ptwrite, int largepage, int global, + gfn_t gfn, pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ 
-1751,7 +1776,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, gfn, pfn, speculative, true)) { + dirty, largepage, global, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1808,7 +1833,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, walk->write, 1, &walk->pt_write, - walk->largepage, gfn, walk->pfn, false); + walk->largepage, 0, gfn, walk->pfn, false); ++vcpu->stat.pf_fixed; return 1; } @@ -1995,6 +2020,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } +static void mmu_sync_global(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_page *sp, *n; + + list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) + kvm_sync_page(vcpu, sp); +} + void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -2002,6 +2036,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->kvm->mmu_lock); + mmu_sync_global(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); +} + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 84eee43bbe7..e644d81979b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -274,7 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), + gpte & PT_DIRTY_MASK, NULL, largepage, + gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), pfn, true); } @@ -301,8 +302,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, sw->user_fault, sw->write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, - false); + sw->ptwrite, sw->largepage, + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, + gw->gfn, sw->pfn, false); sw->sptep = sptep; return 1; } @@ -580,7 +582,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gfn, + is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7a2aeba0bfb..774db00d2db 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -104,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -315,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -358,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); 
} EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -4113,6 +4116,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ -- cgit v1.2.3-70-g09d2 From ad218f85e388e8ca816ff09d91c246cd014c53a8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 1 Dec 2008 22:32:05 -0200 Subject: KVM: MMU: prepopulate the shadow on invlpg If the guest executes invlpg, peek into the pagetable and attempt to prepopulate the shadow entry. Also stop dirty fault updates from interfering with the fork detector. 2% improvement on RHEL3/AIM7. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/mmu.c | 25 +++++++++++++------------ arch/x86/kvm/paging_tmpl.h | 25 ++++++++++++++++++++++++- arch/x86/kvm/x86.c | 2 +- 4 files changed, 40 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 65b1ed29569..97215a458e5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -602,7 +602,8 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); + const u8 *new, int bytes, + bool guest_initiated); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cbac9e4b156..863baf70506 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2441,7 +2441,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) + const u8 *new, int bytes, + bool guest_initiated) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -2467,15 +2468,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); - if (gfn == vcpu->arch.last_pt_write_gfn - && !last_updated_pte_accessed(vcpu)) { - ++vcpu->arch.last_pt_write_count; - if (vcpu->arch.last_pt_write_count >= 3) - flooded = 1; - } else { - vcpu->arch.last_pt_write_gfn = gfn; - vcpu->arch.last_pt_write_count = 1; - vcpu->arch.last_pte_updated = NULL; + if (guest_initiated) { + if (gfn == vcpu->arch.last_pt_write_gfn + && !last_updated_pte_accessed(vcpu)) { + ++vcpu->arch.last_pt_write_count; + if (vcpu->arch.last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->arch.last_pt_write_gfn = gfn; + vcpu->arch.last_pt_write_count = 1; + vcpu->arch.last_pte_updated = NULL; + } } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; @@ -2615,9 +2618,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - spin_lock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.invlpg(vcpu, gva); - spin_unlock(&vcpu->kvm->mmu_lock); kvm_mmu_flush_tlb(vcpu); ++vcpu->stat.invlpg; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index e644d81979b..d2064015421 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -82,6 +82,7 @@ struct shadow_walker { int *ptwrite; pfn_t pfn; u64 *sptep; + gpa_t pte_gpa; }; static gfn_t 
gpte_to_gfn(pt_element_t gpte) @@ -222,7 +223,7 @@ walk: if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); walker->ptes[walker->level - 1] = pte; } @@ -468,8 +469,15 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, struct kvm_vcpu *vcpu, u64 addr, u64 *sptep, int level) { + struct shadow_walker *sw = + container_of(_sw, struct shadow_walker, walker); if (level == PT_PAGE_TABLE_LEVEL) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + sw->pte_gpa = (sp->gfn << PAGE_SHIFT); + sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + if (is_shadow_present_pte(*sptep)) rmap_remove(vcpu->kvm, sptep); set_shadow_pte(sptep, shadow_trap_nonpresent_pte); @@ -482,11 +490,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { + pt_element_t gpte; struct shadow_walker walker = { .walker = { .entry = FNAME(shadow_invlpg_entry), }, + .pte_gpa = -1, }; + spin_lock(&vcpu->kvm->mmu_lock); walk_shadow(&walker.walker, vcpu, gva); + spin_unlock(&vcpu->kvm->mmu_lock); + if (walker.pte_gpa == -1) + return; + if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, + sizeof(pt_element_t))) + return; + if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { + if (mmu_topup_memory_caches(vcpu)) + return; + kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, + sizeof(pt_element_t), 0); + } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 774db00d2db..ba102879de3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2046,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); return 1; } -- cgit v1.2.3-70-g09d2
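
For reference, the invlpg path added by the last patch above boils down to the sketch below (condensed from FNAME(invlpg) and kvm_mmu_invlpg in the diff; it uses KVM-internal helpers, so treat it as an outline rather than standalone code):

/* Outline of the new invlpg handling, with locking as in the patch. */
spin_lock(&vcpu->kvm->mmu_lock);
walk_shadow(&walker.walker, vcpu, gva);         /* zap the spte, record pte_gpa */
spin_unlock(&vcpu->kvm->mmu_lock);

if (walker.pte_gpa != -1 &&
    !kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, sizeof(gpte)) &&
    is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK) &&
    !mmu_topup_memory_caches(vcpu))
        /* guest_initiated == 0: do not perturb the write-flood / fork detector */
        kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
                          sizeof(gpte), 0);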