diff options
Diffstat (limited to 'arch/x86/include/asm')
166 files changed, 4481 insertions, 3859 deletions
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 37822206083..bb70e397aa8 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -23,8 +23,6 @@ */ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) { - u16 gs; - /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; @@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, gs); + dump->regs.gs = get_user_gs(regs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d676d8ecde..4518dc50090 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -102,9 +102,6 @@ static inline void disable_acpi(void) acpi_noirq = 1; } -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); static inline void acpi_noirq_set(void) { acpi_noirq = 1; } @@ -113,7 +110,6 @@ static inline void acpi_disable_pci(void) acpi_pci_disabled = 1; acpi_noirq_set(); } -extern int acpi_irq_balance_set(char *str); /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1a30c0440c6..95c8cd9d22b 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -190,16 +190,23 @@ /* FIXME: move this macro to <linux/pci.h> */ #define PCI_BUS(x) (((x) >> 8) & 0xff) +/* Protection domain flags */ +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops + domain for an IOMMU */ + /* * This structure contains generic data for IOMMU protection domains * independent of their use. */ struct protection_domain { - spinlock_t lock; /* mostly used to lock the page table*/ - u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ - void *priv; /* private data */ + spinlock_t lock; /* mostly used to lock the page table*/ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + unsigned long flags; /* flags to find out type of domain */ + unsigned dev_cnt; /* devices assigned to this domain */ + void *priv; /* private data */ }; /* @@ -251,13 +258,6 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ @@ -266,6 +266,13 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + /* pci domain of this IOMMU */ u16 pci_seg; @@ -284,19 +291,19 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; /* MSI number for event interrupt */ u16 evt_msi_num; - /* if one, we need to send a completion wait command */ - int need_sync; - /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; + /* if one, we need to send a completion wait command */ + bool need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; @@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table; extern unsigned long *amd_iommu_pd_alloc_bitmap; /* will be 1 if device isolation is enabled */ -extern int amd_iommu_isolate; +extern bool amd_iommu_isolate; /* * If true, the addresses will be flushed on unmap time, not when @@ -382,18 +389,6 @@ extern int amd_iommu_isolate; */ extern bool amd_iommu_unmap_flush; -/* takes a PCI device id and prints it out in a readable form */ -static inline void print_devid(u16 devid, int nl) -{ - int bus = devid >> 8; - int dev = devid >> 3 & 0x1f; - int fn = devid & 0x07; - - printk("%02x:%02x.%x", bus, dev, fn); - if (nl) - printk("\n"); -} - /* takes bus and device/function and returns the device id * FIXME: should that be in generic PCI code? */ static inline u16 calc_devid(u8 bus, u8 devfn) @@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +#ifdef CONFIG_AMD_IOMMU_STATS + +struct __iommu_counter { + char *name; + struct dentry *dent; + u64 value; +}; + +#define DECLARE_STATS_COUNTER(nm) \ + static struct __iommu_counter nm = { \ + .name = #nm, \ + } + +#define INC_STATS_COUNTER(name) name.value += 1 +#define ADD_STATS_COUNTER(name, x) name.value += (x) +#define SUB_STATS_COUNTER(name, x) name.value -= (x) + +#else /* CONFIG_AMD_IOMMU_STATS */ + +#define DECLARE_STATS_COUNTER(name) +#define INC_STATS_COUNTER(name) +#define ADD_STATS_COUNTER(name, x) +#define SUB_STATS_COUNTER(name, x) + +static inline void amd_iommu_stats_init(void) { } + +#endif /* CONFIG_AMD_IOMMU_STATS */ + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3b1510b4fc5..fba49f66228 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -33,7 +33,13 @@ } while (0) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif #ifdef CONFIG_X86_LOCAL_APIC @@ -41,6 +47,21 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + /* * Basic functions accessing APICs. */ @@ -54,7 +75,6 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); -extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); @@ -93,7 +113,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic, x2apic_preenabled; +extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); @@ -125,12 +145,35 @@ struct apic_ops { extern struct apic_ops *apic_ops; -#define apic_read (apic_ops->read) -#define apic_write (apic_ops->write) -#define apic_icr_read (apic_ops->icr_read) -#define apic_icr_write (apic_ops->icr_write) -#define apic_wait_icr_idle (apic_ops->wait_icr_idle) -#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +static inline u32 apic_read(u32 reg) +{ + return apic_ops->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic_ops->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic_ops->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic_ops->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic_ops->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic_ops->safe_wait_icr_idle(); +} extern int get_physical_broadcast(void); @@ -193,7 +236,26 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_64 +#define SET_APIC_ID(x) (apic->set_apic_id(x)) +#else + +#ifdef CONFIG_X86_LOCAL_APIC +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} +#endif + +#endif + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 00000000000..82f613c607c --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/apm.h index 20370c6db74..20370c6db74 100644 --- a/arch/x86/include/asm/mach-default/apm.h +++ b/arch/x86/include/asm/apm.h diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6ecdd..85b46fba422 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -2,6 +2,7 @@ #define _ASM_X86_ATOMIC_32_H #include <linux/compiler.h> +#include <linux/types.h> #include <asm/processor.h> #include <asm/cmpxchg.h> @@ -10,15 +11,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 279d2a731f3..8c21731984d 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h @@ -1,25 +1,15 @@ #ifndef _ASM_X86_ATOMIC_64_H #define _ASM_X86_ATOMIC_64_H +#include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> -/* atomic_t should be 32 bit signed type */ - /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** @@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -/* An 64bit atomic type */ - -typedef struct { - long counter; -} atomic64_t; +/* The 64-bit atomic type */ #define ATOMIC64_INIT(i) { (i) } diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h deleted file mode 100644 index 1d9543b9d35..00000000000 --- a/arch/x86/include/asm/bigsmp/apic.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -static inline cpumask_t target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_map; -#else - return cpumask_of_cpu(0); -#endif -} - -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target proc */ -#define NO_BALANCE_IRQ (0) -#define WAKE_SECONDARY_VIA_INIT - - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return (0); -} - -static inline unsigned long check_apicid_present(int bit) -{ - return (1); -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); - val |= SET_APIC_LOGICAL_ID(id); - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Physflat", nr_ioapics); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return (0); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return apicid_2_node[hard_smp_processor_id()]; -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return cpu_physical_id(cpu); -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); -} - -static inline void setup_portio_remap(void) -{ -} - -static inline void enable_apic_mode(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return (1); -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int cpu; - int apicid; - - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h deleted file mode 100644 index 392c3f5ef2f..00000000000 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h deleted file mode 100644 index 9404c535b7e..00000000000 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 36001032271..02b47a603fc 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -3,6 +3,9 @@ /* * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H @@ -53,7 +56,8 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -168,7 +173,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) */ static inline void change_bit(int nr, volatile unsigned long *addr) { - asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "xorb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btc %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } } /** @@ -196,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -292,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static inline int constant_test_bit(int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 3def2065fce..d9cf1cd156d 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_32 # define __BUG_C0 "2:\t.long 1b, %c0\n" #else -# define __BUG_C0 "2:\t.quad 1b, %c0\n" +# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n" #endif #define BUG() \ diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index e02ae2d89ac..b13a7a88f3e 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,81 +1,6 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include <asm/types.h> -#include <linux/compiler.h> - -#ifdef __GNUC__ - -#ifdef __i386__ - -static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) -{ -#ifdef CONFIG_X86_BSWAP - asm("bswap %0" : "=r" (x) : "0" (x)); -#else - asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ - "rorl $16,%0\n\t" /* swap words */ - "xchgb %b0,%h0" /* swap higher bytes */ - : "=q" (x) - : "0" (x)); -#endif - return x; -} - -static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) -{ - union { - struct { - __u32 a; - __u32 b; - } s; - __u64 u; - } v; - v.u = val; -#ifdef CONFIG_X86_BSWAP - asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" - : "=r" (v.s.a), "=r" (v.s.b) - : "0" (v.s.a), "1" (v.s.b)); -#else - v.s.a = ___arch__swab32(v.s.a); - v.s.b = ___arch__swab32(v.s.b); - asm("xchgl %0,%1" - : "=r" (v.s.a), "=r" (v.s.b) - : "0" (v.s.a), "1" (v.s.b)); -#endif - return v.u; -} - -#else /* __i386__ */ - -static inline __attribute_const__ __u64 ___arch__swab64(__u64 x) -{ - asm("bswapq %0" - : "=r" (x) - : "0" (x)); - return x; -} - -static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) -{ - asm("bswapl %0" - : "=r" (x) - : "0" (x)); - return x; -} - -#endif - -/* Do not define swab16. Gcc is smart enough to recognize "C" version and - convert it into rotation or exhange. */ - -#define __arch__swab64(x) ___arch__swab64(x) -#define __arch__swab32(x) ___arch__swab32(x) - -#define __BYTEORDER_HAS_U64__ - -#endif /* __GNUC__ */ - #include <linux/byteorder/little_endian.h> #endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 2bc162e0ec6..0e63c9a2a8d 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -1,5 +1,55 @@ /* - * Some macros to handle stack frames in assembly. + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ #define R15 0 @@ -9,7 +59,7 @@ #define RBP 32 #define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ +/* arguments: interrupts/non tracing syscalls only save up to here: */ #define R11 48 #define R10 56 #define R9 64 @@ -22,7 +72,7 @@ #define ORIG_RAX 120 /* + error_code */ /* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. */ +/* cpu exception frame or undefined in case of fast syscall: */ #define RIP 128 #define CS 136 #define EFLAGS 144 diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df603..b185091bf19 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,20 @@ #include <linux/nodemask.h> #include <linux/percpu.h> +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define safe_smp_processor_id() 0 +#define stack_smp_processor_id() 0 + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; @@ -17,4 +31,7 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +extern unsigned int boot_cpu_id; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index cfdf8c2c5c3..7301e60dc4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -80,7 +80,6 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ @@ -92,6 +91,9 @@ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -117,6 +119,7 @@ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ @@ -237,6 +240,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 00000000000..a7f3c75f8ad --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; + +extern void setup_cpu_local_masks(void); + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) + +static inline void setup_cpu_local_masks(void) { } + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d67..c68c361697e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include <linux/compiler.h> #include <asm/percpu.h> +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include <asm/pda.h> - -struct task_struct; static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return percpu_read(current_task); } -#else /* __ASSEMBLY__ */ - -#include <asm/asm-offsets.h> -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b07..dc27705f544 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } -#define SYS_VECTOR_FREE 0 -#define SYS_VECTOR_ALLOCED 1 - extern int first_system_vector; -extern char system_vectors[]; +/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ +extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) { - if (system_vectors[vector] == SYS_VECTOR_FREE) { - system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 7f225a4b2a2..132a134d12f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -2,8 +2,8 @@ #define _ASM_X86_DMA_MAPPING_H /* - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for - * documentation. + * IOMMU interface. See Documentation/PCI/PCI-DMA-mapping.txt and + * Documentation/DMA-API.txt for documentation. */ #include <linux/scatterlist.h> @@ -65,21 +65,17 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) return dma_ops; else return dev->archdata.dma_ops; -#endif /* _ASM_X86_DMA_MAPPING_H */ +#endif } /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -#ifdef CONFIG_X86_32 - return 0; -#else struct dma_mapping_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); return (dma_addr == bad_dma_address); -#endif } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/include/asm/mach-default/do_timer.h b/arch/x86/include/asm/do_timer.h index 23ecda0b28a..23ecda0b28a 100644 --- a/arch/x86/include/asm/mach-default/do_timer.h +++ b/arch/x86/include/asm/do_timer.h diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 72c5a190bf4..a8f672ba100 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -6,14 +6,13 @@ * precise-event based sampling (PEBS). * * It manages: - * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer memory allocation (optional) - * - buffer overflow handling + * - DS and BTS hardware configuration + * - buffer overflow handling (to be done) * - buffer access * - * It assumes: - * - get_task_struct on all parameter tasks - * - current is allowed to trace parameter tasks + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * * * Copyright (C) 2007-2008 Intel Corporation. @@ -23,13 +22,54 @@ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H -#ifdef CONFIG_X86_DS #include <linux/types.h> #include <linux/init.h> +#include <linux/err.h> + +#ifdef CONFIG_X86_DS struct task_struct; +struct ds_context; +struct ds_tracer; +struct bts_tracer; +struct pebs_tracer; + +typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); +typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); + + +/* + * A list of features plus corresponding macros to talk about them in + * the ds_request function's flags parameter. + * + * We use the enum to index an array of corresponding control bits; + * we use the macro to index a flags bit-vector. + */ +enum ds_feature { + dsf_bts = 0, + dsf_bts_kernel, +#define BTS_KERNEL (1 << dsf_bts_kernel) + /* trace kernel-mode branches */ + + dsf_bts_user, +#define BTS_USER (1 << dsf_bts_user) + /* trace user-mode branches */ + + dsf_bts_overflow, + dsf_bts_max, + dsf_pebs = dsf_bts_max, + + dsf_pebs_max, + dsf_ctl_max = dsf_pebs_max, + dsf_bts_timestamps = dsf_ctl_max, +#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) + /* add timestamps into BTS trace */ + +#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) +}; + /* * Request BTS or PEBS @@ -37,163 +77,169 @@ struct task_struct; * Due to alignement constraints, the actual buffer may be slightly * smaller than the requested or provided buffer. * - * Returns 0 on success; -Eerrno otherwise + * Returns a pointer to a tracer structure on success, or + * ERR_PTR(errcode) on failure. + * + * The interrupt threshold is independent from the overflow callback + * to allow users to use their own overflow interrupt handling mechanism. * * task: the task to request recording for; * NULL for per-cpu recording on the current cpu * base: the base pointer for the (non-pageable) buffer; - * NULL if buffer allocation requested - * size: the size of the requested or provided buffer + * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; * NULL if cyclic buffer requested + * th: the interrupt threshold in records from the end of the buffer; + * -1 if no interrupt threshold is requested. + * flags: a bit-mask of the above flags */ -typedef void (*ds_ovfl_callback_t)(struct task_struct *); -extern int ds_request_bts(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); -extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); +extern struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); /* * Release BTS or PEBS resources + * Suspend and resume BTS or PEBS tracing * - * Frees buffers allocated on ds_request. - * - * Returns 0 on success; -Eerrno otherwise - * - * task: the task to release resources for; - * NULL to release resources for the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_release_bts(struct task_struct *task); -extern int ds_release_pebs(struct task_struct *task); +extern void ds_release_bts(struct bts_tracer *tracer); +extern void ds_suspend_bts(struct bts_tracer *tracer); +extern void ds_resume_bts(struct bts_tracer *tracer); +extern void ds_release_pebs(struct pebs_tracer *tracer); +extern void ds_suspend_pebs(struct pebs_tracer *tracer); +extern void ds_resume_pebs(struct pebs_tracer *tracer); -/* - * Return the (array) index of the write pointer. - * (assuming an array of BTS/PEBS records) - * - * Returns -Eerrno on error - * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result - */ -extern int ds_get_bts_index(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); /* - * Return the (array) index one record beyond the end of the array. - * (assuming an array of BTS/PEBS records) + * The raw DS buffer state as it is used for BTS and PEBS recording. * - * Returns -Eerrno on error - * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result + * This is the low-level, arch-dependent interface for working + * directly on the raw trace data. */ -extern int ds_get_bts_end(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); +struct ds_trace { + /* the number of bts/pebs records */ + size_t n; + /* the size of a bts/pebs record in bytes */ + size_t size; + /* pointers into the raw buffer: + - to the first entry */ + void *begin; + /* - one beyond the last entry */ + void *end; + /* - one beyond the newest entry */ + void *top; + /* - the interrupt threshold */ + void *ith; + /* flags given on ds_request() */ + unsigned int flags; +}; /* - * Provide a pointer to the BTS/PEBS record at parameter index. - * (assuming an array of BTS/PEBS records) - * - * The pointer points directly into the buffer. The user is - * responsible for copying the record. - * - * Returns the size of a single record on success; -Eerrno on error - * - * task: the task to access; - * NULL to access the current cpu - * index: the index of the requested record - * record (out): pointer to the requested record + * An arch-independent view on branch trace data. */ -extern int ds_access_bts(struct task_struct *task, - size_t index, const void **record); -extern int ds_access_pebs(struct task_struct *task, - size_t index, const void **record); +enum bts_qualifier { + bts_invalid, +#define BTS_INVALID bts_invalid + + bts_branch, +#define BTS_BRANCH bts_branch + + bts_task_arrives, +#define BTS_TASK_ARRIVES bts_task_arrives + + bts_task_departs, +#define BTS_TASK_DEPARTS bts_task_departs + + bts_qual_bit_size = 4, + bts_qual_max = (1 << bts_qual_bit_size), +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from; + __u64 to; + } lbr; + /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ + struct { + __u64 jiffies; + pid_t pid; + } timestamp; + } variant; +}; -/* - * Write one or more BTS/PEBS records at the write pointer index and - * advance the write pointer. - * - * If size is not a multiple of the record size, trailing bytes are - * zeroed out. - * - * May result in one or more overflow notifications. - * - * If called during overflow handling, that is, with index >= - * interrupt threshold, the write will wrap around. - * - * An overflow notification is given if and when the interrupt - * threshold is reached during or after the write. - * - * Returns the number of bytes written or -Eerrno. - * - * task: the task to access; - * NULL to access the current cpu - * buffer: the buffer to write - * size: the size of the buffer - */ -extern int ds_write_bts(struct task_struct *task, - const void *buffer, size_t size); -extern int ds_write_pebs(struct task_struct *task, - const void *buffer, size_t size); /* - * Same as ds_write_bts/pebs, but omit ownership checks. + * The BTS state. * - * This is needed to have some other task than the owner of the - * BTS/PEBS buffer or the parameter task itself write into the - * respective buffer. + * This gives access to the raw DS state and adds functions to provide + * an arch-independent view of the BTS data. */ -extern int ds_unchecked_write_bts(struct task_struct *task, - const void *buffer, size_t size); -extern int ds_unchecked_write_pebs(struct task_struct *task, - const void *buffer, size_t size); +struct bts_trace { + struct ds_trace ds; + + int (*read)(struct bts_tracer *tracer, const void *at, + struct bts_struct *out); + int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); +}; + /* - * Reset the write pointer of the BTS/PEBS buffer. + * The PEBS state. * - * Returns 0 on success; -Eerrno on error - * - * task: the task to access; - * NULL to access the current cpu + * This gives access to the raw DS state and the PEBS-specific counter + * reset value. */ -extern int ds_reset_bts(struct task_struct *task); -extern int ds_reset_pebs(struct task_struct *task); +struct pebs_trace { + struct ds_trace ds; + + /* the PEBS reset value */ + unsigned long long reset_value; +}; + /* - * Clear the BTS/PEBS buffer and reset the write pointer. - * The entire buffer will be zeroed out. + * Read the BTS or PEBS trace. * - * Returns 0 on success; -Eerrno on error + * Returns a view on the trace collected for the parameter tracer. * - * task: the task to access; - * NULL to access the current cpu + * The view remains valid as long as the traced task is not running or + * the tracer is suspended. + * Writes into the trace buffer are not reflected. + * + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_clear_bts(struct task_struct *task); -extern int ds_clear_pebs(struct task_struct *task); +extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); +extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); + /* - * Provide the PEBS counter reset value. + * Reset the write pointer of the BTS/PEBS buffer. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu - * value (out): the counter reset value + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); +extern int ds_reset_bts(struct bts_tracer *tracer); +extern int ds_reset_pebs(struct pebs_tracer *tracer); /* * Set the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_pebs() * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct task_struct *task, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); /* * Initialization @@ -201,38 +247,26 @@ extern int ds_set_pebs_reset(struct task_struct *task, u64 value); struct cpuinfo_x86; extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); - - /* - * The DS context - part of struct thread_struct. + * Context switch work */ -struct ds_context { - /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char *ds; - /* the owner of the BTS and PEBS configuration, respectively */ - struct task_struct *owner[2]; - /* buffer overflow notification function for BTS and PEBS */ - ds_ovfl_callback_t callback[2]; - /* the original buffer address */ - void *buffer[2]; - /* the number of allocated pages for on-request allocated buffers */ - unsigned int pages[2]; - /* use count */ - unsigned long count; - /* a pointer to the context location inside the thread_struct - * or the per_cpu context array */ - struct ds_context **this; - /* a pointer to the task owning this context, or NULL, if the - * context is owned by a cpu */ - struct task_struct *task; -}; +extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); -/* called by exit_thread() to free leftover contexts */ -extern void ds_free(struct ds_context *context); +/* + * Task clone/init and cleanup work + */ +extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); +extern void ds_exit_thread(struct task_struct *tsk); #else /* CONFIG_X86_DS */ -#define ds_init_intel(config) do {} while (0) +struct cpuinfo_x86; +static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} +static inline void ds_switch_to(struct task_struct *prev, + struct task_struct *next) {} +static inline void ds_copy_thread(struct task_struct *tsk, + struct task_struct *father) {} +static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 804b6e6be92..3afc5e87cfd 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,56 +6,91 @@ #endif /* - Macros for dwarf2 CFI unwind table entries. - See "as.info" for details on these pseudo ops. Unfortunately - they are only supported in very new binutils, so define them - away for older version. + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. */ #ifdef CONFIG_AS_CFI -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register -#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset -#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset -#define CFI_OFFSET .cfi_offset -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_REGISTER .cfi_register -#define CFI_RESTORE .cfi_restore -#define CFI_REMEMBER_STATE .cfi_remember_state -#define CFI_RESTORE_STATE .cfi_restore_state -#define CFI_UNDEFINED .cfi_undefined +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined #ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame +#define CFI_SIGNAL_FRAME .cfi_signal_frame #else #define CFI_SIGNAL_FRAME #endif #else -/* Due to the structure of pre-exisiting code, don't use assembler line - comment character # to ignore the arguments. Instead, use a dummy macro. */ +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ .macro cfi_ignore a=0, b=0, c=0, d=0 .endm -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore #define CFI_DEF_CFA_REGISTER cfi_ignore #define CFI_DEF_CFA_OFFSET cfi_ignore #define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore #endif +/* + * An attempt to make CFI annotations more or less + * correct and shorter. It is implied that you know + * what you're doing if you use them. + */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 + .macro pushq_cfi reg + pushq \reg + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro popq_cfi reg + popq \reg + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro movq_cfi reg offset=0 + movq %\reg, \offset(%rsp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movq_cfi_restore offset reg + movq \offset(%rsp), %\reg + CFI_RESTORE \reg + .endm +#else /*!CONFIG_X86_64*/ + + /* 32bit defenitions are missed yet */ + +#endif /*!CONFIG_X86_64*/ +#endif /*__ASSEMBLY__*/ + #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 3d8ceddbd40..00d41ce4c84 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -49,6 +49,7 @@ #define E820_RESERVED_KERN 128 #ifndef __ASSEMBLY__ +#include <linux/types.h> struct e820entry { __u64 addr; /* start of memory segment */ __u64 size; /* size of memory segment */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a2e545c91c3..ca5ffb2856b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ +extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 40ca1bea791..83c1bc8d2e8 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -124,7 +124,6 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - savesegment(gs, pr_reg[10]); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ @@ -133,6 +132,18 @@ do { \ pr_reg[16] = regs->ss & 0xffff; \ } while (0); +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + pr_reg[10] = get_user_gs(regs); \ +} while (0); + +#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + savesegment(gs, pr_reg[10]); \ +} while (0); + #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) @@ -325,7 +336,7 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack); + int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 94826cf8745..cc70c1c78ca 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -8,7 +8,9 @@ enum reboot_type { BOOT_BIOS = 'b', #endif BOOT_ACPI = 'a', - BOOT_EFI = 'e' + BOOT_EFI = 'e', + BOOT_CF9 = 'p', + BOOT_CF9_COND = 'q', }; extern enum reboot_type reboot_type; diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 6b1add8e31d..854d538ae85 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -9,12 +9,28 @@ * is no hardware IRQ pin equivalent for them, they are triggered * through the ICC by us (IPIs) */ -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) + +BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, + smp_invalidate_interrupt) #endif /* @@ -25,10 +41,15 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) * a much simpler SMP time architecture: */ #ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#ifdef CONFIG_PERF_COUNTERS +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +#endif + #ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h deleted file mode 100644 index 380f0b4f17e..00000000000 --- a/arch/x86/include/asm/es7000/apic.h +++ /dev/null @@ -1,193 +0,0 @@ -#ifndef __ASM_ES7000_APIC_H -#define __ASM_ES7000_APIC_H - -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -static inline cpumask_t target_cpus(void) -{ -#if defined CONFIG_ES7000_CLUSTERED_APIC - return CPU_MASK_ALL; -#else - return cpumask_of_cpu(smp_processor_id()); -#endif -} - -#if defined CONFIG_ES7000_CLUSTERED_APIC -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) -#undef WAKE_SECONDARY_VIA_INIT -#define WAKE_SECONDARY_VIA_MIP -#else -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target procs */ -#define NO_BALANCE_IRQ (0) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0x0 -#define WAKE_SECONDARY_VIA_INIT -#endif - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long id; - id = xapic_phys_to_log_apicid(cpu); - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -#ifndef CONFIG_X86_GENERICARCH -extern void enable_apic_mode(void); -#endif - -extern int apic_version [MAX_APICS]; -static inline void setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - -static inline int apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; - return mask; -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - - -static inline void setup_portio_remap(void) -{ -} - -extern unsigned int boot_cpu_physical_apicid; -static inline int check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_ES7000_APIC_H */ diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h deleted file mode 100644 index 8b234a3cb85..00000000000 --- a/arch/x86/include/asm/es7000/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_ES7000_APICDEF_H -#define __ASM_ES7000_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h deleted file mode 100644 index 632a955fcc0..00000000000 --- a/arch/x86/include/asm/es7000/ipi.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __ASM_ES7000_IPI_H -#define __ASM_ES7000_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h deleted file mode 100644 index ed5a3caae14..00000000000 --- a/arch/x86/include/asm/es7000/mpparse.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __ASM_ES7000_MPPARSE_H -#define __ASM_ES7000_MPPARSE_H - -#include <linux/acpi.h> - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#ifndef CONFIG_X86_GENERICARCH -extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); -extern int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); -#endif - -#ifdef CONFIG_ACPI - -static inline int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h deleted file mode 100644 index 39849346191..00000000000 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef __ASM_ES7000_WAKECPU_H -#define __ASM_ES7000_WAKECPU_H - -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#ifdef CONFIG_ES7000_CLUSTERED_APIC -#define WAKE_SECONDARY_VIA_MIP -#else -#define WAKE_SECONDARY_VIA_INIT -#endif - -#ifdef WAKE_SECONDARY_VIA_MIP -extern int es7000_start_cpu(int cpu, unsigned long eip); -static inline int -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - int boot_error = 0; - boot_error = es7000_start_cpu(phys_apicid, start_eip); - return boot_error; -} -#endif - -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid - -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -#ifdef WAKE_SECONDARY_VIA_INIT - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -#define inquire_remote_apic(apicid) do { \ - if (apic_verbosity >= APIC_DEBUG) \ - __inquire_remote_apic(apicid); \ - } while (0) - -#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index c7115c1d721..047d9bab2b3 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -95,10 +95,6 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, FIX_WP_TEST, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a..298d9ba3fae 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -50,10 +50,6 @@ enum fixed_addresses { FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b1..b55b4a7fbef 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -1,6 +1,33 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H +#ifdef __ASSEMBLY__ + + .macro MCOUNT_SAVE_FRAME + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + .endm + + .macro MCOUNT_RESTORE_FRAME + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + .endm + +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ @@ -17,8 +44,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } -#endif +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifndef __ASSEMBLY__ + +/* + * Stack of return addresses for functions + * of a thread. + * Used in struct thread_info + */ +struct ftrace_ret_stack { + unsigned long ret; + unsigned long func; + unsigned long long calltime; +}; + +/* + * Primary handler of a function return. + * It relays on ftrace_return_to_handler. + * Defined in entry_32/64.S + */ +extern void return_to_handler(void); + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 74252264433..6cfdafa409d 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -29,6 +29,39 @@ extern int fix_aperture; #define AMD64_GARTCACHECTL 0x9c #define AMD64_GARTEN (1<<0) +#ifdef CONFIG_GART_IOMMU +extern int gart_iommu_aperture; +extern int gart_iommu_aperture_allowed; +extern int gart_iommu_aperture_disabled; + +extern void early_gart_iommu_check(void); +extern void gart_iommu_init(void); +extern void gart_iommu_shutdown(void); +extern void __init gart_parse_options(char *); +extern void gart_iommu_hole_init(void); + +#else +#define gart_iommu_aperture 0 +#define gart_iommu_aperture_allowed 0 +#define gart_iommu_aperture_disabled 1 + +static inline void early_gart_iommu_check(void) +{ +} +static inline void gart_iommu_init(void) +{ +} +static inline void gart_iommu_shutdown(void) +{ +} +static inline void gart_parse_options(char *options) +{ +} +static inline void gart_iommu_hole_init(void) +{ +} +#endif + extern int agp_amd64_init(void); static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index d48bee663a6..273b99452ae 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -1,5 +1,263 @@ +#ifndef _ASM_X86_GENAPIC_H +#define _ASM_X86_GENAPIC_H + +#include <linux/cpumask.h> + +#include <asm/mpspec.h> +#include <asm/atomic.h> + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +struct genapic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + + u32 irq_delivery_mode; + u32 irq_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int disable_esr; + + int dest_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* wakeup_secondary_cpu */ + int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); +}; + +extern struct genapic *apic; + +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + #ifdef CONFIG_X86_32 -# include "genapic_32.h" +extern void es7000_update_genapic_to_cluster(void); #else -# include "genapic_64.h" +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; +extern int default_acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +extern struct genapic apic_x2apic_uv_x; +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern void default_setup_apic_routing(void); + +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif + +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include <asm/smp.h> + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = apic_read(APIC_ID); + + return apic->get_apic_id(reg); +} + +#ifdef CONFIG_X86_64 +extern void default_setup_apic_routing(void); +#else + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +static inline void default_setup_apic_routing(void) +{ +#ifdef CONFIG_X86_IO_APIC + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Flat", nr_ioapics); +#endif +} + +extern int default_apicid_to_node(int logical_apicid); + +#endif + +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +/* Mapping from cpu number to logical apicid */ +static inline int default_cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +#endif + +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h deleted file mode 100644 index 5cbd4fcc06f..00000000000 --- a/arch/x86/include/asm/genapic_32.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_32_H -#define _ASM_X86_GENAPIC_32_H - -#include <asm/mpspec.h> - -/* - * Generic APIC driver interface. - * - * An straight forward mapping of the APIC related parts of the - * x86 subarchitecture interface to a dynamic object. - * - * This is used by the "generic" x86 subarchitecture. - * - * Copyright 2003 Andi Kleen, SuSE Labs. - */ - -struct mpc_config_bus; -struct mp_config_table; -struct mpc_config_processor; - -struct genapic { - char *name; - int (*probe)(void); - - int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - int int_delivery_mode; - int int_dest_mode; - int ESR_DISABLE; - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - int no_balance_irq; - int no_ioapic_check; - void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); - - /* mpparse */ - /* When one of the next two hooks returns 1 the genapic - is switched to this. Essentially they are additional probe - functions. */ - int (*mps_oem_check)(struct mp_config_table *mpc, char *oem, - char *productid); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - - unsigned (*get_apic_id)(unsigned long x); - unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - cpumask_t (*vector_allocation_domain)(int cpu); - -#ifdef CONFIG_SMP - /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); -#endif -}; - -#define APICFUNC(x) .x = x, - -/* More functions could be probably marked IPIFUNC and save some space - in UP GENERICARCH kernels, but I don't have the nerve right now - to untangle this mess. -AK */ -#ifdef CONFIG_SMP -#define IPIFUNC(x) APICFUNC(x) -#else -#define IPIFUNC(x) -#endif - -#define APIC_INIT(aname, aprobe) \ -{ \ - .name = aname, \ - .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered) \ - APICFUNC(target_cpus) \ - APICFUNC(check_apicid_used) \ - APICFUNC(check_apicid_present) \ - APICFUNC(init_apic_ldr) \ - APICFUNC(ioapic_phys_id_map) \ - APICFUNC(setup_apic_routing) \ - APICFUNC(multi_timer_check) \ - APICFUNC(apicid_to_node) \ - APICFUNC(cpu_to_logical_apicid) \ - APICFUNC(cpu_present_to_apicid) \ - APICFUNC(apicid_to_cpu_present) \ - APICFUNC(setup_portio_remap) \ - APICFUNC(check_phys_apicid_present) \ - APICFUNC(mps_oem_check) \ - APICFUNC(get_apic_id) \ - .apic_id_mask = APIC_ID_MASK, \ - APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(vector_allocation_domain) \ - APICFUNC(acpi_madt_oem_check) \ - IPIFUNC(send_IPI_mask) \ - IPIFUNC(send_IPI_allbutself) \ - IPIFUNC(send_IPI_all) \ - APICFUNC(enable_apic_mode) \ - APICFUNC(phys_pkg_id) \ -} - -extern struct genapic *genapic; - -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#define get_uv_system_type() UV_NONE -#define is_uv_system() 0 -#define uv_wakeup_secondary(a, b) 1 -#define uv_system_init() do {} while (0) - - -#endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h deleted file mode 100644 index 13c4e96199e..00000000000 --- a/arch/x86/include/asm/genapic_64.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_64_H -#define _ASM_X86_GENAPIC_64_H - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC sub-arch data struct. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ - -struct genapic { - char *name; - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - u32 int_delivery_mode; - u32 int_dest_mode; - int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - cpumask_t (*vector_allocation_domain)(int cpu); - void (*init_apic_ldr)(void); - /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); - /* */ - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; -}; - -extern struct genapic *genapic; - -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; -extern int acpi_madt_oem_check(char *, char *); - -extern void apic_send_IPI_self(int vector); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -extern enum uv_system_type get_uv_system_type(void); -extern int is_uv_system(void); - -extern struct genapic apic_x2apic_uv_x; -DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void uv_cpu_init(void); -extern void uv_system_init(void); -extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); - -extern void setup_apic_routing(void); - -#endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 000787df66e..176f058e715 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -1,11 +1,52 @@ -#ifdef CONFIG_X86_32 -# include "hardirq_32.h" -#else -# include "hardirq_64.h" +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include <linux/threads.h> +#include <linux/irq.h> + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int irq0_irqs; +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; +#endif +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + unsigned int irq_thermal_count; +# ifdef CONFIG_X86_64 + unsigned int irq_threshold_count; +# endif #endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) + +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu arch_irq_stat_cpu extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h deleted file mode 100644 index 5ca135e72f2..00000000000 --- a/arch/x86/include/asm/hardirq_32.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_32_H -#define _ASM_X86_HARDIRQ_32_H - -#include <linux/threads.h> -#include <linux/irq.h> - -typedef struct { - unsigned int __softirq_pending; - unsigned long idle_timestamp; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -#define __ARCH_IRQ_STAT -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) - -void ack_bad_irq(unsigned int irq); -#include <linux/irq_cpustat.h> - -#endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h deleted file mode 100644 index 1ba381fc51d..00000000000 --- a/arch/x86/include/asm/hardirq_64.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_64_H -#define _ASM_X86_HARDIRQ_64_H - -#include <linux/threads.h> -#include <linux/irq.h> -#include <asm/pda.h> -#include <asm/apic.h> - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT 1 - -#define local_softirq_pending() read_pda(__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING 1 - -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_64_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b97aecb0b61..370e1c83bb4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -25,8 +25,6 @@ #include <asm/irq.h> #include <asm/sections.h> -#define platform_legacy_irq(irq) ((irq) < 16) - /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void error_interrupt(void); @@ -58,7 +56,7 @@ extern void make_8259A_irq(unsigned int irq); extern void init_8259A(int aeoi); /* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); @@ -67,15 +65,7 @@ extern void disable_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); -#ifdef CONFIG_X86_64 extern void enable_IO_APIC(void); -#endif - -/* IPI functions */ -#ifdef CONFIG_X86_32 -extern void send_IPI_self(int vector); -#endif -extern void send_IPI(int dest, int vector); /* Statistics */ extern atomic_t irq_err_count; @@ -84,21 +74,11 @@ extern atomic_t irq_mis_count; /* EISA */ extern void eisa_set_level_irq(unsigned int irq); -/* Voyager functions */ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); - /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); extern void smp_call_function_single_interrupt(struct pt_regs *); @@ -109,9 +89,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); #endif #endif -#ifdef CONFIG_X86_32 -extern void (*const interrupt[NR_VECTORS])(void); -#endif +extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h new file mode 100644 index 00000000000..369f5c5d09a --- /dev/null +++ b/arch/x86/include/asm/hypervisor.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2008, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef ASM_X86__HYPERVISOR_H +#define ASM_X86__HYPERVISOR_H + +extern unsigned long get_hypervisor_tsc_freq(void); +extern void init_hypervisor(struct cpuinfo_x86 *c); + +#endif diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 97989c0e534..50ca486fd88 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -129,24 +129,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -struct sigframe32 { - u32 pretcode; - int sig; - struct sigcontext_ia32 sc; - struct _fpstate_ia32 fpstate; - unsigned int extramask[_COMPAT_NSIG_WORDS-1]; -}; - -struct rt_sigframe32 { - u32 pretcode; - int sig; - u32 pinfo; - u32 puc; - compat_siginfo_t info; - struct ucontext_ia32 uc; - struct _fpstate_ia32 fpstate; -}; - struct ustat32 { __u32 f_tfree; compat_ino_t f_tinode; diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 44c89c3a23e..38d87379e27 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -8,8 +8,13 @@ struct notifier_block; void idle_notifier_register(struct notifier_block *n); void idle_notifier_unregister(struct notifier_block *n); +#ifdef CONFIG_X86_64 void enter_idle(void); void exit_idle(void); +#else /* !CONFIG_X86_64 */ +static inline void enter_idle(void) { } +static inline void exit_idle(void) { } +#endif /* CONFIG_X86_64 */ void c1e_remove_cpu(int cpu); diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index ac2abc88cd9..e5a2ab44cd5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -4,6 +4,8 @@ #define ARCH_HAS_IOREMAP_WC #include <linux/compiler.h> +#include <asm-generic/int-ll64.h> +#include <asm/page.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -45,21 +47,128 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define mmiowb() barrier() #ifdef CONFIG_X86_64 + build_mmio_read(readq, "q", unsigned long, "=r", :"memory") -build_mmio_read(__readq, "q", unsigned long, "=r", ) build_mmio_write(writeq, "q", unsigned long, "r", :"memory") -build_mmio_write(__writeq, "q", unsigned long, "r", ) -#define readq_relaxed(a) __readq(a) -#define __raw_readq __readq -#define __raw_writeq writeq +#else + +static inline __u64 readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + low = readl(p); + high = readl(p + 1); + + return low + ((u64)high << 32); +} + +static inline void writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val, addr); + writel(val >> 32, addr+4); +} -/* Let people know we have them */ -#define readq readq -#define writeq writeq #endif -extern int iommu_bio_merge; +#define readq_relaxed(a) readq(a) + +#define __raw_readq(a) readq(a) +#define __raw_writeq(val, addr) writeq(val, addr) + +/* Let people know that we have them */ +#define readq readq +#define writeq writeq + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void *phys_to_virt(phys_addr_t address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + */ +#define isa_virt_to_bus virt_to_phys +#define isa_page_to_bus page_to_phys +#define isa_bus_to_virt phys_to_virt + +/* + * However PCI ones are not necessarily 1:1 and therefore these interfaces + * are forbidden in portable PCI drivers. + * + * Allow them on x86 for legacy drivers, though. + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + +extern void iounmap(volatile void __iomem *addr); + +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + #ifdef CONFIG_X86_32 # include "io_32.h" @@ -72,7 +181,7 @@ extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); -extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); /* * early_ioremap() and early_iounmap() are for temporary early boot-time @@ -80,12 +189,12 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); * A boot-time mapping is currently limited to at most 16 pages. */ extern void early_ioremap_init(void); -extern void early_ioremap_clear(void); extern void early_ioremap_reset(void); extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); extern void __iomem *early_memremap(unsigned long offset, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +#define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index d8e242e1b39..a299900f592 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -37,8 +37,6 @@ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#define IO_SPACE_LIMIT 0xffff - #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ @@ -53,92 +51,6 @@ */ #define xlate_dev_kmem_ptr(p) p -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void *address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void *phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * If the area you are trying to map is a PCI BAR you should have a - * look at pci_iomap(). - */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} - -extern void iounmap(volatile void __iomem *addr); - -/* - * ISA I/O bus memory addresses are 1:1 with the physical address. - */ -#define isa_virt_to_bus virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt - -/* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index fea325a1122..244067893af 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -136,73 +136,12 @@ __OUTS(b) __OUTS(w) __OUTS(l) -#define IO_SPACE_LIMIT 0xffff - #if defined(__KERNEL__) && defined(__x86_64__) #include <linux/vmalloc.h> -#ifndef __i386__ -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void *address) -{ - return __pa(address); -} - -static inline void *phys_to_virt(unsigned long address) -{ - return __va(address); -} -#endif - -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - #include <asm-generic/iomap.h> -/* - * This one maps high address device memory and turns off caching for that area. - * it's useful if some control registers are in such an area and write combining - * or read caching is not desirable: - */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} - -extern void iounmap(volatile void __iomem *addr); - -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); - -/* - * ISA I/O bus memory addresses are 1:1 with the physical address. - */ -#define isa_virt_to_bus virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt - -/* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); @@ -232,8 +171,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define flush_write_buffers() -#define BIO_VMERGE_BOUNDARY iommu_bio_merge - /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6afd9933a7d..59cb4a1317b 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; @@ -156,14 +134,15 @@ extern int sis_apic_bug; /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; +/* 1 if "noapic" boot option passed */ +extern int noioapicquirk; + +/* -1 if "noapic" boot option passed */ +extern int noioapicreroute; + /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. @@ -188,17 +167,20 @@ extern void restore_IO_APIC_setup(void); extern void reinit_intr_remapped_IO_APIC(int); #endif -extern int probe_nr_irqs(void); +extern void probe_nr_irqs_gsi(void); +extern int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector); +extern void ioapic_write_entry(int apic, int pin, + struct IO_APIC_route_entry e); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -static inline void ioapic_init_mappings(void) { } +static inline void ioapic_init_mappings(void) { } -static inline int probe_nr_irqs(void) -{ - return NR_IRQS; -} +static inline void probe_nr_irqs_gsi(void) { } #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h new file mode 100644 index 00000000000..c1f06289b14 --- /dev/null +++ b/arch/x86/include/asm/iomap.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + +void +iounmap_atomic(void *kvaddr, enum km_type type); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index e4a552d4446..a6ee9e6f530 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,44 +6,8 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -extern int dmar_disabled; - -extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) -#ifdef CONFIG_GART_IOMMU -extern int gart_iommu_aperture; -extern int gart_iommu_aperture_allowed; -extern int gart_iommu_aperture_disabled; - -extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); -extern void __init gart_parse_options(char *); -extern void gart_iommu_hole_init(void); - -#else -#define gart_iommu_aperture 0 -#define gart_iommu_aperture_allowed 0 -#define gart_iommu_aperture_disabled 1 - -static inline void early_gart_iommu_check(void) -{ -} -static inline void gart_iommu_init(void) -{ -} -static inline void gart_iommu_shutdown(void) -{ -} -static inline void gart_parse_options(char *options) -{ -} -static inline void gart_iommu_hole_init(void) -{ -} -#endif - #endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa..5f2efc5d992 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_IPI_H #define _ASM_X86_IPI_H +#ifdef CONFIG_X86_LOCAL_APIC + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -55,8 +57,8 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, - unsigned int dest) +static inline void +__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -87,8 +89,8 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned int mask, int vector, - unsigned int dest) +static inline void + __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) { unsigned long cfg; @@ -117,22 +119,46 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector); +#include <asm/genapic.h> + +extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector); + +/* Avoid include hell */ +#define NMI_VECTOR 0x02 + +extern int no_broadcast; + +static inline void __default_local_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); +} - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicast to each CPU instead. - * - mbligh - */ - local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, mask) { - __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); +static inline void __default_local_send_IPI_all(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } +#ifdef CONFIG_X86_32 +extern void default_send_IPI_mask_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_allbutself(int vector); +extern void default_send_IPI_all(int vector); +extern void default_send_IPI_self(int vector); +#endif + +#endif + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index bae0eda9548..107eb219669 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -31,20 +31,19 @@ static inline int irq_canonicalize(int irq) # endif #endif -#ifdef CONFIG_IRQBALANCE -extern int irqbalance_disable(char *str); -#endif - #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> -extern void fixup_irqs(cpumask_t map); +extern void fixup_irqs(void); #endif -extern unsigned int do_IRQ(struct pt_regs *regs); extern void init_IRQ(void); extern void native_init_IRQ(void); +extern bool handle_irq(unsigned irq, struct pt_regs *regs); + +extern unsigned int do_IRQ(struct pt_regs *regs); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 89c898ab298..77843225b7e 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -1,5 +1,31 @@ -#ifdef CONFIG_X86_32 -# include "irq_regs_32.h" -#else -# include "irq_regs_64.h" -#endif +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge <jeremy@goop.org> + */ +#ifndef _ASM_X86_IRQ_REGS_H +#define _ASM_X86_IRQ_REGS_H + +#include <asm/percpu.h> + +#define ARCH_HAS_OWN_IRQ_REGS + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return percpu_read(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + percpu_write(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h deleted file mode 100644 index af2f02d27fc..00000000000 --- a/arch/x86/include/asm/irq_regs_32.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge <jeremy@goop.org> - */ -#ifndef _ASM_X86_IRQ_REGS_32_H -#define _ASM_X86_IRQ_REGS_32_H - -#include <asm/percpu.h> - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return x86_read_percpu(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h deleted file mode 100644 index 3dd9c0b7027..00000000000 --- a/arch/x86/include/asm/irq_regs_64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f94..b07278c55e9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -1,47 +1,69 @@ #ifndef _ASM_X86_IRQ_VECTORS_H #define _ASM_X86_IRQ_VECTORS_H -#include <linux/threads.h> +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... 237 : device interrupts + * Vectors 238 ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ -#define NMI_VECTOR 0x02 +#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: */ -#define FIRST_EXTERNAL_VECTOR 0x20 +#define FIRST_EXTERNAL_VECTOR 0x20 #ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 +# define SYSCALL_VECTOR 0x80 #else -# define IA32_SYSCALL_VECTOR 0x80 +# define IA32_SYSCALL_VECTOR 0x80 #endif /* * Reserve the lowest usable priority level 0x20 - 0x2f for triggering * cleanup after irq migration. */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* * Vectors 0x30-0x3f are used for ISA interrupts. */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) + +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -49,108 +71,98 @@ * some of the following vectors are 'rare', they are merged * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). */ -#ifdef CONFIG_X86_32 - -# define SPURIOUS_APIC_VECTOR 0xff -# define ERROR_APIC_VECTOR 0xfe -# define INVALIDATE_TLB_VECTOR 0xfd -# define RESCHEDULE_VECTOR 0xfc -# define CALL_FUNCTION_VECTOR 0xfb -# define CALL_FUNCTION_SINGLE_VECTOR 0xfa -# define THERMAL_APIC_VECTOR 0xf0 - -#else #define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + #define ERROR_APIC_VECTOR 0xfe #define RESCHEDULE_VECTOR 0xfd #define CALL_FUNCTION_VECTOR 0xfc #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define UV_BAU_MESSAGE 0xf8 -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 +#ifdef CONFIG_X86_32 +/* 0xf8 - 0xf9 : free */ +#else +# define THRESHOLD_APIC_VECTOR 0xf9 +# define UV_BAU_MESSAGE 0xf8 #endif +/* f0-f7 used for spreading out TLB flushes: */ +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 +#define NUM_INVALIDATE_TLB_VECTORS 8 + /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ * sources per level' errata. */ -#define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xef + +/* + * Performance monitoring interrupt vector: + */ +#define LOCAL_PERF_VECTOR 0xee /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) - -#define NR_VECTORS 256 +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FPU_IRQ 13 +#define NR_VECTORS 256 -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +#define FPU_IRQ 13 -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 -#elif defined(CONFIG_X86_VOYAGER) +#ifndef __ASSEMBLY__ +static inline int invalid_vm86_irq(int irq) +{ + return irq < 3 || irq > 15; +} +#endif -# define NR_IRQS 224 +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ -#else /* IO_APIC || VOYAGER */ +#define NR_IRQS_LEGACY 16 -# define NR_IRQS 16 +#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) +#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#ifdef CONFIG_X86_IO_APIC +# ifdef CONFIG_SPARSE_IRQ +# define NR_IRQS \ + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +# else +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) +# else +# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +# endif +# endif +#else /* !CONFIG_X86_IO_APIC: */ +# define NR_IRQS NR_IRQS_LEGACY #endif -/* Voyager specific defines */ -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - - #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index b95162af0bf..d2e3bf3608a 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -6,7 +6,7 @@ * */ -#include <asm/types.h> +#include <linux/types.h> #include <linux/ioctl.h> /* Architectural interrupt line count. */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa..730843d1d2f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include <asm/pvclock-abi.h> #include <asm/desc.h> +#include <asm/mtrr.h> #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -86,6 +87,7 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern spinlock_t kvm_lock; @@ -180,6 +182,8 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head oos_link; + /* * The following two entries are used to key the shadow page in the * hash table. @@ -190,13 +194,16 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - unsigned long slot_bitmap; /* One bit set per slot which has memory - * in this shadow page. - */ + /* + * One bit set per slot which has memory + * in this shadow page. + */ + DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool unsync_children; + bool global; + unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ @@ -327,8 +334,10 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; + bool nmi_window_open; - u64 mtrr[0x100]; + struct mtrr_state_type mtrr_state; + u32 pat; }; struct kvm_mem_alias { @@ -350,11 +359,13 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct dmar_domain *intel_iommu_domain; + struct list_head oos_global_pages; + struct iommu_domain *iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; + int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -378,6 +389,7 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; + u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -397,6 +409,7 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; + u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -405,6 +418,7 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; + u32 nmi_injections; }; struct descriptor_table { @@ -477,6 +491,7 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); + int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); + const u8 *new, int bytes, + bool guest_initiated); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -607,6 +624,8 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); + static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); @@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f20..6a159732881 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,6 +123,7 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; + struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; @@ -146,22 +147,18 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; - /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* decode cache */ - struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -170,7 +167,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. */ -#if defined(__i386__) +#if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 #elif defined(CONFIG_X86_64) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index d28a507cef3..1caf57628b9 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -15,7 +15,7 @@ #define SHARED_SWITCHER_PAGES \ DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) /* Pages for switcher itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) /* We map at -4M for ease of mapping into the guest (one PTE page). */ #define SWITCHER_ADDR 0xFFC00000 diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f61ee8f937e..5d98d0b68ff 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -57,5 +57,65 @@ #define __ALIGN_STR ".align 16,0x90" #endif +/* + * to check ENTRY_X86/END_X86 and + * KPROBE_ENTRY_X86/KPROBE_END_X86 + * unbalanced-missed-mixed appearance + */ +#define __set_entry_x86 .set ENTRY_X86_IN, 0 +#define __unset_entry_x86 .set ENTRY_X86_IN, 1 +#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 +#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 + +#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" + +#define __check_entry_x86 \ + .ifdef ENTRY_X86_IN; \ + .ifeq ENTRY_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_kprobe_x86 \ + .ifdef KPROBE_X86_IN; \ + .ifeq KPROBE_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_entry_kprobe_x86 \ + __check_entry_x86; \ + __check_kprobe_x86 + +#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 + +#define ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_entry_x86; \ + .globl name; \ + __ALIGN; \ + name: + +#define END_X86(name) \ + __unset_entry_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name + +#define KPROBE_ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_kprobe_x86; \ + .pushsection .kprobes.text, "ax"; \ + .globl name; \ + __ALIGN; \ + name: + +#define KPROBE_END_X86(name) \ + __unset_kprobe_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name; \ + .popsection + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h deleted file mode 100644 index ff3a6c236c0..00000000000 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ /dev/null @@ -1,156 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H -#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H - -#ifdef CONFIG_X86_LOCAL_APIC - -#include <mach_apicdef.h> -#include <asm/smp.h> - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline cpumask_t target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_map; -#else - return cpumask_of_cpu(0); -#endif -} - -#define NO_BALANCE_IRQ (0) -#define esr_disable (0) - -#ifdef CONFIG_X86_64 -#include <asm/genapic.h> -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define phys_pkg_id (genapic->phys_pkg_id) -#define vector_allocation_domain (genapic->vector_allocation_domain) -#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) -#define send_IPI_self (genapic->send_IPI_self) -extern void setup_apic_routing(void); -#else -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ -#define TARGET_CPUS (target_cpus()) -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -static inline int apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return cpus_addr(cpumask)[0]; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static inline void setup_apic_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Flat", nr_ioapics); -#endif -} - -static inline int apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -static inline cpumask_t vector_allocation_domain(int cpu) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; -} -#endif - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -static inline void setup_portio_remap(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - -static inline void enable_apic_mode(void) -{ -} -#endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h deleted file mode 100644 index 53179936d6c..00000000000 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H -#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H - -#include <asm/apic.h> - -#ifdef CONFIG_X86_64 -#define APIC_ID_MASK (genapic->apic_id_mask) -#define GET_APIC_ID(x) (genapic->get_apic_id(x)) -#define SET_APIC_ID(x) (genapic->set_apic_id(x)) -#else -#define APIC_ID_MASK (0xF<<24) -static inline unsigned get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_XAPIC(ver)) - return (((x)>>24)&0xFF); - else - return (((x)>>24)&0xF); -} - -#define GET_APIC_ID(x) get_apic_id(x) -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h deleted file mode 100644 index fabca01ebac..00000000000 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H -#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -void send_IPI_mask_bitmask(cpumask_t mask, int vector); -void __send_IPI_shortcut(unsigned int shortcut, int vector); - -extern int no_broadcast; - -#ifdef CONFIG_X86_64 -#include <asm/genapic.h> -#define send_IPI_mask (genapic->send_IPI_mask) -#else -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_bitmask(mask, vector); -} -#endif - -static inline void __local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - send_IPI_mask(mask, vector); - } else - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -static inline void __local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_map, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -#ifdef CONFIG_X86_64 -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) -#else -static inline void send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __local_send_IPI_allbutself(vector); - return; -} - -static inline void send_IPI_all(int vector) -{ - __local_send_IPI_all(vector); -} -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h deleted file mode 100644 index 8c1ea21238a..00000000000 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H - -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-default/mach_mpspec.h b/arch/x86/include/asm/mach-default/mach_mpspec.h deleted file mode 100644 index e85ede686be..00000000000 --- a/arch/x86/include/asm/mach-default/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -#if CONFIG_BASE_SMALL == 0 -#define MAX_MP_BUSSES 256 -#else -#define MAX_MP_BUSSES 32 -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h deleted file mode 100644 index 9d80db91e99..00000000000 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H - -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#define WAKE_SECONDARY_VIA_INIT - -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid - -static inline void wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); - return; -} - -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -#define inquire_remote_apic(apicid) do { \ - if (apic_verbosity >= APIC_DEBUG) \ - __inquire_remote_apic(apicid); \ - } while (0) - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-generic/gpio.h b/arch/x86/include/asm/mach-generic/gpio.h deleted file mode 100644 index 995c45efdb3..00000000000 --- a/arch/x86/include/asm/mach-generic/gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_GPIO_H -#define _ASM_X86_MACH_GENERIC_GPIO_H - -int gpio_request(unsigned gpio, const char *label); -void gpio_free(unsigned gpio); -int gpio_direction_input(unsigned gpio); -int gpio_direction_output(unsigned gpio, int value); -int gpio_get_value(unsigned gpio); -void gpio_set_value(unsigned gpio, int value); -int gpio_to_irq(unsigned gpio); -int irq_to_gpio(unsigned irq); - -#include <asm-generic/gpio.h> /* cansleep wrappers */ - -#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h deleted file mode 100644 index 5180bd7478f..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H -#define _ASM_X86_MACH_GENERIC_MACH_APIC_H - -#include <asm/genapic.h> - -#define esr_disable (genapic->ESR_DISABLE) -#define NO_BALANCE_IRQ (genapic->no_balance_irq) -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) -#define setup_apic_routing (genapic->setup_apic_routing) -#define multi_timer_check (genapic->multi_timer_check) -#define apicid_to_node (genapic->apicid_to_node) -#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) -#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) -#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) -#define setup_portio_remap (genapic->setup_portio_remap) -#define check_apicid_present (genapic->check_apicid_present) -#define check_phys_apicid_present (genapic->check_phys_apicid_present) -#define check_apicid_used (genapic->check_apicid_used) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define vector_allocation_domain (genapic->vector_allocation_domain) -#define enable_apic_mode (genapic->enable_apic_mode) -#define phys_pkg_id (genapic->phys_pkg_id) - -extern void generic_bigsmp_probe(void); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h deleted file mode 100644 index 68041f3802f..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H -#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H - -#ifndef APIC_DEFINITION -#include <asm/genapic.h> - -#define GET_APIC_ID (genapic->get_apic_id) -#define APIC_ID_MASK (genapic->apic_id_mask) -#endif - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h deleted file mode 100644 index ffd637e3c3d..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H -#define _ASM_X86_MACH_GENERIC_MACH_IPI_H - -#include <asm/genapic.h> - -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) - -#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h deleted file mode 100644 index 048f1d46853..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H -#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H - - -extern int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); - -extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h deleted file mode 100644 index bbab5ccfd4f..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H -#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ -/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#define MAX_MP_BUSSES 260 - -extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h deleted file mode 100644 index c210ab5788b..00000000000 --- a/arch/x86/include/asm/mach-rdc321x/gpio.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _ASM_X86_MACH_RDC321X_GPIO_H -#define _ASM_X86_MACH_RDC321X_GPIO_H - -#include <linux/kernel.h> - -extern int rdc_gpio_get_value(unsigned gpio); -extern void rdc_gpio_set_value(unsigned gpio, int value); -extern int rdc_gpio_direction_input(unsigned gpio); -extern int rdc_gpio_direction_output(unsigned gpio, int value); -extern int rdc_gpio_request(unsigned gpio, const char *label); -extern void rdc_gpio_free(unsigned gpio); -extern void __init rdc321x_gpio_setup(void); - -/* Wrappers for the arch-neutral GPIO API */ - -static inline int gpio_request(unsigned gpio, const char *label) -{ - return rdc_gpio_request(gpio, label); -} - -static inline void gpio_free(unsigned gpio) -{ - might_sleep(); - rdc_gpio_free(gpio); -} - -static inline int gpio_direction_input(unsigned gpio) -{ - return rdc_gpio_direction_input(gpio); -} - -static inline int gpio_direction_output(unsigned gpio, int value) -{ - return rdc_gpio_direction_output(gpio, value); -} - -static inline int gpio_get_value(unsigned gpio) -{ - return rdc_gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned gpio, int value) -{ - rdc_gpio_set_value(gpio, value); -} - -static inline int gpio_to_irq(unsigned gpio) -{ - return gpio; -} - -static inline int irq_to_gpio(unsigned irq) -{ - return irq; -} - -/* For cansleep */ -#include <asm-generic/gpio.h> - -#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */ diff --git a/arch/x86/include/asm/mach-default/mach_timer.h b/arch/x86/include/asm/mach_timer.h index 853728519ae..853728519ae 100644 --- a/arch/x86/include/asm/mach-default/mach_timer.h +++ b/arch/x86/include/asm/mach_timer.h diff --git a/arch/x86/include/asm/mach-default/mach_traps.h b/arch/x86/include/asm/mach_traps.h index f7920601e47..f7920601e47 100644 --- a/arch/x86/include/asm/mach-default/mach_traps.h +++ b/arch/x86/include/asm/mach_traps.h diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 5a65b107ad5..031f6266f42 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -1,31 +1,18 @@ #ifndef _ASM_X86_MATH_EMU_H #define _ASM_X86_MATH_EMU_H +#include <asm/ptrace.h> +#include <asm/vm86.h> + /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. */ -struct info { +struct math_emu_info { long ___orig_eip; - long ___ebx; - long ___ecx; - long ___edx; - long ___esi; - long ___edi; - long ___ebp; - long ___eax; - long ___ds; - long ___es; - long ___fs; - long ___orig_eax; - long ___eip; - long ___cs; - long ___eflags; - long ___esp; - long ___ss; - long ___vm86_es; /* This and the following only in vm86 mode */ - long ___vm86_ds; - long ___vm86_fs; - long ___vm86_gs; + union { + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; + }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1d6e17c2f23..32c6e17b960 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -3,8 +3,8 @@ #ifdef __x86_64__ +#include <linux/types.h> #include <asm/ioctls.h> -#include <asm/types.h> /* * Machine Check support for x86 @@ -115,8 +115,6 @@ extern int mce_notify_user(void); #endif /* !CONFIG_X86_32 */ - - #ifdef CONFIG_X86_MCE extern void mcheck_init(struct cpuinfo_x86 *c); #else @@ -126,5 +124,4 @@ extern void stop_mce(void); extern void restart_mce(void); #endif /* __KERNEL__ */ - #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8aeeb3fd73d..f923203dc39 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); -#ifdef CONFIG_X86_32 -# include "mmu_context_32.h" -#else -# include "mmu_context_64.h" + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); +#endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ + load_cr3(next->pgd); + + /* + * load the LDT, if the LDT is different: + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} #define activate_mm(prev, next) \ do { \ @@ -33,5 +76,17 @@ do { \ switch_mm((prev), (next), NULL); \ } while (0); +#ifdef CONFIG_X86_32 +#define deactivate_mm(tsk, mm) \ +do { \ + lazy_load_gs(0); \ +} while (0) +#else +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + loadsegment(fs, 0); \ +} while (0) +#endif #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h deleted file mode 100644 index 8e10015781f..00000000000 --- a/arch/x86/include/asm/mmu_context_32.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_32_H -#define _ASM_X86_MMU_CONTEXT_32_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - unsigned cpu = smp_processor_id(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; -#endif -} - -static inline void switch_mm(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; - per_cpu(cpu_tlbstate, cpu).active_mm = next; -#endif - cpu_set(cpu, next->cpu_vm_mask); - - /* Re-load page tables */ - load_cr3(next->pgd); - - /* - * load the LDT, if the LDT is different: - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; - BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload %cr3. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%gs": :"r" (0)); - -#endif /* _ASM_X86_MMU_CONTEXT_32_H */ diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h deleted file mode 100644 index 677d36e9540..00000000000 --- a/arch/x86/include/asm/mmu_context_64.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_64_H -#define _ASM_X86_MMU_CONTEXT_64_H - -#include <asm/pda.h> - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned cpu = smp_processor_id(); - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - load_cr3(next->pgd); - - if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ -do { \ - load_gs_index(0); \ - asm volatile("movl %0,%%fs"::"r"(0)); \ -} while (0) - -#endif /* _ASM_X86_MMU_CONTEXT_64_H */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 485bdf059ff..07f1af494ca 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void) extern int early_pfn_to_nid(unsigned long pfn); +extern void resume_map_numa_kva(pgd_t *pgd); + #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat +static inline void resume_map_numa_kva(pgd_t *pgd) {} + #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 91885c28f66..5916c8df09d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -6,13 +6,24 @@ #include <asm/mpspec_def.h> extern int apic_version[MAX_APICS]; +extern int pic_mode; #ifdef CONFIG_X86_32 -#include <mach_mpspec.h> + +/* + * Summit or generic (i.e. installer) kernels need lots of bus entries. + * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. + */ +#if CONFIG_BASE_SMALL == 0 +# define MAX_MP_BUSSES 260 +#else +# define MAX_MP_BUSSES 32 +#endif + +#define MAX_IRQ_SOURCES 256 extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; -extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; @@ -20,15 +31,15 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES]; extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #endif -#define MAX_APICID 256 +#define MAX_APICID 256 -#else +#else /* CONFIG_X86_64: */ -#define MAX_MP_BUSSES 256 +#define MAX_MP_BUSSES 256 /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ -#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) -#endif +#endif /* CONFIG_X86_64 */ extern void early_find_smp_config(void); extern void early_get_smp_config(void); @@ -45,11 +56,13 @@ extern int smp_found_config; extern int mpc_default_type; extern unsigned long mp_lapic_addr; -extern void find_smp_config(void); extern void get_smp_config(void); + #ifdef CONFIG_X86_MPPARSE +extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); #else +static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #endif @@ -60,9 +73,12 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); +extern int mp_find_ioapic(int gsi); +extern int mp_find_ioapic_pin(int ioapic, int gsi); #else static inline int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, @@ -71,6 +87,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) @@ -142,4 +163,10 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; +extern int generic_mps_oem_check(struct mpc_table *, char *, char *); + +extern int default_acpi_madt_oem_check(char *, char *); + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index e3ace7d1d35..4a7f96d7c18 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,32 +24,33 @@ # endif #endif -struct intel_mp_floating { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" -struct mp_config_table { - char mpc_signature[4]; - unsigned short mpc_length; /* Size of table */ - char mpc_spec; /* 0x01 */ - char mpc_checksum; - char mpc_oem[8]; - char mpc_productid[12]; - unsigned int mpc_oemptr; /* 0 if not present */ - unsigned short mpc_oemsize; /* 0 if not present */ - unsigned short mpc_oemcount; - unsigned int mpc_lapic; /* APIC address */ +struct mpc_table { + char signature[4]; + unsigned short length; /* Size of table */ + char spec; /* 0x01 */ + char checksum; + char oem[8]; + char productid[12]; + unsigned int oemptr; /* 0 if not present */ + unsigned short oemsize; /* 0 if not present */ + unsigned short oemcount; + unsigned int lapic; /* APIC address */ unsigned int reserved; }; @@ -70,20 +71,20 @@ struct mp_config_table { #define CPU_MODEL_MASK 0x00F0 #define CPU_FAMILY_MASK 0x0F00 -struct mpc_config_processor { - unsigned char mpc_type; - unsigned char mpc_apicid; /* Local APIC number */ - unsigned char mpc_apicver; /* Its versions */ - unsigned char mpc_cpuflag; - unsigned int mpc_cpufeature; - unsigned int mpc_featureflag; /* CPUID feature value */ - unsigned int mpc_reserved[2]; +struct mpc_cpu { + unsigned char type; + unsigned char apicid; /* Local APIC number */ + unsigned char apicver; /* Its versions */ + unsigned char cpuflag; + unsigned int cpufeature; + unsigned int featureflag; /* CPUID feature value */ + unsigned int reserved[2]; }; -struct mpc_config_bus { - unsigned char mpc_type; - unsigned char mpc_busid; - unsigned char mpc_bustype[6]; +struct mpc_bus { + unsigned char type; + unsigned char busid; + unsigned char bustype[6]; }; /* List of Bus Type string values, Intel MP Spec. */ @@ -108,22 +109,22 @@ struct mpc_config_bus { #define MPC_APIC_USABLE 0x01 -struct mpc_config_ioapic { - unsigned char mpc_type; - unsigned char mpc_apicid; - unsigned char mpc_apicver; - unsigned char mpc_flags; - unsigned int mpc_apicaddr; +struct mpc_ioapic { + unsigned char type; + unsigned char apicid; + unsigned char apicver; + unsigned char flags; + unsigned int apicaddr; }; -struct mpc_config_intsrc { - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbus; - unsigned char mpc_srcbusirq; - unsigned char mpc_dstapic; - unsigned char mpc_dstirq; +struct mpc_intsrc { + unsigned char type; + unsigned char irqtype; + unsigned short irqflag; + unsigned char srcbus; + unsigned char srcbusirq; + unsigned char dstapic; + unsigned char dstirq; }; enum mp_irq_source_types { @@ -139,24 +140,24 @@ enum mp_irq_source_types { #define MP_APIC_ALL 0xFF -struct mpc_config_lintsrc { - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbusid; - unsigned char mpc_srcbusirq; - unsigned char mpc_destapic; - unsigned char mpc_destapiclint; +struct mpc_lintsrc { + unsigned char type; + unsigned char irqtype; + unsigned short irqflag; + unsigned char srcbusid; + unsigned char srcbusirq; + unsigned char destapic; + unsigned char destapiclint; }; #define MPC_OEM_SIGNATURE "_OEM" -struct mp_config_oemtable { - char oem_signature[4]; - unsigned short oem_length; /* Size of table */ - char oem_rev; /* 0x01 */ - char oem_checksum; - char mpc_oem[8]; +struct mpc_oemtable { + char signature[4]; + unsigned short length; /* Size of table */ + char rev; /* 0x01 */ + char checksum; + char mpc[8]; }; /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e38859d577a..358acc59ae0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -85,7 +85,9 @@ /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ +#define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 @@ -200,6 +202,35 @@ #define MSR_IA32_THERM_STATUS 0x0000019c #define MSR_IA32_MISC_ENABLE 0x000001a0 +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) + /* Intel Model 6 */ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c2a812ebde8..638bf624180 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) } /* - * i386 calling convention returns 64-bit value in edx:eax, while - * x86_64 returns at rax. Also, the "A" constraint does not really - * mean rdx:rax in x86_64, so we need specialized behaviour for each - * architecture + * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" + * constraint has different meanings. For i386, "A" means exactly + * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, + * it means rax *or* rdx. */ #ifdef CONFIG_X86_64 #define DECLARE_ARGS(val, low, high) unsigned low, high @@ -85,7 +85,8 @@ static inline void native_write_msr(unsigned int msr, asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); } -static inline int native_write_msr_safe(unsigned int msr, +/* Can be uninlined because referenced by paravirt */ +notrace static inline int native_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int err; @@ -181,10 +182,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) } #define rdtscl(low) \ - ((low) = (u32)native_read_tsc()) + ((low) = (u32)__native_read_tsc()) #define rdtscll(val) \ - ((val) = native_read_tsc()) + ((val) = __native_read_tsc()) #define rdpmc(counter, low, high) \ do { \ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31..a51ada8467d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -23,6 +23,7 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include <linux/types.h> #include <linux/ioctl.h> #include <linux/errno.h> @@ -57,6 +58,31 @@ struct mtrr_gentry { }; #endif /* !__i386__ */ +struct mtrr_var_range { + __u32 base_lo; + __u32 base_hi; + __u32 mask_lo; + __u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef __u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 1e8bd30b4c1..9f0a5f5d29e 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h @@ -31,6 +31,8 @@ extern int found_numaq; extern int get_memcfg_numaq(void); +extern void *xquad_portio; + /* * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the */ diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h deleted file mode 100644 index 0bf2a06b7a4..00000000000 --- a/arch/x86/include/asm/numaq/apic.h +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef __ASM_NUMAQ_APIC_H -#define __ASM_NUMAQ_APIC_H - -#include <asm/io.h> -#include <linux/mmzone.h> -#include <linux/nodemask.h> - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline cpumask_t target_cpus(void) -{ - return CPU_MASK_ALL; -} - -#define NO_BALANCE_IRQ (1) -#define esr_disable (1) - -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int apic_id_registered(void) -{ - return 1; -} - -static inline void init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) -{ - int node = apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline void setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return (1); -} - -static inline void enable_apic_mode(void) -{ -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return (int) 0xF; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h deleted file mode 100644 index e012a46cc22..00000000000 --- a/arch/x86/include/asm/numaq/apicdef.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __ASM_NUMAQ_APICDEF_H -#define __ASM_NUMAQ_APICDEF_H - - -#define APIC_ID_MASK (0xF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0x0F); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h deleted file mode 100644 index 935588d286c..00000000000 --- a/arch/x86/include/asm/numaq/ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __ASM_NUMAQ_IPI_H -#define __ASM_NUMAQ_IPI_H - -void send_IPI_mask_sequence(cpumask_t, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h deleted file mode 100644 index 252292e077b..00000000000 --- a/arch/x86/include/asm/numaq/mpparse.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_NUMAQ_MPPARSE_H -#define __ASM_NUMAQ_MPPARSE_H - -extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); - -#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h deleted file mode 100644 index c577bda5b1c..00000000000 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __ASM_NUMAQ_WAKECPU_H -#define __ASM_NUMAQ_WAKECPU_H - -/* This file copes with machines that wakeup secondary CPUs by NMIs */ - -#define WAKE_SECONDARY_VIA_NMI - -#define TRAMPOLINE_LOW phys_to_virt(0x8) -#define TRAMPOLINE_HIGH phys_to_virt(0xa) - -#define boot_cpu_apicid boot_cpu_logical_apicid - -/* We don't do anything here because we use NMI's to boot instead */ -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -} - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it. - */ -static inline void smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); - *low = *((volatile unsigned short *) TRAMPOLINE_LOW); -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Restoring NMI vector\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; - *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; -} - -#define inquire_remote_apic(apicid) {} - -#endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..40226999cbf 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -95,6 +95,11 @@ static inline pgdval_t native_pgd_val(pgd_t pgd) return pgd.pgd; } +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + #if PAGETABLE_LEVELS >= 3 #if PAGETABLE_LEVELS == 4 typedef struct { pudval_t pud; } pud_t; @@ -117,6 +122,11 @@ static inline pudval_t native_pud_val(pud_t pud) } #endif /* PAGETABLE_LEVELS == 4 */ +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + typedef struct { pmdval_t pmd; } pmd_t; static inline pmd_t native_make_pmd(pmdval_t val) @@ -128,6 +138,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) { return pmd.pmd; } + #else /* PAGETABLE_LEVELS == 2 */ #include <asm-generic/pgtable-nopmd.h> @@ -137,6 +148,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #endif /* PAGETABLE_LEVELS >= 3 */ +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} + static inline pte_t native_make_pte(pteval_t val) { return (pte_t) { .pte = val }; @@ -147,7 +163,7 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t native_pte_flags(pte_t pte) +static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } @@ -173,7 +189,6 @@ static inline pteval_t native_pte_flags(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f..e27fdbe5f9e 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..1c244b64573 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -12,21 +12,38 @@ #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) #define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) -#ifdef CONFIG_X86_64 -#define CLBR_RSI (1 << 3) -#define CLBR_RDI (1 << 4) +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) +#define CLBR_SCRATCH (0) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) #define CLBR_R8 (1 << 5) #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) + #define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + #include <asm/desc_defs.h> -#else -/* CLBR_ANY should match all regs platform has. For i386, that's just it */ -#define CLBR_ANY ((1 << 3) - 1) #endif /* X86_64 */ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/cpumask.h> @@ -40,6 +57,14 @@ struct tss_struct; struct mm_struct; struct desc_struct; +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -189,11 +214,15 @@ struct pv_irq_ops { * expected to use X86_EFLAGS_IF; all other bits * returned from save_fl are undefined, and may be ignored by * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); void (*halt)(void); @@ -244,7 +273,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -278,12 +308,11 @@ struct pv_mmu_ops { void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - pteval_t (*pte_val)(pte_t); - pteval_t (*pte_flags)(pte_t); - pte_t (*make_pte)(pteval_t pte); + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; - pgdval_t (*pgd_val)(pgd_t); - pgd_t (*make_pgd)(pgdval_t pgd); + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE @@ -298,12 +327,12 @@ struct pv_mmu_ops { void (*set_pud)(pud_t *pudp, pud_t pudval); - pmdval_t (*pmd_val)(pmd_t); - pmd_t (*make_pmd)(pmdval_t pmd); + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; #if PAGETABLE_LEVELS == 4 - pudval_t (*pud_val)(pud_t); - pud_t (*make_pud)(pudval_t pud); + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ @@ -388,6 +417,8 @@ extern struct pv_lock_ops pv_lock_ops; asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, @@ -479,25 +510,45 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS -#else -#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#else /* CONFIG_X86_64 */ +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ "=S" (__esi), "=d" (__edx), \ "=c" (__ecx) - #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif +#endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) @@ -505,10 +556,11 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif -#define __PVOP_CALL(rettype, op, pre, post, ...) \ +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ + pre, post, ...) \ ({ \ rettype __ret; \ - PVOP_CALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ /* This is 32-bit specific, but is okay in 64-bit */ \ /* since this condition will never hold */ \ @@ -516,70 +568,113 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)__eax; \ } \ __ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ + +#define __PVOP_CALL(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ ({ \ PVOP_VCALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_VCALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" VEXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ }) +#define __PVOP_VCALL(op, pre, post, ...) \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, \ + pre, post, ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + + #define PVOP_CALL0(rettype, op) \ __PVOP_CALL(rettype, op, "", "") #define PVOP_VCALL0(op) \ __PVOP_VCALL(op, "", "") +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op, "", "") +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op, "", "") + + #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALLEE2(op, arg1, arg2) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) /* This is the only difference in x86_64. We can make it much simpler */ #ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ __PVOP_VCALL(op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ @@ -587,13 +682,13 @@ int paravirt_disable_iospace(void); "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) #else #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_CALL(rettype, op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_VCALL(op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #endif static inline int paravirt_enabled(void) @@ -984,10 +1079,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -1059,13 +1155,13 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, + pv_mmu_ops.make_pte, + val); return (pte_t) { .pte = ret }; } @@ -1075,29 +1171,12 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, - pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); - - return ret; -} - -static inline pteval_t pte_flags(pte_t pte) -{ - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, - pte.pte, (u64)pte.pte >> 32); + ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); -#ifdef CONFIG_PARAVIRT_DEBUG - BUG_ON(ret & PTE_PFN_MASK); -#endif return ret; } @@ -1106,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, + val); return (pgd_t) { ret }; } @@ -1120,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); return ret; } @@ -1188,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, + val); return (pmd_t) { ret }; } @@ -1202,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); return ret; } @@ -1228,11 +1307,11 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, + val); return (pud_t) { ret }; } @@ -1242,11 +1321,11 @@ static inline pudval_t pud_val(pud_t pud) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); return ret; } @@ -1387,9 +1466,10 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, } void _paravirt_nop(void); -#define paravirt_nop ((void *)_paravirt_nop) +u32 _paravirt_ident_32(u32); +u64 _paravirt_ident_64(u64); -void paravirt_use_bytelocks(void); +#define paravirt_nop ((void *)_paravirt_nop) #ifdef CONFIG_SMP @@ -1402,6 +1482,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) { @@ -1438,12 +1519,37 @@ extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" +#define PV_RESTORE_REGS "popl %edx; popl %ecx;" + +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" + #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" + /* We save some registers, but all of them, that's too much. We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" @@ -1453,52 +1559,76 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_FLAGS_ARG "D" #endif +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + extern typeof(func) __raw_callee_save_##func; \ + static void *__##func##__ __used = func; \ + \ + asm(".pushsection .text;" \ + "__raw_callee_save_" #func ": " \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + "ret;" \ + ".popsection") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_VEXTRA_CLOBBERS); + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_EXTRA_CLOBBERS); + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) @@ -1541,33 +1671,49 @@ static inline unsigned long __raw_local_irq_save(void) .popsection +#define COND_PUSH(set, mask, reg) \ + .if ((~(set)) & mask); push %reg; .endif +#define COND_POP(set, mask, reg) \ + .if ((~(set)) & mask); pop %reg; .endif + #ifdef CONFIG_X86_64 -#define PV_SAVE_REGS \ - push %rax; \ - push %rcx; \ - push %rdx; \ - push %rsi; \ - push %rdi; \ - push %r8; \ - push %r9; \ - push %r10; \ - push %r11 -#define PV_RESTORE_REGS \ - pop %r11; \ - pop %r10; \ - pop %r9; \ - pop %r8; \ - pop %rdi; \ - pop %rsi; \ - pop %rdx; \ - pop %rcx; \ - pop %rax + +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_RAX, rax); \ + COND_PUSH(set, CLBR_RCX, rcx); \ + COND_PUSH(set, CLBR_RDX, rdx); \ + COND_PUSH(set, CLBR_RSI, rsi); \ + COND_PUSH(set, CLBR_RDI, rdi); \ + COND_PUSH(set, CLBR_R8, r8); \ + COND_PUSH(set, CLBR_R9, r9); \ + COND_PUSH(set, CLBR_R10, r10); \ + COND_PUSH(set, CLBR_R11, r11) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_R11, r11); \ + COND_POP(set, CLBR_R10, r10); \ + COND_POP(set, CLBR_R9, r9); \ + COND_POP(set, CLBR_R8, r8); \ + COND_POP(set, CLBR_RDI, rdi); \ + COND_POP(set, CLBR_RSI, rsi); \ + COND_POP(set, CLBR_RDX, rdx); \ + COND_POP(set, CLBR_RCX, rcx); \ + COND_POP(set, CLBR_RAX, rax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else -#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx -#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_EAX, eax); \ + COND_PUSH(set, CLBR_EDI, edi); \ + COND_PUSH(set, CLBR_ECX, ecx); \ + COND_PUSH(set, CLBR_EDX, edx) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_EDX, edx); \ + COND_POP(set, CLBR_ECX, ecx); \ + COND_POP(set, CLBR_EDI, edi); \ + COND_POP(set, CLBR_EAX, eax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr @@ -1579,15 +1725,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS;) \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS;) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ @@ -1617,11 +1763,15 @@ static inline unsigned long __raw_local_irq_save(void) PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ swapgs) +/* + * Note: swapgs is very special, and in practise is either going to be + * implemented with a single "swapgs" instruction or something very + * special. Either way, we don't need to save any registers for + * it. + */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - PV_SAVE_REGS; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ - PV_RESTORE_REGS \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ ) #define GET_CR2_INTO_RCX \ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index b8493b3b989..9709fdff661 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,10 +5,8 @@ #ifdef CONFIG_X86_PAT extern int pat_enabled; -extern void validate_pat_support(struct cpuinfo_x86 *c); #else static const int pat_enabled; -static inline void validate_pat_support(struct cpuinfo_x86 *c) { } #endif extern void pat_init(void); @@ -17,6 +15,4 @@ extern int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type); extern int free_memtype(u64 start, u64 end); -extern void pat_disable(char *reason); - #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/mach-default/pci-functions.h b/arch/x86/include/asm/pci-functions.h index ed0bab42735..ed0bab42735 100644 --- a/arch/x86/include/asm/mach-default/pci-functions.h +++ b/arch/x86/include/asm/pci-functions.h diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 875b38edf19..a977de23cb4 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -19,6 +19,8 @@ struct pci_sysdata { }; extern int pci_routeirq; +extern int noioapicquirk; +extern int noioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, @@ -82,6 +84,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, static inline void early_quirks(void) { } #endif +extern void pci_iommu_alloc(void); + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 @@ -98,9 +102,9 @@ static inline void early_quirks(void) { } #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@ -109,6 +113,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index 5b28995d664..4da20798277 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); extern void dma32_reserve_bootmem(void); -extern void pci_iommu_alloc(void); /* The PCI address space does equal the physical memory * address space. The networking and block device layers use @@ -34,8 +33,6 @@ extern void pci_iommu_alloc(void); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -49,18 +46,6 @@ extern void pci_iommu_alloc(void); #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -#else -/* No IOMMU */ - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h new file mode 100644 index 00000000000..e60fd3e14bd --- /dev/null +++ b/arch/x86/include/asm/pci_x86.h @@ -0,0 +1,165 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares <mj@ucw.cz> + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 + +#define PCI_NO_CHECKS 0x0400 +#define PCI_USE_PIRQ_MASK 0x0800 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 +#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 +#define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 +#define PCI_NOASSIGN_ROMS 0x80000 + +extern unsigned int pci_probe; +extern unsigned long pirq_table_addr; + +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; + +void pcibios_resource_survey(void); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give 0 */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +extern int pcibios_scanned; +extern spinlock_t pci_config_lock; + +extern int (*pcibios_enable_irq)(struct pci_dev *dev); +extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +struct pci_raw_ops { + int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val); + int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val); +}; + +extern struct pci_raw_ops *raw_pci_ops; +extern struct pci_raw_ops *raw_pci_ext_ops; + +extern struct pci_raw_ops pci_direct_conf1; +extern bool port_cf9_safe; + +/* arch_initcall level */ +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); +extern void pci_pcbios_init(void); +extern int pci_olpc_init(void); +extern void __init dmi_check_pciprobe(void); +extern void __init dmi_check_skip_isa_align(void); + +/* some common used subsys_initcalls */ +extern int __init pci_acpi_init(void); +extern int __init pcibios_irq_init(void); +extern int __init pci_visws_init(void); +extern int __init pci_numaq_init(void); +extern int __init pcibios_init(void); + +/* pci-mmconfig.c */ + +extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); + +/* + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space + * on their northbrige except through the * %eax register. As such, you MUST + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config + * accessor functions. + * In fact just use pci_config_*, nothing else please. + */ +static inline unsigned char mmio_config_readb(void __iomem *pos) +{ + u8 val; + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned short mmio_config_readw(void __iomem *pos) +{ + u16 val; + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned int mmio_config_readl(void __iomem *pos) +{ + u32 val; + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline void mmio_config_writeb(void __iomem *pos, u8 val) +{ + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writew(void __iomem *pos, u16 val) +{ + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writel(void __iomem *pos, u32 val) +{ + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h deleted file mode 100644 index 2fbfff88df3..00000000000 --- a/arch/x86/include/asm/pda.h +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef _ASM_X86_PDA_H -#define _ASM_X86_PDA_H - -#ifndef __ASSEMBLY__ -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/cache.h> -#include <asm/page.h> - -/* Per processor datastructure. %gs points to it while the kernel runs */ -struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ - unsigned long kernelstack; /* 16 top of kernel stack for current */ - unsigned long oldrsp; /* 24 user rsp for system call */ - int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR - unsigned long stack_canary; /* 40 stack canary value */ - /* gcc-ABI: this canary MUST be at - offset 40!!! */ -#endif - char *irqstackptr; - short nodenumber; /* number of current node (32k max) */ - short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ - short mmu_state; - short isidle; - struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; -} ____cacheline_aligned_in_smp; - -extern struct x8664_pda **_cpu_pda; -extern void pda_init(int); - -#define cpu_pda(i) (_cpu_pda[i]) - -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) - -/* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) - -#endif - -#define PDA_STACKOFFSET (5*8) - -#endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba6..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -2,53 +2,12 @@ #define _ASM_X86_PERCPU_H #ifdef CONFIG_X86_64 -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include <asm/pda.h> - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - +#define __percpu_seg gs +#define __percpu_mov_op movq +#else +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#include <asm-generic/percpu.h> - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -65,47 +24,48 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) per_cpu__##var +#endif + #else /* ...!ASSEMBLY */ +#include <linux/stringify.h> + +#ifdef CONFIG_SMP +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x +#define __my_cpu_offset percpu_read(this_cpu_off) +#else +#define __percpu_arg(x) "%" #x +#endif + /* - * PER_CPU finds an address of a per-cpu variable. + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) + * There also must be an entry in vmlinux_64.lds.S */ -#ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" - -#else /* !SMP */ - -#define __percpu_seg "" - -#endif /* SMP */ - -#include <asm-generic/percpu.h> +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(per_cpu_var(var)) init_per_cpu_var(var) -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) per_cpu_var(var) +#endif /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ @@ -120,20 +80,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "re" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -143,17 +108,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -162,13 +132,30 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) + +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ + old__; \ +}) + +#include <asm-generic/percpu.h> + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP @@ -195,9 +182,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index cb7c151a8bf..dd14c54ac71 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -42,6 +42,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index b17edfd2362..c1774ac9da7 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -53,26 +53,56 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define pte_none(x) (!(x).pte_low) - /* - * Bits 0, 6 and 7 are taken, split up the 29 bits of offset - * into this range: + * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, + * split up the 29 bits of offset into this range: */ #define PTE_FILE_MAX_BITS 29 +#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) +#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) +#else +#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1) +#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1) +#endif +#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) +#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) #define pte_to_pgoff(pte) \ - ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) + ((((pte).pte_low >> PTE_FILE_SHIFT1) \ + & ((1U << PTE_FILE_BITS1) - 1)) \ + + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ + & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ + + (((pte).pte_low >> PTE_FILE_SHIFT3) \ + << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) #define pgoff_to_pte(off) \ - ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ - (((off) >> 5) << 8) + _PAGE_FILE }) + ((pte_t) { .pte_low = \ + (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ + + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ + << PTE_FILE_SHIFT2) \ + + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ + << PTE_FILE_SHIFT3) \ + + _PAGE_FILE }) /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 1) & 0x1f) -#define __swp_offset(x) ((x).val >> 8) -#define __swp_entry(type, offset) \ - ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +#else +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) +#endif + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << (_PAGE_BIT_PRESENT + 1)) \ + | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 52597aeadff..3f13cdf6115 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -18,21 +18,6 @@ printk("%s:%d: bad pgd %p(%016Lx).\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -static inline int pud_none(pud_t pud) -{ - return pud_val(pud) == 0; -} - -static inline int pud_bad(pud_t pud) -{ - return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; -} - -static inline int pud_present(pud_t pud) -{ - return pud_val(pud) & _PAGE_PRESENT; -} - /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is @@ -120,15 +105,6 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) - -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) - - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ - pmd_index(address)) - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { @@ -145,17 +121,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t a, pte_t b) -{ - return a.pte_low == b.pte_low && a.pte_high == b.pte_high; -} - -static inline int pte_none(pte_t pte) -{ - return !pte.pte_low && !pte.pte_high; -} - /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. @@ -166,6 +131,7 @@ static inline int pte_none(pte_t pte) #define PTE_FILE_MAX_BITS 32 /* Encode and de-code a swap entry */ +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c012f3b1167..8fef0f6bfbb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H +#include <asm/page.h> + #define FIRST_USER_ADDRESS 0 #define _PAGE_BIT_PRESENT 0 /* is present */ @@ -10,7 +12,6 @@ #define _PAGE_BIT_PCD 4 /* page cache disabled */ #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ -#define _PAGE_BIT_FILE 6 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_PAT 7 /* on 4KB pages */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ @@ -22,6 +23,12 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +/* If _PAGE_BIT_PRESENT is clear, we use these: */ +/* - if the user mapped it with PROT_NONE; pte_present gives true */ +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL +/* - set: nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY + #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) @@ -46,11 +53,8 @@ #define _PAGE_NX (_AT(pteval_t, 0)) #endif -/* If _PAGE_PRESENT is clear, we use these: */ -#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, - * saved PTE; unset:swap */ -#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; - pte_present gives true */ +#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ _PAGE_ACCESSED | _PAGE_DIRTY) @@ -158,8 +162,19 @@ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif +/* + * Macro to mark a page protection value as UC- + */ +#define pgprot_noncached(prot) \ + ((boot_cpu_data.x86 > 3) \ + ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ + : (prot)) + #ifndef __ASSEMBLY__ +#define pgprot_writecombine pgprot_writecombine +extern pgprot_t pgprot_writecombine(pgprot_t prot); + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -223,82 +238,110 @@ static inline unsigned long pte_pfn(pte_t pte) static inline int pmd_large(pmd_t pte) { - return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } +static inline pte_t pte_set_flags(pte_t pte, pteval_t set) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v | set); +} + +static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v & ~clear); +} + static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return pte_clear_flags(pte, _PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_NX); + return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_RW); + return pte_set_flags(pte, _PAGE_RW); } static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_PSE); + return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_PSE); + return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_GLOBAL); + return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_GLOBAL); + return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return pte_set_flags(pte, _PAGE_SPECIAL); } extern pteval_t __supported_pte_mask; +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -310,7 +353,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; return __pte(val); } @@ -326,9 +369,31 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) + +static inline int is_new_memtype_allowed(unsigned long flags, + unsigned long new_flags) +{ + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + */ + if ((flags == _PAGE_CACHE_UC_MINUS && + new_flags == _PAGE_CACHE_WB) || + (flags == _PAGE_CACHE_WC && + new_flags == _PAGE_CACHE_WB)) { + return 0; + } + + return 1; +} #ifndef __ASSEMBLY__ +/* Indicate that x86 has its own track and untrack pfn vma functions */ +#define __HAVE_PFNMAP_TRACKING + #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -402,6 +467,190 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) # include "pgtable_64.h" #endif +#ifndef __ASSEMBLY__ +#include <linux/mm_types.h> + +static inline int pte_none(pte_t pte) +{ + return !pte.pte; +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return a.pte == b.pte; +} + +static inline int pte_present(pte_t a) +{ + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); +} + +static inline int pmd_present(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_PRESENT; +} + +static inline int pmd_none(pmd_t pmd) +{ + /* Only check low word on 32-bit platforms, since it might be + out of sync with upper half. */ + return (unsigned long)native_pmd_val(pmd) == 0; +} + +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +static inline unsigned pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * (Currently stuck as a macro because of indirect forward reference + * to linux/mm.h:page_to_nid()) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +/* + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * this function returns the index of the entry in the pte page which would + * control the given virtual address + */ +static inline unsigned pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} + +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) +{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} + +static inline int pmd_bad(pmd_t pmd) +{ + return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; +} + +static inline unsigned long pages_to_mb(unsigned long npg) +{ + return npg >> (20 - PAGE_SHIFT); +} + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#if PAGETABLE_LEVELS == 2 +static inline int pud_large(pud_t pud) +{ + return 0; +} +#endif + +#if PAGETABLE_LEVELS > 2 +static inline int pud_none(pud_t pud) +{ + return native_pud_val(pud) == 0; +} + +static inline int pud_present(pud_t pud) +{ + return pud_flags(pud) & _PAGE_PRESENT; +} + +static inline unsigned long pud_page_vaddr(pud_t pud) +{ + return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) + +/* Find an entry in the second-level page table.. */ +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +static inline int pud_large(pud_t pud) +{ + return (pud_flags(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} + +static inline int pud_bad(pud_t pud) +{ + return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; +} +#endif /* PAGETABLE_LEVELS > 2 */ + +#if PAGETABLE_LEVELS > 3 +static inline int pgd_present(pgd_t pgd) +{ + return pgd_flags(pgd) & _PAGE_PRESENT; +} + +static inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ + return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); +} + +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) + +/* to find an entry in a page-table-directory. */ +static inline unsigned pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); +} + +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; +} + +static inline int pgd_none(pgd_t pgd) +{ + return !native_pgd_val(pgd); +} +#endif /* PAGETABLE_LEVELS > 3 */ + +#endif /* __ASSEMBLY__ */ + /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] * diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index f9d5889b336..1952bb762aa 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -85,64 +85,12 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* The boot page tables (all created as a single array) */ extern unsigned long pg0[]; -#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) - -/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ -#define pmd_none(x) (!(unsigned long)pmd_val((x))) -#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) -#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) - -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -/* - * Macro to mark a page protection value as "uncacheable". - * On processors which do not support it, this is a no-op. - */ -#define pgprot_noncached(prot) \ - ((boot_cpu_data.x86 > 3) \ - ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \ - : (prot)) - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - - -static inline int pud_large(pud_t pud) { return 0; } - -/* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -#define pmd_index(address) \ - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) - -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this macro returns the index of the entry in the pte page which would - * control the given virtual address - */ -#define pte_index(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, address) \ - ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) - -#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) - -#define pmd_page_vaddr(pmd) \ - ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ @@ -185,7 +133,4 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 545a0e042bb..1c4e247c51f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -11,7 +11,6 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> -#include <asm/pda.h> extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; @@ -67,9 +66,6 @@ extern void paging_init(void); printk("%s:%d: bad pgd %p(%016lx).\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -#define pgd_none(x) (!pgd_val(x)) -#define pud_none(x) (!pud_val(x)) - struct mm_struct; void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); @@ -134,8 +130,6 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -#define pte_same(a, b) ((a).pte == (b).pte) - #endif /* !__ASSEMBLY__ */ #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) @@ -146,7 +140,7 @@ static inline void native_pgd_clear(pgd_t *pgd) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -#define MAXMEM _AC(0x00003fffffffffff, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #define VMALLOC_START _AC(0xffffc20000000000, UL) #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) #define VMEMMAP_START _AC(0xffffe20000000000, UL) @@ -156,32 +150,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #ifndef __ASSEMBLY__ -static inline int pgd_bad(pgd_t pgd) -{ - return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - -static inline int pud_bad(pud_t pud) -{ - return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - -static inline int pmd_bad(pmd_t pmd) -{ - return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - -#define pte_none(x) (!pte_val((x))) -#define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) - -#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ - -/* - * Macro to mark a page protection value as "uncacheable". - */ -#define pgprot_noncached(prot) \ - (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -190,41 +158,12 @@ static inline int pmd_bad(pmd_t pmd) /* * Level 4 access. */ -#define pgd_page_vaddr(pgd) \ - ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) -#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) -#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) /* PUD - Level3 access */ -/* to find an entry in a page-table-directory. */ -#define pud_page_vaddr(pud) \ - ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) -#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -#define pud_offset(pgd, address) \ - ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) -#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT) - -static inline int pud_large(pud_t pte) -{ - return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); -} /* PMD - Level 2 access */ -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) -#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) - -#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ - pmd_index(address)) -#define pmd_none(x) (!pmd_val((x))) -#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) -#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) -#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) - #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ _PAGE_FILE }) @@ -232,13 +171,6 @@ static inline int pud_large(pud_t pte) /* PTE - Level 1 access. */ -/* page, protection -> pte */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) - -#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ - pte_index((address))) - /* x86-64 always has all page tables mapped. */ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) @@ -250,19 +182,28 @@ static inline int pud_large(pud_t pte) extern int direct_gbpages; /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 1) & 0x3f) -#define __swp_offset(x) ((x).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ - ((offset) << 8) }) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +#else +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) +#endif + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << (_PAGE_BIT_PRESENT + 1)) \ + | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index fe681147a4f..3ac5032fae0 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h @@ -6,5 +6,4 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 - #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e38326..a0133838b67 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -73,7 +73,7 @@ struct cpuinfo_x86 { char pad0; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ - int x86_tlbsize; + int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; #endif @@ -110,6 +110,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; #endif + unsigned int x86_hyper_vendor; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -123,6 +124,9 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff +#define X86_HYPER_VENDOR_NONE 0 +#define X86_HYPER_VENDOR_VMWARE 1 + /* * capabilities of CPUs */ @@ -349,7 +353,7 @@ struct i387_soft_struct { u8 no_update; u8 rm; u8 alimit; - struct info *info; + struct math_emu_info *info; u32 entry_eip; }; @@ -374,7 +378,29 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + +DECLARE_PER_CPU(char *, irq_stack_ptr); +#else /* X86_64 */ +#ifdef CONFIG_CC_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, stack_canary); #endif +#endif /* X86_64 */ extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; @@ -748,9 +774,22 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); -extern void init_gdt(int cpu); + +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} static inline void update_debugctlmsr(unsigned long debugctlmsr) { diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index d6a22f92ba7..49fb3ecf3bb 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -18,11 +18,7 @@ extern void syscall32_cpu_init(void); extern void check_efer(void); -#ifdef CONFIG_X86_BIOS_REBOOT extern int reboot_force; -#else -static const int reboot_force = 0; -#endif long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 25f1bb8fc62..8e0f8d199e0 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -83,7 +83,7 @@ #ifdef CONFIG_X86_PTRACE_BTS #ifndef __ASSEMBLY__ -#include <asm/types.h> +#include <linux/types.h> /* configuration/status structure used in PTRACE_BTS_CONFIG and PTRACE_BTS_STATUS commands. diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d1531c8480b..e304b66abee 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,6 @@ #include <asm/processor-flags.h> #ifdef __KERNEL__ -#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ #include <asm/segment.h> #endif @@ -29,7 +28,7 @@ struct pt_regs { int xds; int xes; int xfs; - /* int gs; */ + int xgs; long orig_eax; long eip; int xcs; @@ -51,7 +50,7 @@ struct pt_regs { unsigned long ds; unsigned long es; unsigned long fs; - /* int gs; */ + unsigned long gs; unsigned long orig_ax; unsigned long ip; unsigned long cs; @@ -128,34 +127,6 @@ struct pt_regs { #endif /* !__i386__ */ -#ifdef CONFIG_X86_PTRACE_BTS -/* a branch trace record entry - * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. - */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; - -struct bts_struct { - __u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - __u64 from_ip; - __u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - __u64 jiffies; - } variant; -}; -#endif /* CONFIG_X86_PTRACE_BTS */ - #ifdef __KERNEL__ #include <linux/init.h> @@ -163,13 +134,6 @@ struct bts_struct { struct cpuinfo_x86; struct task_struct; -#ifdef CONFIG_X86_PTRACE_BTS -extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); -extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); -#else -#define ptrace_bts_init_intel(config) do {} while (0) -#endif /* CONFIG_X86_PTRACE_BTS */ - extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long @@ -271,7 +235,12 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#define __ARCH_WANT_COMPAT_SYS_PTRACE +extern void x86_ptrace_untrace(struct task_struct *); +extern void x86_ptrace_fork(struct task_struct *child, + unsigned long clone_flags); + +#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) +#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h index c8e9c8bed3d..c8e9c8bed3d 100644 --- a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h +++ b/arch/x86/include/asm/rdc321x_defs.h diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1dc1b51ac62..14e0ed86a6f 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -61,7 +61,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused + * 28 - stack_canary-20 [ for stack protector ] * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -95,6 +95,13 @@ #define __KERNEL_PERCPU 0 #endif +#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) +#ifdef CONFIG_CC_STACKPROTECTOR +#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) +#else +#define __KERNEL_STACK_CANARY 0 +#endif + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f12d3723746..45b40278b58 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SETUP_H #define _ASM_X86_SETUP_H +#ifdef __KERNEL__ + #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ @@ -8,6 +10,8 @@ /* Interrupt control for vSMPowered x86_64 systems */ void vsmp_init(void); +void setup_bios_corruption_check(void); + #ifdef CONFIG_X86_VISWS extern void visws_early_detect(void); extern int is_visws_box(void); @@ -16,12 +20,14 @@ static inline void visws_early_detect(void) { } static inline int is_visws_box(void) { return 0; } #endif +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); /* * Any setup quirks to be performed? */ -struct mpc_config_processor; -struct mpc_config_bus; -struct mp_config_oemtable; +struct mpc_cpu; +struct mpc_bus; +struct mpc_oemtable; struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); @@ -33,12 +39,13 @@ struct x86_quirks { int (*mach_find_smp_config)(unsigned int reserve); int *mpc_record; - int (*mpc_apic_id)(struct mpc_config_processor *m); - void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name); - void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); - void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, - unsigned short oemsize); + int (*mpc_apic_id)(struct mpc_cpu *m); + void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); + void (*mpc_oem_pci_bus)(struct mpc_bus *m); + void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, + unsigned short oemsize); int (*setup_ioapic_ids)(void); + int (*update_genapic)(void); }; extern struct x86_quirks *x86_quirks; @@ -49,8 +56,6 @@ extern unsigned long saved_video_mode; #endif #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ - #ifdef __i386__ #include <linux/pfn.h> @@ -93,7 +98,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/include/asm/mach-default/setup_arch.h b/arch/x86/include/asm/setup_arch.h index 38846208b54..38846208b54 100644 --- a/arch/x86/include/asm/mach-default/setup_arch.h +++ b/arch/x86/include/asm/setup_arch.h diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 0afcb5e58ac..ec666491aaa 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -2,7 +2,7 @@ #define _ASM_X86_SIGCONTEXT_H #include <linux/compiler.h> -#include <asm/types.h> +#include <linux/types.h> #define FP_XSTATE_MAGIC1 0x46505853U #define FP_XSTATE_MAGIC2 0x46505845U diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h index 6126188cf3a..ad1478c4ae1 100644 --- a/arch/x86/include/asm/sigcontext32.h +++ b/arch/x86/include/asm/sigcontext32.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H +#include <linux/types.h> + /* signal context for 32bit programs. */ #define X86_FXSR_MAGIC 0x0000 diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h new file mode 100644 index 00000000000..4e0fe26d27d --- /dev/null +++ b/arch/x86/include/asm/sigframe.h @@ -0,0 +1,70 @@ +#ifndef _ASM_X86_SIGFRAME_H +#define _ASM_X86_SIGFRAME_H + +#include <asm/sigcontext.h> +#include <asm/siginfo.h> +#include <asm/ucontext.h> + +#ifdef CONFIG_X86_32 +#define sigframe_ia32 sigframe +#define rt_sigframe_ia32 rt_sigframe +#define sigcontext_ia32 sigcontext +#define _fpstate_ia32 _fpstate +#define ucontext_ia32 ucontext +#else /* !CONFIG_X86_32 */ + +#ifdef CONFIG_IA32_EMULATION +#include <asm/ia32.h> +#endif /* CONFIG_IA32_EMULATION */ + +#endif /* CONFIG_X86_32 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +struct sigframe_ia32 { + u32 pretcode; + int sig; + struct sigcontext_ia32 sc; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate_ia32 fpstate_unused; +#ifdef CONFIG_IA32_EMULATION + unsigned int extramask[_COMPAT_NSIG_WORDS-1]; +#else /* !CONFIG_IA32_EMULATION */ + unsigned long extramask[_NSIG_WORDS-1]; +#endif /* CONFIG_IA32_EMULATION */ + char retcode[8]; + /* fp state follows here */ +}; + +struct rt_sigframe_ia32 { + u32 pretcode; + int sig; + u32 pinfo; + u32 puc; +#ifdef CONFIG_IA32_EMULATION + compat_siginfo_t info; +#else /* !CONFIG_IA32_EMULATION */ + struct siginfo info; +#endif /* CONFIG_IA32_EMULATION */ + struct ucontext_ia32 uc; + char retcode[8]; + /* fp state follows here */ +}; +#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ + +#ifdef CONFIG_X86_64 +struct rt_sigframe { + char __user *pretcode; + struct ucontext uc; + struct siginfo info; + /* fp state follows here */ +}; +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 96ac44f275d..7761a5d554b 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -121,6 +121,10 @@ typedef unsigned long sigset_t; #ifndef __ASSEMBLY__ +# ifdef __KERNEL__ +extern void do_notify_resume(struct pt_regs *, void *, __u32); +# endif /* __KERNEL__ */ + #ifdef __i386__ # ifdef __KERNEL__ struct old_sigaction { @@ -141,8 +145,6 @@ struct k_sigaction { struct sigaction sa; }; -extern void do_notify_resume(struct pt_regs *, void *, __u32); - # else /* __KERNEL__ */ /* Here we must cater to libcs that poke about in kernel headers. */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d..47d0e21f2b9 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -15,28 +15,26 @@ # include <asm/io_apic.h> # endif #endif -#include <asm/pda.h> #include <asm/thread_info.h> - -extern cpumask_t cpu_callout_map; -extern cpumask_t cpu_initialized; -extern cpumask_t cpu_callin_map; - -extern void (*mtrr_hook)(void); -extern void zap_low_mappings(void); - -extern int __cpuinit get_local_pda(int cpu); +#include <asm/cpumask.h> extern int smp_num_siblings; extern unsigned int num_processors; -extern cpumask_t cpu_initialized; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif + +static inline struct cpumask *cpu_sibling_mask(int cpu) +{ + return &per_cpu(cpu_sibling_map, cpu); +} + +static inline struct cpumask *cpu_core_mask(int cpu) +{ + return &per_cpu(cpu_core_map, cpu); +} DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); @@ -60,7 +58,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +123,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(mask); + smp_ops.send_call_func_ipi(&mask); } void cpu_disable_common(void); @@ -138,22 +136,16 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(cpumask_t mask); +void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) /* We don't mark CPUs online until __cpu_up(), so we need another measure */ static inline int num_booting_cpus(void) { - return cpus_weight(cpu_callout_map); -} -#else -static inline void prefill_possible_map(void) -{ + return cpumask_weight(cpu_callout_mask); } #endif /* CONFIG_SMP */ @@ -165,11 +157,11 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ @@ -179,10 +171,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -194,28 +182,9 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#include <mach_apicdef.h> -static inline unsigned int read_apic_id(void) -{ - unsigned int reg; - - reg = *(u32 *)(APIC_BASE + APIC_ID); - - return GET_APIC_ID(reg); -} #endif - -# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); -# else -#include <mach_apicdef.h> -static inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return read_apic_id(); -} -# endif /* APIC_DEFINITION */ #else /* CONFIG_X86_LOCAL_APIC */ @@ -225,11 +194,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index dbab36d64d4..1def6011490 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) CMOS_WRITE(0xa, 0xf); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = + start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) = + start_eip & 0xf; pr_debug("3.\n"); } @@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ CMOS_WRITE(0, 0xf); - *((volatile long *) phys_to_virt(0x467)) = 0; + *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index be44f7dab39..e3cc3c063ec 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,7 +27,7 @@ #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ # define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 44 +# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..3a569665668 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -172,70 +172,8 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; } -#ifdef CONFIG_PARAVIRT -/* - * Define virtualization-friendly old-style lock byte lock, for use in - * pv_lock_ops if desired. - * - * This differs from the pre-2.6.24 spinlock by always using xchgb - * rather than decb to take the lock; this allows it to use a - * zero-initialized lock structure. It also maintains a 1-byte - * contention counter, so that we can implement - * __byte_spin_is_contended. - */ -struct __byte_spinlock { - s8 lock; - s8 spinners; -}; - -static inline int __byte_spin_is_locked(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->lock != 0; -} - -static inline int __byte_spin_is_contended(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->spinners != 0; -} - -static inline void __byte_spin_lock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - s8 val = 1; - - asm("1: xchgb %1, %0\n" - " test %1,%1\n" - " jz 3f\n" - " " LOCK_PREFIX "incb %2\n" - "2: rep;nop\n" - " cmpb $1, %0\n" - " je 2b\n" - " " LOCK_PREFIX "decb %2\n" - " jmp 1b\n" - "3:" - : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); -} - -static inline int __byte_spin_trylock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - u8 old = 1; - - asm("xchgb %1,%0" - : "+m" (bl->lock), "+q" (old) : : "memory"); +#ifndef CONFIG_PARAVIRT - return old == 0; -} - -static inline void __byte_spin_unlock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - smp_wmb(); - bl->lock = 0; -} -#else /* !CONFIG_PARAVIRT */ static inline int __raw_spin_is_locked(raw_spinlock_t *lock) { return __ticket_spin_is_locked(lock); @@ -245,6 +183,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) { @@ -267,7 +206,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, __raw_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT */ +#endif static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { @@ -329,8 +268,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; - atomic_dec(count); - if (atomic_read(count) >= 0) + if (atomic_dec_return(count) >= 0) return 1; atomic_inc(count); return 0; diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h new file mode 100644 index 00000000000..c2d742c6e15 --- /dev/null +++ b/arch/x86/include/asm/stackprotector.h @@ -0,0 +1,124 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and unfortunately gcc requires it to be at a fixed offset from %gs. + * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 + * and x86_32 use segment registers differently and thus handles this + * requirement differently. + * + * On x86_64, %gs is shared by percpu area and stack canary. All + * percpu symbols are zero based and %gs points to the base of percpu + * area. The first occupant of the percpu area is always + * irq_stack_union which contains stack_canary at offset 40. Userland + * %gs is always saved and restored on kernel entry and exit using + * swapgs, so stack protector doesn't add any complexity there. + * + * On x86_32, it's slightly more complicated. As in x86_64, %gs is + * used for userland TLS. Unfortunately, some processors are much + * slower at loading segment registers with different value when + * entering and leaving the kernel, so the kernel uses %fs for percpu + * area and manages %gs lazily so that %gs is switched only when + * necessary, usually during task switch. + * + * As gcc requires the stack canary at %gs:20, %gs can't be managed + * lazily if stack protector is enabled, so the kernel saves and + * restores userland %gs on kernel entry and exit. This behavior is + * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in + * system.h to hide the details. + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR + +#include <asm/tsc.h> +#include <asm/processor.h> +#include <asm/percpu.h> +#include <asm/system.h> +#include <asm/desc.h> +#include <linux/random.h> + +/* + * 24 byte read-only segment initializer for stack canary. Linker + * can't handle the address bit shifting. Address will be set in + * head_32 for boot CPU and setup_per_cpu_areas() for others. + */ +#define GDT_STACK_CANARY_INIT \ + [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + u64 canary; + u64 tsc; + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); +#endif + /* + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. + */ + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; +#ifdef CONFIG_X86_64 + percpu_write(irq_stack_union.stack_canary, canary); +#else + percpu_write(stack_canary, canary); +#endif +} + +static inline void setup_stack_canary_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; + struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); + struct desc_struct desc; + + desc = gdt_table[GDT_ENTRY_STACK_CANARY]; + desc.base0 = canary & 0xffff; + desc.base1 = (canary >> 16) & 0xff; + desc.base2 = (canary >> 24) & 0xff; + write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); +#endif +} + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); +#endif +} + +#else /* CC_STACKPROTECTOR */ + +#define GDT_STACK_CANARY_INIT + +/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ + +static inline void setup_stack_canary_segment(int cpu) +{ } + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm volatile ("mov %0, %%gs" : : "r" (0)); +#endif +} + +#endif /* CC_STACKPROTECTOR */ +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h deleted file mode 100644 index 9b3070f1c2a..00000000000 --- a/arch/x86/include/asm/summit/apic.h +++ /dev/null @@ -1,184 +0,0 @@ -#ifndef __ASM_SUMMIT_APIC_H -#define __ASM_SUMMIT_APIC_H - -#include <asm/smp.h> - -#define esr_disable (1) -#define NO_BALANCE_IRQ (0) - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline cpumask_t target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return cpumask_of_cpu(0); -} - -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) -{ - return 1; -} - -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - -extern u8 cpu_2_logical_apicid[]; - -static inline void init_apic_ldr(void) -{ - unsigned long val, id; - int count = 0; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = NR_CPUS; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - -static inline int apic_id_registered(void) -{ - return 1; -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static inline int apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); -} - -static inline physid_mask_t apicid_to_cpu_present(int apicid) -{ - return physid_mask_of_physid(0); -} - -static inline void setup_portio_remap(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -static inline void enable_apic_mode(void) -{ -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - return (int) 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -/* cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h deleted file mode 100644 index f3fbca1f61c..00000000000 --- a/arch/x86/include/asm/summit/apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_SUMMIT_APICDEF_H -#define __ASM_SUMMIT_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (x>>24)&0xFF; -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h deleted file mode 100644 index 53bd1e7bd7b..00000000000 --- a/arch/x86/include/asm/summit/ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __ASM_SUMMIT_IPI_H -#define __ASM_SUMMIT_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h deleted file mode 100644 index 013ce6fab2d..00000000000 --- a/arch/x86/include/asm/summit/mpparse.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef __ASM_SUMMIT_MPPARSE_H -#define __ASM_SUMMIT_MPPARSE_H - -#include <asm/tsc.h> - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -extern void setup_summit(void); -#else -#define setup_summit() {} -#endif - -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#endif /* __ASM_SUMMIT_MPPARSE_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h new file mode 100644 index 00000000000..1b8afa78e86 --- /dev/null +++ b/arch/x86/include/asm/svm.h @@ -0,0 +1,328 @@ +#ifndef __SVM_H +#define __SVM_H + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u16 intercept_cr_read; + u16 intercept_cr_write; + u16 intercept_dr_read; + u16 intercept_dr_write; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[44]; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u8 reserved_4[16]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 lbr_ctl; + u8 reserved_5[832]; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_CPUID_FEATURE_SHIFT 2 +#define SVM_CPUID_FUNC 0x8000000a + +#define MSR_EFER_SVME_MASK (1ULL << 12) +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_HSAVE_PA 0xc0010117ULL + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_MASK 1 +#define INTERCEPT_CR3_MASK (1 << 3) +#define INTERCEPT_CR4_MASK (1 << 4) +#define INTERCEPT_CR8_MASK (1 << 8) + +#define INTERCEPT_DR0_MASK 1 +#define INTERCEPT_DR1_MASK (1 << 1) +#define INTERCEPT_DR2_MASK (1 << 2) +#define INTERCEPT_DR3_MASK (1 << 3) +#define INTERCEPT_DR4_MASK (1 << 4) +#define INTERCEPT_DR5_MASK (1 << 5) +#define INTERCEPT_DR6_MASK (1 << 6) +#define INTERCEPT_DR7_MASK (1 << 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ + +#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" +#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" +#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb" +#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd" +#define SVM_STGI ".byte 0x0f, 0x01, 0xdc" +#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" + +#endif + diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/asm/swab.h new file mode 100644 index 00000000000..557cd9f0066 --- /dev/null +++ b/arch/x86/include/asm/swab.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_SWAB_H +#define _ASM_X86_SWAB_H + +#include <linux/types.h> +#include <linux/compiler.h> + +static inline __attribute_const__ __u32 __arch_swab32(__u32 val) +{ +#ifdef __i386__ +# ifdef CONFIG_X86_BSWAP + asm("bswap %0" : "=r" (val) : "0" (val)); +# else + asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + : "=q" (val) + : "0" (val)); +# endif + +#else /* __i386__ */ + asm("bswapl %0" + : "=r" (val) + : "0" (val)); +#endif + return val; +} +#define __arch_swab32 __arch_swab32 + +static inline __attribute_const__ __u64 __arch_swab64(__u64 val) +{ +#ifdef __i386__ + union { + struct { + __u32 a; + __u32 b; + } s; + __u64 u; + } v; + v.u = val; +# ifdef CONFIG_X86_BSWAP + asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +# else + v.s.a = __arch_swab32(v.s.a); + v.s.b = __arch_swab32(v.s.b); + asm("xchgl %0,%1" + : "=r" (v.s.a), "=r" (v.s.b) + : "0" (v.s.a), "1" (v.s.b)); +# endif + return v.u; +#else /* __i386__ */ + asm("bswapq %0" + : "=r" (val) + : "0" (val)); + return val; +#endif +} +#define __arch_swab64 __arch_swab64 + +#endif /* _ASM_X86_SWAB_H */ diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 51fb2c76ad7..b9e4e20174f 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -1,46 +1,10 @@ #ifndef _ASM_X86_SWIOTLB_H #define _ASM_X86_SWIOTLB_H -#include <asm/dma-mapping.h> +#include <linux/swiotlb.h> /* SWIOTLB interface */ -extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, - size_t size, int dir); -extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); -extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_for_device(struct device *hwdev, - dma_addr_t dev_addr, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_single_range_for_device(struct device *hwdev, - dma_addr_t dev_addr, - unsigned long offset, - size_t size, int dir); -extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern void swiotlb_sync_sg_for_device(struct device *hwdev, - struct scatterlist *sg, int nelems, - int dir); -extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); -extern void swiotlb_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); -extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); -extern void swiotlb_init(void); - extern int swiotlb_force; #ifdef CONFIG_SWIOTLB diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h new file mode 100644 index 00000000000..ffb08be2a53 --- /dev/null +++ b/arch/x86/include/asm/sys_ia32.h @@ -0,0 +1,101 @@ +/* + * sys_ia32.h - Linux ia32 syscall interfaces + * + * Copyright (c) 2008 Jaswinder Singh Rajput + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYS_IA32_H +#define _ASM_X86_SYS_IA32_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/compat.h> +#include <asm/ia32.h> + +/* ia32/sys_ia32.c */ +asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); + +asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long sys32_fstatat(unsigned int, char __user *, + struct stat64 __user *, int); +struct mmap_arg_struct; +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); +asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); + +asmlinkage long sys32_pipe(int __user *); +struct sigaction32; +struct old_sigaction32; +asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, + struct sigaction32 __user *, unsigned int); +asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, + struct old_sigaction32 __user *); +asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, + compat_sigset_t __user *, unsigned int); +asmlinkage long sys32_alarm(unsigned int); + +struct sel_arg_struct; +asmlinkage long sys32_old_select(struct sel_arg_struct __user *); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_sysfs(int, u32, u32); + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, + struct compat_timespec __user *); +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); +asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); + +#ifdef CONFIG_SYSCTL_SYSCALL +struct sysctl_ia32; +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); +#endif + +asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); + +asmlinkage long sys32_personality(unsigned long); +asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); + +asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + +struct oldold_utsname; +struct old_utsname; +asmlinkage long sys32_olduname(struct oldold_utsname __user *); +long sys32_uname(struct old_utsname __user *); + +long sys32_ustat(unsigned, struct ustat32 __user *); + +asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, + compat_uptr_t __user *, struct pt_regs *); +asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); + +long sys32_lseek(unsigned int, int, unsigned int); +long sys32_kill(int, int); +long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); +long sys32_vm86_warning(void); +long sys32_lookup_dcookie(u32, u32, char __user *, size_t); + +asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); +asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, + unsigned, unsigned, int); +asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); +asmlinkage long sys32_fallocate(int, int, unsigned, + unsigned, unsigned, unsigned); + +/* ia32/ia32_signal.c */ +asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, + stack_ia32_t __user *, struct pt_regs *); +asmlinkage long sys32_sigreturn(struct pt_regs *); +asmlinkage long sys32_rt_sigreturn(struct pt_regs *); + +/* ia32/ipc32.c */ +asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); +#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 87803da4401..68b1be10cfa 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -19,27 +19,31 @@ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +/* kernel/ldt.c */ +asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); + +/* kernel/tls.c */ +asmlinkage int sys_set_thread_area(struct user_desc __user *); +asmlinkage int sys_get_thread_area(struct user_desc __user *); + /* X86_32 only */ #ifdef CONFIG_X86_32 /* kernel/process_32.c */ -asmlinkage int sys_fork(struct pt_regs); -asmlinkage int sys_clone(struct pt_regs); -asmlinkage int sys_vfork(struct pt_regs); -asmlinkage int sys_execve(struct pt_regs); +int sys_fork(struct pt_regs *); +int sys_clone(struct pt_regs *); +int sys_vfork(struct pt_regs *); +int sys_execve(struct pt_regs *); /* kernel/signal_32.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -asmlinkage int sys_sigaltstack(unsigned long); -asmlinkage unsigned long sys_sigreturn(unsigned long); -asmlinkage int sys_rt_sigreturn(unsigned long); +int sys_sigaltstack(struct pt_regs *); +unsigned long sys_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned long); - -/* kernel/ldt.c */ -asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, @@ -54,13 +58,9 @@ asmlinkage int sys_uname(struct old_utsname __user *); struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); -/* kernel/tls.c */ -asmlinkage int sys_set_thread_area(struct user_desc __user *); -asmlinkage int sys_get_thread_area(struct user_desc __user *); - /* kernel/vm86_32.c */ -asmlinkage int sys_vm86old(struct pt_regs); -asmlinkage int sys_vm86(struct pt_regs); +int sys_vm86old(struct pt_regs *); +int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2ed3f0f44ff..c00bfdbdd45 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -17,12 +17,26 @@ # define AT_VECTOR_SIZE_ARCH 1 #endif -#ifdef CONFIG_X86_32 - struct task_struct; /* one of the stranger aspects of C forward declarations */ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); +#ifdef CONFIG_X86_32 + +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (per_cpu_var(stack_canary)) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -44,6 +58,7 @@ do { \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ + __switch_canary \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ @@ -58,6 +73,8 @@ do { \ "=b" (ebx), "=c" (ecx), "=d" (edx), \ "=S" (esi), "=D" (edi) \ \ + __switch_canary_oparam \ + \ /* input parameters: */ \ : [next_sp] "m" (next->thread.sp), \ [next_ip] "m" (next->thread.ip), \ @@ -66,6 +83,8 @@ do { \ [prev] "a" (prev), \ [next] "d" (next) \ \ + __switch_canary_iparam \ + \ : /* reloaded segment registers */ \ "memory"); \ } while (0) @@ -86,27 +105,44 @@ do { \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ "r12", "r13", "r14", "r15" +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ + __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ - "jc ret_from_fork\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ + __switch_canary_oparam \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [tif_fork] "i" (TIF_FORK), \ + [_tif_fork] "i" (_TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ + __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif @@ -165,6 +201,25 @@ extern void native_load_gs_index(unsigned); #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") +/* + * x86_32 user gs accessors. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS +#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) +#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +#define task_user_gs(tsk) ((tsk)->thread.gs) +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ + static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; @@ -314,6 +369,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void stop_this_cpu(void *dummy); + /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad..df9d5f78385 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -20,11 +20,13 @@ struct task_struct; struct exec_domain; #include <asm/processor.h> +#include <asm/ftrace.h> +#include <asm/atomic.h> struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ + __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, @@ -38,6 +40,7 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + int uaccess_err; }; #define INIT_THREAD_INFO(tsk) \ @@ -91,7 +94,6 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -113,7 +115,6 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ @@ -139,8 +140,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ - _TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) @@ -195,25 +195,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include <asm/pda.h> +#include <asm/percpu.h> +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} +DECLARE_PER_CPU(unsigned long, kernel_stack); -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) +static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -221,8 +217,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index 1287dc1347d..b5c9d45c981 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -1,18 +1,13 @@ -/* x86 architecture timex specifications */ #ifndef _ASM_X86_TIMEX_H #define _ASM_X86_TIMEX_H #include <asm/processor.h> #include <asm/tsc.h> -#ifdef CONFIG_X86_ELAN -# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ -#elif defined(CONFIG_X86_RDC321X) -# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ -#else -# define PIT_TICK_RATE 1193182 /* Underlying HZ */ -#endif -#define CLOCK_TICK_RATE PIT_TICK_RATE +/* The PIT ticks at this frequency (in HZ): */ +#define PIT_TICK_RATE 1193182 + +#define CLOCK_TICK_RATE PIT_TICK_RATE #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..d3539f998f8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, @@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4850e4b02b6..77cfb2cfb38 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -61,13 +61,21 @@ static inline int cpu_to_node(int cpu) * * Side note: this function creates the returned cpumask on the stack * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. + * cpumask_of_node function should be used whenever possible. */ static inline cpumask_t node_to_cpumask(int node) { return node_to_cpumask_map[node]; } +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return &node_to_cpumask_map[node]; +} + +static inline void setup_node_to_cpumask_map(void) { } + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -77,12 +85,13 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); extern int early_cpu_to_node(int cpu); -extern const cpumask_t *_node_to_cpumask_ptr(int node); +extern const cpumask_t *cpumask_of_node(int node); extern cpumask_t node_to_cpumask(int node); #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ @@ -96,14 +105,11 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &node_to_cpumask_map[node]; } @@ -116,12 +122,17 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -/* Replace default node_to_cpumask_ptr with optimized version */ +extern void setup_node_to_cpumask_map(void); + +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif /* CONFIG_X86_64 */ @@ -183,11 +194,22 @@ extern int __node_distance(int, int); #else /* !CONFIG_NUMA */ -#define numa_node_id() 0 -#define cpu_to_node(cpu) 0 -#define early_cpu_to_node(cpu) 0 +static inline int numa_node_id(void) +{ + return 0; +} -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline int cpu_to_node(int cpu) +{ + return 0; +} + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} + +static inline const cpumask_t *cpumask_of_node(int node) { return &cpu_online_map; } @@ -200,12 +222,17 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } -/* Replace default node_to_cpumask_ptr with optimized version */ +static inline void setup_node_to_cpumask_map(void) { } + +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif #include <asm-generic/topology.h> @@ -214,18 +241,20 @@ static inline int node_to_first_cpu(int node) /* Returns the number of the first CPU on Node 'node'. */ static inline int node_to_first_cpu(int node) { - node_to_cpumask_ptr(mask, node); - return first_cpu(*mask); + return cpumask_first(cpumask_of_node(node)); } #endif extern cpumask_t cpu_coregroup_map(int cpu); +extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes @@ -239,7 +268,7 @@ struct pci_bus; void set_pci_bus_resources_arch_default(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) #define smt_capable() (smp_num_siblings > 1) #endif diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index fa0d79facdb..90f06c25221 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -3,6 +3,7 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_TRAMPOLINE /* * Trampoline 80x86 program as an array. */ @@ -12,9 +13,16 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; +#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 + extern unsigned long setup_trampoline(void); +extern void __init reserve_trampoline_memory(void); +#else +static inline void reserve_trampoline_memory(void) {}; +#endif /* CONFIG_X86_TRAMPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 45dee286e45..0d5342515b8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); +#ifdef CONFIG_X86_64 +dotraplinkage void do_double_fault(struct pt_regs *, long); +asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +#endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); @@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; extern int kstack_depth_to_print; -#ifdef CONFIG_X86_32 void math_error(void __user *); +void math_emulate(struct math_emu_info *); +#ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long, unsigned long); -asmlinkage void math_emulate(long); +#else +asmlinkage void smp_thermal_interrupt(void); +asmlinkage void mce_threshold_interrupt(void); #endif #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9cd83a8e40d..38ae163cc91 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t vget_cycles(void) { - cycles_t cycles; - /* * We only do VDSOs on TSC capable CPUs, so this shouldnt * access boot_cpu_data (which is not VDSO-safe): @@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - rdtsc_barrier(); - cycles = (cycles_t)__native_read_tsc(); - rdtsc_barrier(); - - return cycles; + return (cycles_t)__native_read_tsc(); } extern void tsc_init(void); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c54921b2e..b685ece89d5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -121,7 +121,7 @@ extern int __get_user_bad(void); #define __get_user_x(size, ret, x, ptr) \ asm volatile("call __get_user_" #size \ - : "=a" (ret),"=d" (x) \ + : "=a" (ret), "=d" (x) \ : "0" (ptr)) \ /* Careful: we have to cast the result to the type of the pointer @@ -157,6 +157,7 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -180,12 +181,12 @@ extern int __get_user_bad(void); #define __put_user_x(size, x, ptr, __ret_pu) \ asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #ifdef CONFIG_X86_32 -#define __put_user_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err, errret) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -196,14 +197,24 @@ extern int __get_user_bad(void); _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (err) \ - : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + : "A" (x), "r" (addr), "i" (errret), "0" (err)) + +#define __put_user_asm_ex_u64(x, addr) \ + asm volatile("1: movl %%eax,0(%1)\n" \ + "2: movl %%edx,4(%1)\n" \ + "3:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + _ASM_EXTABLE(2b, 3b - 2b) \ + : : "A" (x), "r" (addr)) #define __put_user_x8(x, ptr, __ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_u64(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_u64(x, ptr, retval, errret) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) +#define __put_user_asm_ex_u64(x, addr) \ + __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -241,6 +252,7 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ + might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -274,10 +286,32 @@ do { \ __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ break; \ case 4: \ - __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ + errret); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + +#define __put_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + break; \ + case 2: \ + __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + break; \ + case 4: \ + __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + break; \ + case 8: \ + __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ break; \ default: \ __put_user_bad(); \ @@ -309,9 +343,12 @@ do { \ #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#define __get_user_asm_ex_u64(x, ptr) \ + __get_user_asm_ex(x, ptr, "q", "", "=r") #endif #define __get_user_size(x, ptr, size, retval, errret) \ @@ -348,16 +385,43 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +#define __get_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ + break; \ + case 2: \ + __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ + break; \ + case 4: \ + __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ + break; \ + case 8: \ + __get_user_asm_ex_u64(x, ptr); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : ltype(x) : "m" (__m(addr))) + #define __put_user_nocheck(x, ptr, size) \ ({ \ - long __pu_err; \ + int __pu_err; \ __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ __pu_err; \ }) #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err; \ + int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -383,6 +447,26 @@ struct __large_struct { unsigned long buf[100]; }; _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : : ltype(x), "m" (__m(addr))) + +/* + * uaccess_try and catch + */ +#define uaccess_try do { \ + int prev_err = current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = 0; \ + barrier(); + +#define uaccess_catch(err) \ + (err) |= current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = prev_err; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -406,6 +490,7 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. @@ -433,6 +518,45 @@ struct __large_struct { unsigned long buf[100]; }; #define __put_user_unaligned __put_user /* + * {get|put}_user_try and catch + * + * get_user_try { + * get_user_ex(...); + * } get_user_catch(err) + */ +#define get_user_try uaccess_try +#define get_user_catch(err) uaccess_catch(err) + +#define get_user_ex(x, ptr) do { \ + unsigned long __gue_val; \ + __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr)))__gue_val; \ +} while (0) + +#ifdef CONFIG_X86_WP_WORKS_OK + +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + +#define put_user_ex(x, ptr) \ + __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + +#else /* !CONFIG_X86_WP_WORKS_OK */ + +#define put_user_try do { \ + int __uaccess_err = 0; + +#define put_user_catch(err) \ + (err) |= __uaccess_err; \ +} while (0) + +#define put_user_ex(x, ptr) do { \ + __uaccess_err |= __put_user(x, ptr); \ +} while (0) + +#endif /* CONFIG_X86_WP_WORKS_OK */ + +/* * movsl can be slow when source and dest are not both 8-byte aligned */ #ifdef CONFIG_X86_INTEL_USERCOPY diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index d095a3aeea1..5e06259e90e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - return __copy_to_user_inatomic(to, from, n); + might_fault(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 664f15280f1..84210c479fc 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -29,6 +29,8 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -46,7 +48,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) return ret; case 10: __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); + ret, "q", "", "=r", 10); if (unlikely(ret)) return ret; __get_user_asm(*(u16 *)(8 + (char *)dst), @@ -71,6 +73,8 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -113,6 +117,8 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 834b2c1d89f..d2e415e6666 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_paccept 288 -__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_accept4 288 +__SYSCALL(__NR_accept4, sys_accept4) #define __NR_signalfd4 289 __SYSCALL(__NR_signalfd4, sys_signalfd4) #define __NR_eventfd2 290 diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h deleted file mode 100644 index 8b064bd9c55..00000000000 --- a/arch/x86/include/asm/unwind.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H - -#define UNW_PC(frame) ((void)(frame), 0UL) -#define UNW_SP(frame) ((void)(frame), 0UL) -#define UNW_FP(frame) ((void)(frame), 0UL) - -static inline int arch_unw_user_mode(const void *info) -{ - return 0; -} - -#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index d931d3b7e6f..7ed17ff502b 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -32,13 +32,18 @@ enum uv_bios_cmd { UV_BIOS_COMMON, UV_BIOS_GET_SN_INFO, - UV_BIOS_FREQ_BASE + UV_BIOS_FREQ_BASE, + UV_BIOS_WATCHLIST_ALLOC, + UV_BIOS_WATCHLIST_FREE, + UV_BIOS_MEMPROTECT, + UV_BIOS_GET_PARTITION_ADDR }; /* * Status values returned from a BIOS call. */ enum { + BIOS_STATUS_MORE_PASSES = 1, BIOS_STATUS_SUCCESS = 0, BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, BIOS_STATUS_EINVAL = -EINVAL, @@ -71,6 +76,21 @@ union partition_info_u { }; }; +union uv_watchlist_u { + u64 val; + struct { + u64 blade : 16, + size : 32, + filler : 16; + }; +}; + +enum uv_memprotect { + UV_MEMPROT_RESTRICT_ACCESS, + UV_MEMPROT_ALLOW_AMO, + UV_MEMPROT_ALLOW_RW +}; + /* * bios calls have 6 parameters */ @@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); +extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int, + unsigned long *); +extern int uv_bios_mq_watchlist_free(int, int); +extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); +extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern void uv_bios_init(void); +extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; extern long sn_partition_id; -extern long uv_coherency_id; -extern long uv_region_size; -#define partition_coherence_id() (uv_coherency_id) +extern long sn_coherency_id; +extern long sn_region_size; +#define partition_coherence_id() (sn_coherency_id) extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h new file mode 100644 index 00000000000..8242bf96581 --- /dev/null +++ b/arch/x86/include/asm/uv/uv.h @@ -0,0 +1,36 @@ +#ifndef _ASM_X86_UV_UV_H +#define _ASM_X86_UV_UV_H + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; + +struct cpumask; +struct mm_struct; + +#ifdef CONFIG_X86_UV + +extern enum uv_system_type get_uv_system_type(void); +extern int is_uv_system(void); +extern void uv_cpu_init(void); +extern void uv_system_init(void); +extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, + unsigned int cpu); + +#else /* X86_UV */ + +static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline int is_uv_system(void) { return 0; } +static inline void uv_cpu_init(void) { } +static inline void uv_system_init(void) { } +static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +{ return 1; } +static inline const struct cpumask * +uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, + unsigned long va, unsigned int cpu) +{ return cpumask; } + +#endif /* X86_UV */ + +#endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e2363253bbb..9b0e61bf7a8 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - int dest_subnodeid:6; /* must be zero */ + unsigned int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ - /* bits 20:6 */ - int command:8; /* message type */ + unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ + /* bits 20:6 */ /* first bit in node_map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - int rsvd_1:3; /* must be zero */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - int rsvd_2:9; /* must be zero */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - int payload_2a:8; /* becomes byte 16 of msg */ + unsigned int payload_2a:8;/* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - int payload_2b:8; /* becomes byte 17 of msg */ + unsigned int payload_2b:8;/* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - int rsvd_3:1; /* must be zero */ + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - int replied_to:1; /* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ /* bit 58 */ - int payload_1a:5; /* not currently used */ + unsigned int payload_1a:5;/* not currently used */ /* bits 63:59 */ - int payload_1b:8; /* not currently used */ + unsigned int payload_1b:8;/* not currently used */ /* bits 71:64 */ - int payload_1c:8; /* not currently used */ + unsigned int payload_1c:8;/* not currently used */ /* bits 79:72 */ - int payload_1d:2; /* not currently used */ + unsigned int payload_1d:2;/* not currently used */ /* bits 81:80 */ - int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - int sw_ack_flag:1; /* software acknowledge flag */ + unsigned int sw_ack_flag:1;/* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - int rsvd_5:6; /* must be zero */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - int int_both:1; /* if 1, interrupt both sockets on the blade */ + unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ /* bit 101*/ - int fairness:3; /* usually zero */ + unsigned int fairness:3;/* usually zero */ /* bits 104:102 */ - int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - int chaining:1; /* next descriptor is part of this activation*/ + unsigned int chaining:1;/* next descriptor is part of this activation*/ /* bit 106 */ - int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; @@ -325,7 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 7a5782610b2..777327ef05c 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -113,25 +113,37 @@ */ #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) +struct uv_scir_s { + struct timer_list timer; + unsigned long offset; + unsigned long last; + unsigned long idle_on; + unsigned long idle_off; + unsigned char state; + unsigned char enabled; +}; + /* * The following defines attributes of the HUB chip. These attributes are * frequently referenced and are kept in the per-cpu data areas of each cpu. * They are kept together in a struct to minimize cache misses. */ struct uv_hub_info_s { - unsigned long global_mmr_base; - unsigned long gpa_mask; - unsigned long gnode_upper; - unsigned long lowmem_remap_top; - unsigned long lowmem_remap_base; - unsigned short pnode; - unsigned short pnode_mask; - unsigned short coherency_domain_number; - unsigned short numa_blade_id; - unsigned char blade_processor_id; - unsigned char m_val; - unsigned char n_val; + unsigned long global_mmr_base; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; + unsigned short coherency_domain_number; + unsigned short numa_blade_id; + unsigned char blade_processor_id; + unsigned char m_val; + unsigned char n_val; + struct uv_scir_s scir; }; + DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) @@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_APIC_PNODE_SHIFT 6 +/* Local Bus from cpu's perspective */ +#define LOCAL_BUS_BASE 0x1c00000 +#define LOCAL_BUS_SIZE (4 * 1024 * 1024) + +/* + * System Controller Interface Reg + * + * Note there are NO leds on a UV system. This register is only + * used by the system controller to monitor system-wide operation. + * There are 64 regs per node. With Nahelem cpus (2 cores per node, + * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on + * a node. + * + * The window is located at top of ACPI MMR space + */ +#define SCIR_WINDOW_COUNT 64 +#define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \ + LOCAL_BUS_SIZE - \ + SCIR_WINDOW_COUNT) + +#define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */ +#define SCIR_CPU_ACTIVITY 0x02 /* not idle */ +#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */ + /* * Macros for converting between kernel virtual addresses, socket local physical * addresses, and UV global physical addresses. @@ -174,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) { if (paddr < uv_hub_info->lowmem_remap_top) - paddr += uv_hub_info->lowmem_remap_base; + paddr |= uv_hub_info->lowmem_remap_base; return paddr | uv_hub_info->gnode_upper; } @@ -182,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) /* socket virtual --> UV global physical address */ static inline unsigned long uv_gpa(void *v) { - return __pa(v) | uv_hub_info->gnode_upper; -} - -/* socket virtual --> UV global physical address */ -static inline void *uv_vgpa(void *v) -{ - return (void *)uv_gpa(v); -} - -/* UV global physical address --> socket virtual */ -static inline void *uv_va(unsigned long gpa) -{ - return __va(gpa & uv_hub_info->gpa_mask); + return uv_soc_phys_ram_to_gpa(__pa(v)); } /* pnode, offset --> socket virtual */ @@ -277,6 +301,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) *uv_local_mmr_address(offset) = val; } +static inline unsigned char uv_read_local_mmr8(unsigned long offset) +{ + return *((unsigned char *)uv_local_mmr_address(offset)); +} + +static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) +{ + *((unsigned char *)uv_local_mmr_address(offset)) = val; +} + /* * Structures and definitions for converting between cpu, node, pnode, and blade * numbers. @@ -351,5 +385,20 @@ static inline int uv_num_possible_blades(void) return uv_possible_blades; } -#endif /* _ASM_X86_UV_UV_HUB_H */ +/* Update SCIR state */ +static inline void uv_set_scir_bits(unsigned char value) +{ + if (uv_hub_info->scir.state != value) { + uv_hub_info->scir.state = value; + uv_write_local_mmr8(uv_hub_info->scir.offset, value); + } +} +static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) +{ + if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_cpu_hub_info(cpu)->scir.state = value; + uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); + } +} +#endif /* _ASM_X86_UV_UV_HUB_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 00000000000..59363627523 --- /dev/null +++ b/arch/x86/include/asm/virtext.h @@ -0,0 +1,132 @@ +/* CPU virtualization extensions handling + * + * This should carry the code for handling CPU virtualization extensions + * that needs to live in the kernel core. + * + * Author: Eduardo Habkost <ehabkost@redhat.com> + * + * Copyright (C) 2008, Red Hat Inc. + * + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef _ASM_X86_VIRTEX_H +#define _ASM_X86_VIRTEX_H + +#include <asm/processor.h> +#include <asm/system.h> + +#include <asm/vmx.h> +#include <asm/svm.h> + +/* + * VMX functions: + */ + +static inline int cpu_has_vmx(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + + +/** Disable VMX on the current CPU + * + * vmxoff causes a undefined-opcode exception if vmxon was not run + * on the CPU previously. Only call this function if you know VMX + * is enabled. + */ +static inline void cpu_vmxoff(void) +{ + asm volatile (ASM_VMX_VMXOFF : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); +} + +static inline int cpu_vmx_enabled(void) +{ + return read_cr4() & X86_CR4_VMXE; +} + +/** Disable VMX if it is enabled on the current CPU + * + * You shouldn't call this if cpu_has_vmx() returns 0. + */ +static inline void __cpu_emergency_vmxoff(void) +{ + if (cpu_vmx_enabled()) + cpu_vmxoff(); +} + +/** Disable VMX if it is supported and enabled on the current CPU + */ +static inline void cpu_emergency_vmxoff(void) +{ + if (cpu_has_vmx()) + __cpu_emergency_vmxoff(); +} + + + + +/* + * SVM functions: + */ + +/** Check if the CPU has SVM support + * + * You can use the 'msg' arg to get a message describing the problem, + * if the function returns zero. Simply pass NULL if you are not interested + * on the messages; gcc should take care of not generating code for + * the messages on this case. + */ +static inline int cpu_has_svm(const char **msg) +{ + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + if (msg) + *msg = "not amd"; + return 0; + } + + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + if (msg) + *msg = "can't execute cpuid_8000000a"; + return 0; + } + + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + if (msg) + *msg = "svm not available"; + return 0; + } + return 1; +} + + +/** Disable SVM on the current CPU + * + * You should call this only if cpu_has_svm() returned true. + */ +static inline void cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); +} + +/** Makes sure SVM is disabled, if it is supported on the CPU + */ +static inline void cpu_emergency_svm_disable(void) +{ + if (cpu_has_svm(NULL)) + cpu_svm_disable(); +} + +#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h index b7c0dea119f..61e08c0a290 100644 --- a/arch/x86/include/asm/vmi.h +++ b/arch/x86/include/asm/vmi.h @@ -223,9 +223,15 @@ struct pci_header { } __attribute__((packed)); /* Function prototypes for bootstrapping */ +#ifdef CONFIG_VMI extern void vmi_init(void); +extern void vmi_activate(void); extern void vmi_bringup(void); -extern void vmi_apply_boot_page_allocations(void); +#else +static inline void vmi_init(void) {} +static inline void vmi_activate(void) {} +static inline void vmi_bringup(void) {} +#endif /* State needed to start an application processor in an SMP system. */ struct vmi_ap_state { diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 00000000000..c11b7e100d8 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2008, VMware, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef ASM_X86__VMWARE_H +#define ASM_X86__VMWARE_H + +extern unsigned long vmware_get_tsc_khz(void); +extern int vmware_platform(void); +extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); + +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h new file mode 100644 index 00000000000..d0238e6151d --- /dev/null +++ b/arch/x86/include/asm/vmx.h @@ -0,0 +1,382 @@ +#ifndef VMX_H +#define VMX_H + +/* + * vmx.h: VMX Architecture related definitions + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * A few random additions are: + * Copyright (C) 2006 Qumranet + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + */ + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 + + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 + +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 + +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 + +/* VMCS Encodings */ +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 + +/* + * Interruption-information format + */ +#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ +#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ +#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ +#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ +#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 + +#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK +#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK +#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK +#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK + +#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ +#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ +#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ + +/* GUEST_INTERRUPTIBILITY_INFO flags. */ +#define GUEST_INTR_STATE_STI 0x00000001 +#define GUEST_INTR_STATE_MOV_SS 0x00000002 +#define GUEST_INTR_STATE_SMI 0x00000004 +#define GUEST_INTR_STATE_NMI 0x00000008 + +/* + * Exit Qualifications for MOV for Control Register Access + */ +#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/ +#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ +#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */ +#define LMSW_SOURCE_DATA_SHIFT 16 +#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ +#define REG_EAX (0 << 8) +#define REG_ECX (1 << 8) +#define REG_EDX (2 << 8) +#define REG_EBX (3 << 8) +#define REG_ESP (4 << 8) +#define REG_EBP (5 << 8) +#define REG_ESI (6 << 8) +#define REG_EDI (7 << 8) +#define REG_R8 (8 << 8) +#define REG_R9 (9 << 8) +#define REG_R10 (10 << 8) +#define REG_R11 (11 << 8) +#define REG_R12 (12 << 8) +#define REG_R13 (13 << 8) +#define REG_R14 (14 << 8) +#define REG_R15 (15 << 8) + +/* + * Exit Qualifications for MOV for Debug Register Access + */ +#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */ +#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ +#define TYPE_MOV_TO_DR (0 << 4) +#define TYPE_MOV_FROM_DR (1 << 4) +#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ + + +/* segment AR */ +#define SEGMENT_AR_L_MASK (1 << 13) + +#define AR_TYPE_ACCESSES_MASK 1 +#define AR_TYPE_READABLE_MASK (1 << 1) +#define AR_TYPE_WRITEABLE_MASK (1 << 2) +#define AR_TYPE_CODE_MASK (1 << 3) +#define AR_TYPE_MASK 0x0f +#define AR_TYPE_BUSY_64_TSS 11 +#define AR_TYPE_BUSY_32_TSS 11 +#define AR_TYPE_BUSY_16_TSS 3 +#define AR_TYPE_LDT 2 + +#define AR_UNUSABLE_MASK (1 << 16) +#define AR_S_MASK (1 << 4) +#define AR_P_MASK (1 << 7) +#define AR_L_MASK (1 << 13) +#define AR_DB_MASK (1 << 14) +#define AR_G_MASK (1 << 15) +#define AR_DPL_SHIFT 5 +#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) + +#define AR_RESERVD_MASK 0xfffe0f00 + +#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) + +#define VMX_NR_VPIDS (1 << 16) +#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 +#define VMX_VPID_EXTENT_ALL_CONTEXT 2 + +#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 +#define VMX_EPT_EXTENT_CONTEXT 1 +#define VMX_EPT_EXTENT_GLOBAL 2 +#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) +#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) +#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_EPT_DEFAULT_GAW 3 +#define VMX_EPT_MAX_GAW 0x4 +#define VMX_EPT_MT_EPTE_SHIFT 3 +#define VMX_EPT_GAW_EPTP_SHIFT 3 +#define VMX_EPT_DEFAULT_MT 0x6ull +#define VMX_EPT_READABLE_MASK 0x1ull +#define VMX_EPT_WRITABLE_MASK 0x2ull +#define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) + +#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + + + +#endif diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index b3e64730762..c1635d43616 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -527,3 +527,45 @@ extern void voyager_smp_intr_init(void); #define VOYAGER_PSI_SUBREAD 2 #define VOYAGER_PSI_SUBWRITE 3 extern void voyager_cat_psi(__u8, __u16, __u8 *); + +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 +#define VIC_CALL_FUNCTION_SINGLE_CPI 7 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 19144184983..1df35417c41 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -15,10 +15,4 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->flags); } -static inline void xen_do_IRQ(int irq, struct pt_regs *regs) -{ - regs->orig_ax = ~irq; - do_IRQ(regs); -} - #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 3f6000d95fe..5e79ca69432 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -33,8 +33,14 @@ #ifndef _ASM_X86_XEN_HYPERCALL_H #define _ASM_X86_XEN_HYPERCALL_H +#include <linux/kernel.h> +#include <linux/spinlock.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/types.h> + +#include <asm/page.h> +#include <asm/pgtable.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a38d25ac87d..81fbd735aec 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -33,39 +33,10 @@ #ifndef _ASM_X86_XEN_HYPERVISOR_H #define _ASM_X86_XEN_HYPERVISOR_H -#include <linux/types.h> -#include <linux/kernel.h> - -#include <xen/interface/xen.h> -#include <xen/interface/version.h> - -#include <asm/ptrace.h> -#include <asm/page.h> -#include <asm/desc.h> -#if defined(__i386__) -# ifdef CONFIG_X86_PAE -# include <asm-generic/pgtable-nopud.h> -# else -# include <asm-generic/pgtable-nopmd.h> -# endif -#endif -#include <asm/xen/hypercall.h> - /* arch/i386/kernel/setup.c */ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -/* arch/i386/mach-xen/evtchn.c */ -/* Force a proper event-channel callback from Xen. */ -extern void force_evtchn_callback(void); - -/* Turn jiffies into Xen system time. */ -u64 jiffies_to_st(unsigned long jiffies); - - -#define MULTI_UVMFLAGS_INDEX 3 -#define MULTI_UVMDOMID_INDEX 4 - enum xen_domain_type { XEN_NATIVE, XEN_PV_DOMAIN, @@ -74,9 +45,15 @@ enum xen_domain_type { extern enum xen_domain_type xen_domain_type; +#ifdef CONFIG_XEN #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) +#else +#define xen_domain() (0) +#endif + +#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) + #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) -#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index bc628998a1b..4bd990ee43d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -1,11 +1,16 @@ #ifndef _ASM_X86_XEN_PAGE_H #define _ASM_X86_XEN_PAGE_H +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/spinlock.h> #include <linux/pfn.h> #include <asm/uaccess.h> +#include <asm/page.h> #include <asm/pgtable.h> +#include <xen/interface/xen.h> #include <xen/features.h> /* Xen machine address */ @@ -132,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) pte_t pte; pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | - (pgprot_val(pgprot) & __supported_pte_mask); + massage_pgprot(pgprot); return pte; } |