From 444933c6c6e82362ba8e0da26f41a53c433d11ef Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:28 +0900 Subject: [IA64] pvops: preparation: remove extern in irq_ia64.c remove extern declaration of handle_IPI() in irq_ia64.c. Instead, declare it in asm-ia64/smp.h. Later handle_IPI() will be referenced from another file. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/irq_ia64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5538471e8d6..c48171bc796 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -600,7 +600,6 @@ static irqreturn_t dummy_handler (int irq, void *dev_id) { BUG(); } -extern irqreturn_t handle_IPI (int irq, void *dev_id); static struct irqaction ipi_irqaction = { .handler = handle_IPI, -- cgit v1.2.3-70-g09d2 From 8311d21c35092aa4c4a12e0140e1ef3443489d77 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:29 +0900 Subject: [IA64] pvops: preparation: move the constants, LOAD_OFFSET, to a header file. Move the LOAD_OFFSET definition from vmlinux.lds.S into system.h. On paravirtualized environments, it is necessary to detect the execution environment. One of the solutions is the multi entry point. The multi entry point allows a boot loader to start the kernel execution from the entry point which is different from the ELF entry point. The non standard entry point will be defined as the specialized elf note which contains the LMA of the entry point symbol. The constant, LOAD_OFFSET, is necessary to calculate the symbol's LMA. Move the definition into the public header file to make it available to the multi entry point support. 
Cc: "He, Qing" Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 1 - include/asm-ia64/system.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5929ab10a28..5a77206c249 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -4,7 +4,6 @@ #include #include -#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) #include #define IVT_TEXT \ diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 26e250bfb91..0db8c9812ce 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -26,6 +26,7 @@ */ #define KERNEL_START (GATE_ADDR+__IA64_UL_CONST(0x100000000)) #define PERCPU_ADDR (-PERCPU_PAGE_SIZE) +#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) #ifndef __ASSEMBLY__ -- cgit v1.2.3-70-g09d2 From 90aeb169c03a96e22674741f08054023c33d595b Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:32 +0900 Subject: [IA64] pvops: introduce pv_info which describes some random info. introduce pv_info which describes some random info about underlying execution environment. 
Cc: Jes Sorensen Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 2 ++ arch/ia64/kernel/paravirt.c | 41 ++++++++++++++++++++++++++++++ include/asm-ia64/paravirt.h | 62 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 arch/ia64/kernel/paravirt.c create mode 100644 include/asm-ia64/paravirt.h (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 13fd10e8699..10a4ddb5b27 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -36,6 +36,8 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o +obj-$(CONFIG_PARAVIRT) += paravirt.o + obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c new file mode 100644 index 00000000000..d295ea5e59c --- /dev/null +++ b/arch/ia64/kernel/paravirt.c @@ -0,0 +1,41 @@ +/****************************************************************************** + * arch/ia64/kernel/paravirt.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * Yaozu (Eddie) Dong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +#include +#include +#include +#include + +#include +#include + +/*************************************************************************** + * general info + */ +struct pv_info pv_info = { + .kernel_rpl = 0, + .paravirt_enabled = 0, + .name = "bare hardware" +}; diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h new file mode 100644 index 00000000000..26b43342308 --- /dev/null +++ b/include/asm-ia64/paravirt.h @@ -0,0 +1,62 @@ +/****************************************************************************** + * include/asm-ia64/paravirt.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + + +#ifndef __ASM_PARAVIRT_H +#define __ASM_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT_GUEST + +#ifndef __ASSEMBLY__ + +#include +#include + +/****************************************************************************** + * general info + */ +struct pv_info { + unsigned int kernel_rpl; + int paravirt_enabled; + const char *name; +}; + +extern struct pv_info pv_info; + +static inline int paravirt_enabled(void) +{ + return pv_info.paravirt_enabled; +} + +static inline unsigned int get_kernel_rpl(void) +{ + return pv_info.kernel_rpl; +} + +#endif /* !__ASSEMBLY__ */ + +#else +/* fallback for native case */ + +#endif /* CONFIG_PARAVIRT_GUEST */ + +#endif /* __ASM_PARAVIRT_H */ -- cgit v1.2.3-70-g09d2 From 3e0879deb700f322f6c81ab34f056fc72d15ec02 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:33 +0900 Subject: [IA64] pvops: add an early setup hook for pv_ops. This patch adds a setup hook in the very early boot sequence before start_kernel() to initialize paravirtualization stuff. The hook will be set by each pv loader code or by using multi entry point. 
Signed-off-by: Qing He Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/head.S | 41 +++++++++++++++++++++++++++++++++++++++++ include/asm-ia64/paravirt.h | 3 +++ 2 files changed, 44 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index ddeab4e36fd..db540e58c78 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -26,11 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #ifdef CONFIG_HOTPLUG_CPU #define SAL_PSR_BITS_TO_SET \ @@ -367,6 +370,44 @@ start_ap: ;; (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader +#ifdef CONFIG_PARAVIRT + + movl r14=hypervisor_setup_hooks + movl r15=hypervisor_type + mov r16=num_hypervisor_hooks + ;; + ld8 r2=[r15] + ;; + cmp.ltu p7,p0=r2,r16 // array size check + shladd r8=r2,3,r14 + ;; +(p7) ld8 r9=[r8] + ;; +(p7) mov b1=r9 +(p7) cmp.ne.unc p7,p0=r9,r0 // no actual branch to NULL + ;; +(p7) br.call.sptk.many rp=b1 + + __INITDATA + +default_setup_hook = 0 // Currently nothing needs to be done. + + .weak xen_setup_hook + + .global hypervisor_type +hypervisor_type: + data8 PARAVIRT_HYPERVISOR_TYPE_DEFAULT + + // must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx + +hypervisor_setup_hooks: + data8 default_setup_hook + data8 xen_setup_hook +num_hypervisor_hooks = (. 
- hypervisor_setup_hooks) / 8 + .previous + +#endif + #ifdef CONFIG_SMP (isAP) br.call.sptk.many rp=start_secondary .ret0: diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h index 26b43342308..1032b216aea 100644 --- a/include/asm-ia64/paravirt.h +++ b/include/asm-ia64/paravirt.h @@ -26,6 +26,9 @@ #ifdef CONFIG_PARAVIRT_GUEST +#define PARAVIRT_HYPERVISOR_TYPE_DEFAULT 0 +#define PARAVIRT_HYPERVISOR_TYPE_XEN 1 + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3-70-g09d2 From 1ff730b52f0c3e4e3846c3ff345c5526b2633ba9 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:34 +0900 Subject: [IA64] pvops: introduce pv_cpu_ops to paravirtualize privileged instructions. introduce pv_cpu_ops to paravirtualize privileged instructions which are defined by ia64 intrinsics. make them indirect C function calls by introducing function tables, pv_cpu_ops. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 247 +++++++++++++++++++++++++++++++++++++ include/asm-ia64/Kbuild | 2 +- include/asm-ia64/gcc_intrin.h | 24 ++-- include/asm-ia64/intel_intrin.h | 41 +++--- include/asm-ia64/intrinsics.h | 62 ++++++++-- include/asm-ia64/paravirt_privop.h | 91 ++++++++++++++ 6 files changed, 425 insertions(+), 42 deletions(-) create mode 100644 include/asm-ia64/paravirt_privop.h (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index d295ea5e59c..e5482bb6841 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -39,3 +40,249 @@ struct pv_info pv_info = { .paravirt_enabled = 0, .name = "bare hardware" }; + +/*************************************************************************** + * pv_cpu_ops + * intrinsics hooks. 
+ */ + +/* ia64_native_xxx are macros so that we have to make them real functions */ + +#define DEFINE_VOID_FUNC1(name) \ + static void \ + ia64_native_ ## name ## _func(unsigned long arg) \ + { \ + ia64_native_ ## name(arg); \ + } \ + +#define DEFINE_VOID_FUNC2(name) \ + static void \ + ia64_native_ ## name ## _func(unsigned long arg0, \ + unsigned long arg1) \ + { \ + ia64_native_ ## name(arg0, arg1); \ + } \ + +#define DEFINE_FUNC0(name) \ + static unsigned long \ + ia64_native_ ## name ## _func(void) \ + { \ + return ia64_native_ ## name(); \ + } + +#define DEFINE_FUNC1(name, type) \ + static unsigned long \ + ia64_native_ ## name ## _func(type arg) \ + { \ + return ia64_native_ ## name(arg); \ + } \ + +DEFINE_VOID_FUNC1(fc); +DEFINE_VOID_FUNC1(intrin_local_irq_restore); + +DEFINE_VOID_FUNC2(ptcga); +DEFINE_VOID_FUNC2(set_rr); + +DEFINE_FUNC0(get_psr_i); + +DEFINE_FUNC1(thash, unsigned long); +DEFINE_FUNC1(get_cpuid, int); +DEFINE_FUNC1(get_pmd, int); +DEFINE_FUNC1(get_rr, unsigned long); + +static void +ia64_native_ssm_i_func(void) +{ + ia64_native_ssm(IA64_PSR_I); +} + +static void +ia64_native_rsm_i_func(void) +{ + ia64_native_rsm(IA64_PSR_I); +} + +static void +ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4) +{ + ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4); +} + +#define CASE_GET_REG(id) \ + case _IA64_REG_ ## id: \ + res = ia64_native_getreg(_IA64_REG_ ## id); \ + break; +#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id) +#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id) + +unsigned long +ia64_native_getreg_func(int regnum) +{ + unsigned long res = -1; + switch (regnum) { + CASE_GET_REG(GP); + CASE_GET_REG(IP); + CASE_GET_REG(PSR); + CASE_GET_REG(TP); + CASE_GET_REG(SP); + + CASE_GET_AR(KR0); + CASE_GET_AR(KR1); + CASE_GET_AR(KR2); + CASE_GET_AR(KR3); + CASE_GET_AR(KR4); + CASE_GET_AR(KR5); + CASE_GET_AR(KR6); + CASE_GET_AR(KR7); + CASE_GET_AR(RSC); + 
CASE_GET_AR(BSP); + CASE_GET_AR(BSPSTORE); + CASE_GET_AR(RNAT); + CASE_GET_AR(FCR); + CASE_GET_AR(EFLAG); + CASE_GET_AR(CSD); + CASE_GET_AR(SSD); + CASE_GET_AR(CFLAG); + CASE_GET_AR(FSR); + CASE_GET_AR(FIR); + CASE_GET_AR(FDR); + CASE_GET_AR(CCV); + CASE_GET_AR(UNAT); + CASE_GET_AR(FPSR); + CASE_GET_AR(ITC); + CASE_GET_AR(PFS); + CASE_GET_AR(LC); + CASE_GET_AR(EC); + + CASE_GET_CR(DCR); + CASE_GET_CR(ITM); + CASE_GET_CR(IVA); + CASE_GET_CR(PTA); + CASE_GET_CR(IPSR); + CASE_GET_CR(ISR); + CASE_GET_CR(IIP); + CASE_GET_CR(IFA); + CASE_GET_CR(ITIR); + CASE_GET_CR(IIPA); + CASE_GET_CR(IFS); + CASE_GET_CR(IIM); + CASE_GET_CR(IHA); + CASE_GET_CR(LID); + CASE_GET_CR(IVR); + CASE_GET_CR(TPR); + CASE_GET_CR(EOI); + CASE_GET_CR(IRR0); + CASE_GET_CR(IRR1); + CASE_GET_CR(IRR2); + CASE_GET_CR(IRR3); + CASE_GET_CR(ITV); + CASE_GET_CR(PMV); + CASE_GET_CR(CMCV); + CASE_GET_CR(LRR0); + CASE_GET_CR(LRR1); + + default: + printk(KERN_CRIT "wrong_getreg %d\n", regnum); + break; + } + return res; +} + +#define CASE_SET_REG(id) \ + case _IA64_REG_ ## id: \ + ia64_native_setreg(_IA64_REG_ ## id, val); \ + break; +#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id) +#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id) + +void +ia64_native_setreg_func(int regnum, unsigned long val) +{ + switch (regnum) { + case _IA64_REG_PSR_L: + ia64_native_setreg(_IA64_REG_PSR_L, val); + ia64_dv_serialize_data(); + break; + CASE_SET_REG(SP); + CASE_SET_REG(GP); + + CASE_SET_AR(KR0); + CASE_SET_AR(KR1); + CASE_SET_AR(KR2); + CASE_SET_AR(KR3); + CASE_SET_AR(KR4); + CASE_SET_AR(KR5); + CASE_SET_AR(KR6); + CASE_SET_AR(KR7); + CASE_SET_AR(RSC); + CASE_SET_AR(BSP); + CASE_SET_AR(BSPSTORE); + CASE_SET_AR(RNAT); + CASE_SET_AR(FCR); + CASE_SET_AR(EFLAG); + CASE_SET_AR(CSD); + CASE_SET_AR(SSD); + CASE_SET_AR(CFLAG); + CASE_SET_AR(FSR); + CASE_SET_AR(FIR); + CASE_SET_AR(FDR); + CASE_SET_AR(CCV); + CASE_SET_AR(UNAT); + CASE_SET_AR(FPSR); + CASE_SET_AR(ITC); + CASE_SET_AR(PFS); + CASE_SET_AR(LC); + CASE_SET_AR(EC); + + 
CASE_SET_CR(DCR); + CASE_SET_CR(ITM); + CASE_SET_CR(IVA); + CASE_SET_CR(PTA); + CASE_SET_CR(IPSR); + CASE_SET_CR(ISR); + CASE_SET_CR(IIP); + CASE_SET_CR(IFA); + CASE_SET_CR(ITIR); + CASE_SET_CR(IIPA); + CASE_SET_CR(IFS); + CASE_SET_CR(IIM); + CASE_SET_CR(IHA); + CASE_SET_CR(LID); + CASE_SET_CR(IVR); + CASE_SET_CR(TPR); + CASE_SET_CR(EOI); + CASE_SET_CR(IRR0); + CASE_SET_CR(IRR1); + CASE_SET_CR(IRR2); + CASE_SET_CR(IRR3); + CASE_SET_CR(ITV); + CASE_SET_CR(PMV); + CASE_SET_CR(CMCV); + CASE_SET_CR(LRR0); + CASE_SET_CR(LRR1); + default: + printk(KERN_CRIT "wrong setreg %d\n", regnum); + break; + } +} + +struct pv_cpu_ops pv_cpu_ops = { + .fc = ia64_native_fc_func, + .thash = ia64_native_thash_func, + .get_cpuid = ia64_native_get_cpuid_func, + .get_pmd = ia64_native_get_pmd_func, + .ptcga = ia64_native_ptcga_func, + .get_rr = ia64_native_get_rr_func, + .set_rr = ia64_native_set_rr_func, + .set_rr0_to_rr4 = ia64_native_set_rr0_to_rr4_func, + .ssm_i = ia64_native_ssm_i_func, + .getreg = ia64_native_getreg_func, + .setreg = ia64_native_setreg_func, + .rsm_i = ia64_native_rsm_i_func, + .get_psr_i = ia64_native_get_psr_i_func, + .intrin_local_irq_restore + = ia64_native_intrin_local_irq_restore_func, +}; +EXPORT_SYMBOL(pv_cpu_ops); diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index eb24a3f47ca..ccbe8ae47a6 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -5,12 +5,12 @@ header-y += fpu.h header-y += fpswa.h header-y += ia64regs.h header-y += intel_intrin.h -header-y += intrinsics.h header-y += perfmon_default_smpl.h header-y += ptrace_offsets.h header-y += rse.h header-y += ucontext.h unifdef-y += gcc_intrin.h +unifdef-y += intrinsics.h unifdef-y += perfmon.h unifdef-y += ustack.h diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index 2fe292c275f..0f5b5592175 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h @@ -32,7 +32,7 @@ extern void ia64_bad_param_for_getreg (void); register 
unsigned long ia64_r13 asm ("r13") __used; #endif -#define ia64_setreg(regnum, val) \ +#define ia64_native_setreg(regnum, val) \ ({ \ switch (regnum) { \ case _IA64_REG_PSR_L: \ @@ -61,7 +61,7 @@ register unsigned long ia64_r13 asm ("r13") __used; } \ }) -#define ia64_getreg(regnum) \ +#define ia64_native_getreg(regnum) \ ({ \ __u64 ia64_intri_res; \ \ @@ -385,7 +385,7 @@ register unsigned long ia64_r13 asm ("r13") __used; #define ia64_invala() asm volatile ("invala" ::: "memory") -#define ia64_thash(addr) \ +#define ia64_native_thash(addr) \ ({ \ __u64 ia64_intri_res; \ asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ @@ -438,10 +438,10 @@ register unsigned long ia64_r13 asm ("r13") __used; #define ia64_set_pmd(index, val) \ asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory") -#define ia64_set_rr(index, val) \ +#define ia64_native_set_rr(index, val) \ asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory"); -#define ia64_get_cpuid(index) \ +#define ia64_native_get_cpuid(index) \ ({ \ __u64 ia64_intri_res; \ asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index)); \ @@ -477,33 +477,33 @@ register unsigned long ia64_r13 asm ("r13") __used; }) -#define ia64_get_pmd(index) \ +#define ia64_native_get_pmd(index) \ ({ \ __u64 ia64_intri_res; \ asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ ia64_intri_res; \ }) -#define ia64_get_rr(index) \ +#define ia64_native_get_rr(index) \ ({ \ __u64 ia64_intri_res; \ asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index)); \ ia64_intri_res; \ }) -#define ia64_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") +#define ia64_native_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") #define ia64_sync_i() asm volatile (";; sync.i" ::: "memory") -#define ia64_ssm(mask) asm volatile ("ssm %0":: "i"((mask)) : "memory") -#define ia64_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") +#define ia64_native_ssm(mask) asm volatile 
("ssm %0":: "i"((mask)) : "memory") +#define ia64_native_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") #define ia64_sum(mask) asm volatile ("sum %0":: "i"((mask)) : "memory") #define ia64_rum(mask) asm volatile ("rum %0":: "i"((mask)) : "memory") #define ia64_ptce(addr) asm volatile ("ptc.e %0" :: "r"(addr)) -#define ia64_ptcga(addr, size) \ +#define ia64_native_ptcga(addr, size) \ do { \ asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory"); \ ia64_dv_serialize_data(); \ @@ -608,7 +608,7 @@ do { \ } \ }) -#define ia64_intrin_local_irq_restore(x) \ +#define ia64_native_intrin_local_irq_restore(x) \ do { \ asm volatile (";; cmp.ne p6,p7=%0,r0;;" \ "(p6) ssm psr.i;" \ diff --git a/include/asm-ia64/intel_intrin.h b/include/asm-ia64/intel_intrin.h index a520d103d80..53cec577558 100644 --- a/include/asm-ia64/intel_intrin.h +++ b/include/asm-ia64/intel_intrin.h @@ -16,8 +16,8 @@ * intrinsic */ -#define ia64_getreg __getReg -#define ia64_setreg __setReg +#define ia64_native_getreg __getReg +#define ia64_native_setreg __setReg #define ia64_hint __hint #define ia64_hint_pause __hint_pause @@ -39,10 +39,10 @@ #define ia64_invala_fr __invala_fr #define ia64_nop __nop #define ia64_sum __sum -#define ia64_ssm __ssm +#define ia64_native_ssm __ssm #define ia64_rum __rum -#define ia64_rsm __rsm -#define ia64_fc __fc +#define ia64_native_rsm __rsm +#define ia64_native_fc __fc #define ia64_ldfs __ldfs #define ia64_ldfd __ldfd @@ -88,16 +88,17 @@ __setIndReg(_IA64_REG_INDR_PMC, index, val) #define ia64_set_pmd(index, val) \ __setIndReg(_IA64_REG_INDR_PMD, index, val) -#define ia64_set_rr(index, val) \ +#define ia64_native_set_rr(index, val) \ __setIndReg(_IA64_REG_INDR_RR, index, val) -#define ia64_get_cpuid(index) __getIndReg(_IA64_REG_INDR_CPUID, index) -#define __ia64_get_dbr(index) __getIndReg(_IA64_REG_INDR_DBR, index) -#define ia64_get_ibr(index) __getIndReg(_IA64_REG_INDR_IBR, index) -#define ia64_get_pkr(index) __getIndReg(_IA64_REG_INDR_PKR, index) 
-#define ia64_get_pmc(index) __getIndReg(_IA64_REG_INDR_PMC, index) -#define ia64_get_pmd(index) __getIndReg(_IA64_REG_INDR_PMD, index) -#define ia64_get_rr(index) __getIndReg(_IA64_REG_INDR_RR, index) +#define ia64_native_get_cpuid(index) \ + __getIndReg(_IA64_REG_INDR_CPUID, index) +#define __ia64_get_dbr(index) __getIndReg(_IA64_REG_INDR_DBR, index) +#define ia64_get_ibr(index) __getIndReg(_IA64_REG_INDR_IBR, index) +#define ia64_get_pkr(index) __getIndReg(_IA64_REG_INDR_PKR, index) +#define ia64_get_pmc(index) __getIndReg(_IA64_REG_INDR_PMC, index) +#define ia64_native_get_pmd(index) __getIndReg(_IA64_REG_INDR_PMD, index) +#define ia64_native_get_rr(index) __getIndReg(_IA64_REG_INDR_RR, index) #define ia64_srlz_d __dsrlz #define ia64_srlz_i __isrlz @@ -119,16 +120,16 @@ #define ia64_ld8_acq __ld8_acq #define ia64_sync_i __synci -#define ia64_thash __thash -#define ia64_ttag __ttag +#define ia64_native_thash __thash +#define ia64_native_ttag __ttag #define ia64_itcd __itcd #define ia64_itci __itci #define ia64_itrd __itrd #define ia64_itri __itri #define ia64_ptce __ptce #define ia64_ptcl __ptcl -#define ia64_ptcg __ptcg -#define ia64_ptcga __ptcga +#define ia64_native_ptcg __ptcg +#define ia64_native_ptcga __ptcga #define ia64_ptri __ptri #define ia64_ptrd __ptrd #define ia64_dep_mi _m64_dep_mi @@ -145,13 +146,13 @@ #define ia64_lfetch_fault __lfetch_fault #define ia64_lfetch_fault_excl __lfetch_fault_excl -#define ia64_intrin_local_irq_restore(x) \ +#define ia64_native_intrin_local_irq_restore(x) \ do { \ if ((x) != 0) { \ - ia64_ssm(IA64_PSR_I); \ + ia64_native_ssm(IA64_PSR_I); \ ia64_srlz_d(); \ } else { \ - ia64_rsm(IA64_PSR_I); \ + ia64_native_rsm(IA64_PSR_I); \ } \ } while (0) diff --git a/include/asm-ia64/intrinsics.h b/include/asm-ia64/intrinsics.h index a3b96892f83..47d686dba1e 100644 --- a/include/asm-ia64/intrinsics.h +++ b/include/asm-ia64/intrinsics.h @@ -18,15 +18,15 @@ # include #endif -#define ia64_get_psr_i() (ia64_getreg(_IA64_REG_PSR) & 
IA64_PSR_I) - -#define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ -do { \ - ia64_set_rr(0x0000000000000000UL, (val0)); \ - ia64_set_rr(0x2000000000000000UL, (val1)); \ - ia64_set_rr(0x4000000000000000UL, (val2)); \ - ia64_set_rr(0x6000000000000000UL, (val3)); \ - ia64_set_rr(0x8000000000000000UL, (val4)); \ +#define ia64_native_get_psr_i() (ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I) + +#define ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ +do { \ + ia64_native_set_rr(0x0000000000000000UL, (val0)); \ + ia64_native_set_rr(0x2000000000000000UL, (val1)); \ + ia64_native_set_rr(0x4000000000000000UL, (val2)); \ + ia64_native_set_rr(0x6000000000000000UL, (val3)); \ + ia64_native_set_rr(0x8000000000000000UL, (val4)); \ } while (0) /* @@ -194,4 +194,48 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void); #endif /* !CONFIG_IA64_DEBUG_CMPXCHG */ #endif + +#ifdef __KERNEL__ +#include +#endif + +#ifndef __ASSEMBLY__ +#if defined(CONFIG_PARAVIRT) && defined(__KERNEL__) +#define IA64_INTRINSIC_API(name) pv_cpu_ops.name +#define IA64_INTRINSIC_MACRO(name) paravirt_ ## name +#else +#define IA64_INTRINSIC_API(name) ia64_native_ ## name +#define IA64_INTRINSIC_MACRO(name) ia64_native_ ## name +#endif + +/************************************************/ +/* Instructions paravirtualized for correctness */ +/************************************************/ +/* fc, thash, get_cpuid, get_pmd, get_eflags, set_eflags */ +/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" + * is not currently used (though it may be in a long-format VHPT system!) 
+ */ +#define ia64_fc IA64_INTRINSIC_API(fc) +#define ia64_thash IA64_INTRINSIC_API(thash) +#define ia64_get_cpuid IA64_INTRINSIC_API(get_cpuid) +#define ia64_get_pmd IA64_INTRINSIC_API(get_pmd) + + +/************************************************/ +/* Instructions paravirtualized for performance */ +/************************************************/ +#define ia64_ssm IA64_INTRINSIC_MACRO(ssm) +#define ia64_rsm IA64_INTRINSIC_MACRO(rsm) +#define ia64_getreg IA64_INTRINSIC_API(getreg) +#define ia64_setreg IA64_INTRINSIC_API(setreg) +#define ia64_set_rr IA64_INTRINSIC_API(set_rr) +#define ia64_get_rr IA64_INTRINSIC_API(get_rr) +#define ia64_ptcga IA64_INTRINSIC_API(ptcga) +#define ia64_get_psr_i IA64_INTRINSIC_API(get_psr_i) +#define ia64_intrin_local_irq_restore \ + IA64_INTRINSIC_API(intrin_local_irq_restore) +#define ia64_set_rr0_to_rr4 IA64_INTRINSIC_API(set_rr0_to_rr4) + +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_IA64_INTRINSICS_H */ diff --git a/include/asm-ia64/paravirt_privop.h b/include/asm-ia64/paravirt_privop.h new file mode 100644 index 00000000000..7b133ae86df --- /dev/null +++ b/include/asm-ia64/paravirt_privop.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * include/asm-ia64/paravirt_privops.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _ASM_IA64_PARAVIRT_PRIVOP_H +#define _ASM_IA64_PARAVIRT_PRIVOP_H + +#ifdef CONFIG_PARAVIRT + +#ifndef __ASSEMBLY__ + +#include +#include /* for IA64_PSR_I */ + +/****************************************************************************** + * replacement of intrinsics operations. + */ + +struct pv_cpu_ops { + void (*fc)(unsigned long addr); + unsigned long (*thash)(unsigned long addr); + unsigned long (*get_cpuid)(int index); + unsigned long (*get_pmd)(int index); + unsigned long (*getreg)(int reg); + void (*setreg)(int reg, unsigned long val); + void (*ptcga)(unsigned long addr, unsigned long size); + unsigned long (*get_rr)(unsigned long index); + void (*set_rr)(unsigned long index, unsigned long val); + void (*set_rr0_to_rr4)(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); + void (*ssm_i)(void); + void (*rsm_i)(void); + unsigned long (*get_psr_i)(void); + void (*intrin_local_irq_restore)(unsigned long flags); +}; + +extern struct pv_cpu_ops pv_cpu_ops; + +extern void ia64_native_setreg_func(int regnum, unsigned long val); +extern unsigned long ia64_native_getreg_func(int regnum); + +/************************************************/ +/* Instructions paravirtualized for performance */ +/************************************************/ + +/* mask for ia64_native_ssm/rsm() must be constant ("i" constraint). + * static inline function doesn't satisfy it. 
*/ +#define paravirt_ssm(mask) \ + do { \ + if ((mask) == IA64_PSR_I) \ + pv_cpu_ops.ssm_i(); \ + else \ + ia64_native_ssm(mask); \ + } while (0) + +#define paravirt_rsm(mask) \ + do { \ + if ((mask) == IA64_PSR_I) \ + pv_cpu_ops.rsm_i(); \ + else \ + ia64_native_rsm(mask); \ + } while (0) + +#endif /* __ASSEMBLY__ */ + +#else + +/* fallback for native case */ + +#endif /* CONFIG_PARAVIRT */ + +#endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */ -- cgit v1.2.3-70-g09d2 From 1e39d80a5957eab9dfdd7490d5c5cee272c34aa7 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:35 +0900 Subject: [IA64] pvops: preparation for paravirtualization of hand written assembly code. Preparation for paravirtualization of hand written assembly code. They are paravirtualized by single source code and compiled multi times. To tell those files for target (including native), add one define. Cc: "Dong, Eddie" Cc: Keith Owens Cc: tgingold@free.fr Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 10a4ddb5b27..8b2524293eb 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -72,3 +72,12 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE # We must build gate.so before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/gate-data.o: $(obj)/gate.so + +# +# native ivt.S and entry.S +# +ASM_PARAVIRT_OBJS = ivt.o entry.o +define paravirtualized_native +AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE +endef +$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj)))) -- cgit v1.2.3-70-g09d2 From 02e32e36f42f8ea7ee6060d02f2d69ad5bad6d50 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:37 +0900 Subject: [IA64] pvops: paravirtualize minstate.h. 
paravirtualize minstate.h which are hand written assembly code. They include sensitive or performance critical privileged instructions. So that they are appropriate for paravirtualization. Cc: Keith Owens Cc: Akio Takebe Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/minstate.h | 13 +++++++------ arch/ia64/kernel/paravirt_inst.h | 29 +++++++++++++++++++++++++++++ include/asm-ia64/native/inst.h | 2 ++ 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 arch/ia64/kernel/paravirt_inst.h (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 74b6d670aae..292e214a3b8 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -2,6 +2,7 @@ #include #include "entry.h" +#include "paravirt_inst.h" #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* read ar.itc in advance, and use it before leaving bank 0 */ @@ -43,16 +44,16 @@ * Note that psr.ic is NOT turned on by this macro. This is so that * we can pass interruption state as arguments to a handler. 
*/ -#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND) \ +#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ mov r16=IA64_KR(CURRENT); /* M */ \ mov r27=ar.rsc; /* M */ \ mov r20=r1; /* A */ \ mov r25=ar.unat; /* M */ \ - mov r29=cr.ipsr; /* M */ \ + MOV_FROM_IPSR(p0,r29); /* M */ \ mov r26=ar.pfs; /* I */ \ - mov r28=cr.iip; /* M */ \ + MOV_FROM_IIP(r28); /* M */ \ mov r21=ar.fpsr; /* M */ \ - COVER; /* B;; (or nothing) */ \ + __COVER; /* B;; (or nothing) */ \ ;; \ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ ;; \ @@ -244,6 +245,6 @@ 1: \ .pred.rel "mutex", pKStk, pUStk -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND) -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) #define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h new file mode 100644 index 00000000000..5cad6fb2ed1 --- /dev/null +++ b/arch/ia64/kernel/paravirt_inst.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN +#include +#include +#else +#include +#endif + diff --git a/include/asm-ia64/native/inst.h b/include/asm-ia64/native/inst.h index 2a50b70b969..f1072ace0cf 100644 --- a/include/asm-ia64/native/inst.h +++ b/include/asm-ia64/native/inst.h @@ -20,6 +20,8 @@ * */ +#define DO_SAVE_MIN IA64_NATIVE_DO_SAVE_MIN + #ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK # define PARAVIRT_POISON 0xdeadbeefbaadf00d # define CLOBBER(clob) \ -- cgit v1.2.3-70-g09d2 From 498c5170472ff0c03a29d22dbd33225a0be038f4 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:38 +0900 Subject: [IA64] pvops: paravirtualize ivt.S paravirtualize ivt.S which implements fault handler in hand written assembly code. They includes sensitive or performance critical privileged instructions. So they need paravirtualization. Cc: Keith Owens Cc: tgingold@free.fr Cc: Akio Takebe Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 249 ++++++++++++++++++++++++------------------------- 1 file changed, 122 insertions(+), 127 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 80b44ea052d..23749ed3cf0 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -12,6 +12,14 @@ * * 00/08/23 Asit Mallick TLB handling for SMP * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * Xen paravirtualization + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + * Yaozu (Eddie) Dong */ /* * This file defines the interruption vector table used by the CPU. 
@@ -102,13 +110,13 @@ ENTRY(vhpt_miss) * - the faulting virtual address uses unimplemented address bits * - the faulting virtual address has no valid page table mapping */ - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss #ifdef CONFIG_HUGETLB_PAGE movl r18=PAGE_SHIFT - mov r25=cr.itir + MOV_FROM_ITIR(r25) #endif ;; - rsm psr.dt // use physical addressing for data + RSM_PSR_DT // use physical addressing for data mov r31=pr // save the predicate registers mov r19=IA64_KR(PT_BASE) // get page table base address shl r21=r16,3 // shift bit 60 into sign bit @@ -168,21 +176,21 @@ ENTRY(vhpt_miss) dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) ;; (p7) ld8 r18=[r21] // read *pte - mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss + MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss ;; (p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? - mov r22=cr.iha // get the VHPT address that caused the TLB miss + MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss ;; // avoid RAW on p7 (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? 
dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address ;; -(p10) itc.i r18 // insert the instruction TLB entry -(p11) itc.d r18 // insert the data TLB entry + ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and + // insert the data TLB entry (p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - mov cr.ifa=r22 + MOV_TO_IFA(r22, r24) #ifdef CONFIG_HUGETLB_PAGE -(p8) mov cr.itir=r25 // change to default page-size for VHPT + MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT #endif /* @@ -192,7 +200,7 @@ ENTRY(vhpt_miss) */ adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 ;; -(p7) itc.d r24 + ITC_D(p7, r24, r25) ;; #ifdef CONFIG_SMP /* @@ -234,7 +242,7 @@ ENTRY(vhpt_miss) #endif mov pr=r31,-1 // restore predicate registers - rfi + RFI END(vhpt_miss) .org ia64_ivt+0x400 @@ -248,11 +256,11 @@ ENTRY(itlb_miss) * mode, walk the page table, and then re-execute the PTE read and * go on normally after that. */ - mov r16=cr.ifa // get virtual address + MOV_FROM_IFA(r16) // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates .itlb_fault: - mov r17=cr.iha // get virtual address of PTE + MOV_FROM_IHA(r17) // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; 1: ld8 r18=[r17] // read *pte @@ -261,7 +269,7 @@ ENTRY(itlb_miss) tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? (p6) br.cond.spnt page_fault ;; - itc.i r18 + ITC_I(p0, r18, r19) ;; #ifdef CONFIG_SMP /* @@ -278,7 +286,7 @@ ENTRY(itlb_miss) (p7) ptc.l r16,r20 #endif mov pr=r31,-1 - rfi + RFI END(itlb_miss) .org ia64_ivt+0x0800 @@ -292,11 +300,11 @@ ENTRY(dtlb_miss) * mode, walk the page table, and then re-execute the PTE read and * go on normally after that. 
*/ - mov r16=cr.ifa // get virtual address + MOV_FROM_IFA(r16) // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates dtlb_fault: - mov r17=cr.iha // get virtual address of PTE + MOV_FROM_IHA(r17) // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; 1: ld8 r18=[r17] // read *pte @@ -305,7 +313,7 @@ dtlb_fault: tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? (p6) br.cond.spnt page_fault ;; - itc.d r18 + ITC_D(p0, r18, r19) ;; #ifdef CONFIG_SMP /* @@ -322,7 +330,7 @@ dtlb_fault: (p7) ptc.l r16,r20 #endif mov pr=r31,-1 - rfi + RFI END(dtlb_miss) .org ia64_ivt+0x0c00 @@ -330,9 +338,9 @@ END(dtlb_miss) // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(alt_itlb_miss) DBG_FAULT(3) - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss movl r17=PAGE_KERNEL - mov r21=cr.ipsr + MOV_FROM_IPSR(p0, r21) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r31=pr ;; @@ -341,9 +349,9 @@ ENTRY(alt_itlb_miss) ;; cmp.gt p8,p0=6,r22 // user mode ;; -(p8) thash r17=r16 + THASH(p8, r17, r16, r23) ;; -(p8) mov cr.iha=r17 + MOV_TO_IHA(p8, r17, r23) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk .itlb_fault #endif @@ -358,9 +366,9 @@ ENTRY(alt_itlb_miss) or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 (p8) br.cond.spnt page_fault ;; - itc.i r19 // insert the TLB entry + ITC_I(p0, r19, r18) // insert the TLB entry mov pr=r31,-1 - rfi + RFI END(alt_itlb_miss) .org ia64_ivt+0x1000 @@ -368,11 +376,11 @@ END(alt_itlb_miss) // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(alt_dtlb_miss) DBG_FAULT(4) - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss movl r17=PAGE_KERNEL - mov r20=cr.isr + MOV_FROM_ISR(r20) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r21=cr.ipsr + MOV_FROM_IPSR(p0, r21) mov r31=pr mov r24=PERCPU_ADDR ;; @@ -381,9 +389,9 @@ 
ENTRY(alt_dtlb_miss) ;; cmp.gt p8,p0=6,r22 // access to region 0-5 ;; -(p8) thash r17=r16 + THASH(p8, r17, r16, r25) ;; -(p8) mov cr.iha=r17 + MOV_TO_IHA(p8, r17, r25) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk dtlb_fault #endif @@ -402,7 +410,7 @@ ENTRY(alt_dtlb_miss) tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? ;; (p10) sub r19=r19,r26 -(p10) mov cr.itir=r25 + MOV_TO_ITIR(p10, r25, r24) cmp.ne p8,p0=r0,r23 (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field (p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr @@ -411,11 +419,11 @@ ENTRY(alt_dtlb_miss) dep r21=-1,r21,IA64_PSR_ED_BIT,1 ;; or r19=r19,r17 // insert PTE control bits into r19 -(p6) mov cr.ipsr=r21 + MOV_TO_IPSR(p6, r21, r24) ;; -(p7) itc.d r19 // insert the TLB entry + ITC_D(p7, r19, r18) // insert the TLB entry mov pr=r31,-1 - rfi + RFI END(alt_dtlb_miss) .org ia64_ivt+0x1400 @@ -444,10 +452,10 @@ ENTRY(nested_dtlb_miss) * * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) */ - rsm psr.dt // switch to using physical data addressing + RSM_PSR_DT // switch to using physical data addressing mov r19=IA64_KR(PT_BASE) // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit - mov r18=cr.itir + MOV_FROM_ITIR(r18) ;; shr.u r17=r16,61 // get the region number into r17 extr.u r18=r18,2,6 // get the faulting page size @@ -510,21 +518,15 @@ END(ikey_miss) //----------------------------------------------------------------------------------- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) ENTRY(page_fault) - ssm psr.dt - ;; - srlz.i + SSM_PSR_DT_AND_SRLZ_I ;; SAVE_MIN_WITH_COVER alloc r15=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=cr.isr + MOV_FROM_IFA(out0) + MOV_FROM_ISR(out1) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collectin is on - ;; -(p15) ssm psr.i // restore psr.i + 
SSM_PSR_I(p15, p15, r14) // restore psr.i movl r14=ia64_leave_kernel ;; SAVE_REST @@ -556,10 +558,10 @@ ENTRY(dirty_bit) * page table TLB entry isn't present, we take a nested TLB miss hit where we look * up the physical address of the L3 PTE and then continue at label 1 below. */ - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault mov r31=pr // save pr #ifdef CONFIG_SMP @@ -576,7 +578,7 @@ ENTRY(dirty_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present ;; -(p6) itc.d r25 // install updated PTE + ITC_D(p6, r25, r18) // install updated PTE ;; /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -602,7 +604,7 @@ ENTRY(dirty_bit) itc.d r18 // install updated PTE #endif mov pr=r31,-1 // restore pr - rfi + RFI END(dirty_bit) .org ia64_ivt+0x2400 @@ -611,22 +613,22 @@ END(dirty_bit) ENTRY(iaccess_bit) DBG_FAULT(9) // Like Entry 8, except for instruction access - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault mov r31=pr // save predicates #ifdef CONFIG_ITANIUM /* * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. */ - mov r17=cr.ipsr + MOV_FROM_IPSR(p0, r17) ;; - mov r18=cr.iip + MOV_FROM_IIP(r18) tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
;; (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa #endif /* CONFIG_ITANIUM */ ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault) #ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv @@ -642,7 +644,7 @@ ENTRY(iaccess_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only if page present ;; -(p6) itc.i r25 // install updated PTE + ITC_I(p6, r25, r26) // install updated PTE ;; /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -668,7 +670,7 @@ ENTRY(iaccess_bit) itc.i r18 // install updated PTE #endif /* !CONFIG_SMP */ mov pr=r31,-1 - rfi + RFI END(iaccess_bit) .org ia64_ivt+0x2800 @@ -677,10 +679,10 @@ END(iaccess_bit) ENTRY(daccess_bit) DBG_FAULT(10) // Like Entry 8, except for data access - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r31=pr mov r29=b0 // save b0 in case of nested fault) #ifdef CONFIG_SMP @@ -697,7 +699,7 @@ ENTRY(daccess_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only if page is present ;; -(p6) itc.d r25 // install updated PTE + ITC_D(p6, r25, r26) // install updated PTE /* * Tell the assemblers dependency-violation checker that the above "itc" instructions * cannot possibly affect the following loads: @@ -721,7 +723,7 @@ ENTRY(daccess_bit) #endif mov b0=r29 // restore b0 mov pr=r31,-1 - rfi + RFI END(daccess_bit) .org ia64_ivt+0x2c00 @@ -745,10 +747,10 @@ ENTRY(break_fault) */ DBG_FAULT(11) mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) - mov r29=cr.ipsr // M2 (12 cyc) + MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) mov r31=pr // I0 (2 cyc) - mov r17=cr.iim // M2 (2 cyc) + MOV_FROM_IIM(r17) // M2 (2 cyc) mov.m r27=ar.rsc // M2 (12 cyc) mov 
r18=__IA64_BREAK_SYSCALL // A @@ -767,7 +769,7 @@ ENTRY(break_fault) nop.m 0 movl r30=sys_call_table // X - mov r28=cr.iip // M2 (2 cyc) + MOV_FROM_IIP(r28) // M2 (2 cyc) cmp.eq p0,p7=r18,r17 // I0 is this a system call? (p7) br.cond.spnt non_syscall // B no -> // @@ -864,18 +866,17 @@ ENTRY(break_fault) #endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 nop 0 - bsw.1 // B (6 cyc) regs are saved, switch to bank 1 + BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 ;; - ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection + // M0 ensure interruption collection is on movl r3=ia64_ret_from_syscall // X ;; - - srlz.i // M0 ensure interruption collection is on mov rp=r3 // I0 set the real return addr (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT -(p15) ssm psr.i // M2 restore psr.i + SSM_PSR_I(p15, p15, r16) // M2 restore psr.i (p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic // NOT REACHED @@ -899,16 +900,15 @@ ENTRY(interrupt) mov r31=pr // prepare to save predicates ;; SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ssm psr.ic | PSR_DEFAULT_BITS - ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on ;; SAVE_REST ;; MCA_RECOVER_RANGE(interrupt) alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - mov out0=cr.ivr // pass cr.ivr as first arg + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg add out1=16,sp // pass pointer to pt_regs as second arg ;; srlz.d // make sure we see the effect of cr.ivr @@ -978,6 +978,7 @@ END(interrupt) * - ar.fpsr: set to kernel settings * - b6: preserved (same as on entry) */ 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE GLOBAL_ENTRY(ia64_syscall_setup) #if PT(B6) != 0 # error This code assumes that b6 is the first field in pt_regs. @@ -1069,6 +1070,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) (p10) mov r8=-EINVAL br.ret.sptk.many b7 END(ia64_syscall_setup) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ .org ia64_ivt+0x3c00 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1082,7 +1084,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. @@ -1092,7 +1094,7 @@ END(ia64_syscall_setup) * account_sys_enter is called from SAVE_MIN* macros if accounting is * enabled and if the macro is entered from user mode. */ -ENTRY(account_sys_enter) +GLOBAL_ENTRY(account_sys_enter) // mov.m r20=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 @@ -1134,15 +1136,13 @@ ENTRY(non_syscall) // suitable spot... alloc r14=ar.pfs,0,0,2,0 - mov out0=cr.iim + MOV_FROM_IIM(out0) add out1=16,sp adds r3=8,r2 // set up second base pointer for SAVE_REST - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i movl r15=ia64_leave_kernel ;; SAVE_REST @@ -1168,14 +1168,12 @@ ENTRY(dispatch_unaligned_handler) SAVE_MIN_WITH_COVER ;; alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 
- mov out0=cr.ifa + MOV_FROM_IFA(out0) adds out1=16,sp - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer ;; SAVE_REST @@ -1207,17 +1205,16 @@ ENTRY(dispatch_to_fault_handler) */ SAVE_MIN_WITH_COVER_R19 alloc r14=ar.pfs,0,0,5,0 - mov out0=r15 - mov out1=cr.isr - mov out2=cr.ifa - mov out3=cr.iim - mov out4=cr.itir + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST ;; SAVE_REST @@ -1236,8 +1233,8 @@ END(dispatch_to_fault_handler) // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) ENTRY(page_not_present) DBG_FAULT(20) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT /* * The Linux page fault handler doesn't expect non-present pages to be in * the TLB. Flush the existing entry now, so we meet that expectation. 
@@ -1256,8 +1253,8 @@ END(page_not_present) // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) ENTRY(key_permission) DBG_FAULT(21) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1269,8 +1266,8 @@ END(key_permission) // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(iaccess_rights) DBG_FAULT(22) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1282,8 +1279,8 @@ END(iaccess_rights) // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(daccess_rights) DBG_FAULT(23) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1295,7 +1292,7 @@ END(daccess_rights) // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(general_exception) DBG_FAULT(24) - mov r16=cr.isr + MOV_FROM_ISR(r16) mov r31=pr ;; cmp4.eq p6,p0=0,r16 @@ -1324,8 +1321,8 @@ END(disabled_fp_reg) ENTRY(nat_consumption) DBG_FAULT(26) - mov r16=cr.ipsr - mov r17=cr.isr + MOV_FROM_IPSR(p0, r16) + MOV_FROM_ISR(r17) mov r31=pr // save PR ;; and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} @@ -1335,10 +1332,10 @@ ENTRY(nat_consumption) dep r16=-1,r16,IA64_PSR_ED_BIT,1 (p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) ;; - mov cr.ipsr=r16 // set cr.ipsr.na + MOV_TO_IPSR(p0, r16, r18) mov pr=r31,-1 ;; - rfi + RFI 1: mov pr=r31,-1 ;; @@ -1360,26 +1357,26 @@ ENTRY(speculation_vector) * * cr.imm contains zero_ext(imm21) */ - mov r18=cr.iim + MOV_FROM_IIM(r18) ;; - mov r17=cr.iip + MOV_FROM_IIP(r17) shl r18=r18,43 // put sign bit in position (43=64-21) ;; - mov r16=cr.ipsr + MOV_FROM_IPSR(p0, r16) shr r18=r18,39 // sign extend (39=43-4) ;; add r17=r17,r18 // now add the offset ;; - mov cr.iip=r17 + MOV_TO_IIP(r17, r19) // write the adjusted target back to cr.iip dep r16=0,r16,41,2 // clear EI ;; - mov cr.ipsr=r16 + MOV_TO_IPSR(p0, r16, r19) // write back ipsr with EI cleared ;; - rfi // and go back + RFI END(speculation_vector) .org ia64_ivt+0x5800 @@ -1517,11 +1514,11 @@
ENTRY(ia32_intercept) DBG_FAULT(46) #ifdef CONFIG_IA32_SUPPORT mov r31=pr - mov r16=cr.isr + MOV_FROM_ISR(r16) ;; extr.u r17=r16,16,8 // get ISR.code mov r18=ar.eflag - mov r19=cr.iim // old eflag value + MOV_FROM_IIM(r19) // old eflag value ;; cmp.ne p6,p0=2,r17 (p6) br.cond.spnt 1f // not a system flag fault @@ -1533,7 +1530,7 @@ ENTRY(ia32_intercept) (p6) br.cond.spnt 1f // eflags.ac bit didn't change ;; mov pr=r31,-1 // restore predicate registers - rfi + RFI 1: #endif // CONFIG_IA32_SUPPORT @@ -1686,11 +1683,10 @@ ENTRY(dispatch_illegal_op_fault) .prologue .body SAVE_MIN_WITH_COVER - ssm psr.ic | PSR_DEFAULT_BITS + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST ;; alloc r14=ar.pfs,0,0,1,0 // must be first in insn group @@ -1729,12 +1725,11 @@ END(dispatch_illegal_op_fault) ENTRY(dispatch_to_ia32_handler) SAVE_MIN ;; - mov r14=cr.isr - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on + MOV_FROM_ISR(r14) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on ;; -(p15) ssm psr.i + SSM_PSR_I(p15, p15, r3) adds r3=8,r2 // Base pointer for SAVE_REST ;; SAVE_REST -- cgit v1.2.3-70-g09d2 From 4df8d22bbbb16ccfa4e10cc068135183c9e5e006 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 27 May 2008 15:08:01 -0700 Subject: [IA64] pvops: paravirtualize entry.S paravirtualize ia64_switch_to, ia64_leave_syscall and ia64_leave_kernel. They include sensitive or performance critical privileged instructions so that they need paravirtualization. To paravirtualize them by single source and multi compile they are converted into indirect jump. And define each pv instance.
Cc: Keith Owens Cc: "Dong, Eddie" Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/entry.S | 115 +++++++++++++++++++++++-------------- arch/ia64/kernel/paravirt.c | 19 ++++++ arch/ia64/kernel/paravirtentry.S | 60 +++++++++++++++++++ include/asm-ia64/native/inst.h | 8 +++ include/asm-ia64/paravirt_privop.h | 23 ++++++++ 6 files changed, 183 insertions(+), 44 deletions(-) create mode 100644 arch/ia64/kernel/paravirtentry.S (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 8b2524293eb..cea91f17d44 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o -obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ca2bb95726d..56ab156c48a 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -22,6 +22,11 @@ * Patrick O'Rourke * 11/07/2000 */ +/* + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + */ /* * Global (preserved) predicate usage on syscall entry/exit path: * @@ -45,6 +50,7 @@ #include "minstate.h" +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE /* * execve() is special because in case of success, we need to * setup a null register window frame. @@ -173,6 +179,7 @@ GLOBAL_ENTRY(sys_clone) mov rp=loc0 br.ret.sptk.many rp END(sys_clone) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ /* * prev_task <- ia64_switch_to(struct task_struct *next) @@ -180,7 +187,7 @@ END(sys_clone) * called. The code starting at .map relies on this. The rest of the code * doesn't care about the interrupt masking status. 
*/ -GLOBAL_ENTRY(ia64_switch_to) +GLOBAL_ENTRY(__paravirt_switch_to) .prologue alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK @@ -204,7 +211,7 @@ GLOBAL_ENTRY(ia64_switch_to) ;; .done: ld8 sp=[r21] // load kernel stack pointer of new task - mov IA64_KR(CURRENT)=in0 // update "current" application register + MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer ;; @@ -216,26 +223,25 @@ GLOBAL_ENTRY(ia64_switch_to) br.ret.sptk.many rp // boogie on out in new context .map: - rsm psr.ic // interrupts (psr.i) are already disabled here + RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here movl r25=PAGE_KERNEL ;; srlz.d or r23=r25,r20 // construct PA | page properties mov r25=IA64_GRANULE_SHIFT<<2 ;; - mov cr.itir=r25 - mov cr.ifa=in0 // VA of next task... + MOV_TO_ITIR(p0, r25, r8) + MOV_TO_IFA(in0, r8) // VA of next task... ;; mov r25=IA64_TR_CURRENT_STACK - mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... + MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... ;; itr.d dtr[r25]=r23 // wire in new mapping... - ssm psr.ic // reenable the psr.ic bit - ;; - srlz.d + SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit br.cond.sptk .done -END(ia64_switch_to) +END(__paravirt_switch_to) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE /* * Note that interrupts are enabled during save_switch_stack and load_switch_stack. 
This * means that we may get an interrupt with "sp" pointing to the new kernel stack while @@ -375,7 +381,7 @@ END(save_switch_stack) * - b7 holds address to return to * - must not touch r8-r11 */ -ENTRY(load_switch_stack) +GLOBAL_ENTRY(load_switch_stack) .prologue .altrp b7 @@ -571,7 +577,7 @@ GLOBAL_ENTRY(ia64_trace_syscall) .ret3: (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk (pUStk) rsm psr.i // disable interrupts - br.cond.sptk .work_pending_syscall_end + br.cond.sptk ia64_work_pending_syscall_end strace_error: ld8 r3=[r2] // load pt_regs.r8 @@ -636,8 +642,17 @@ GLOBAL_ENTRY(ia64_ret_from_syscall) adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 mov r10=r0 // clear error indication in r10 (p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +#ifdef CONFIG_PARAVIRT + ;; + br.cond.sptk.few ia64_leave_syscall + ;; +#endif /* CONFIG_PARAVIRT */ END(ia64_ret_from_syscall) +#ifndef CONFIG_PARAVIRT // fall through +#endif +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + /* * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't * need to switch to bank 0 and doesn't restore the scratch registers. @@ -682,7 +697,7 @@ END(ia64_ret_from_syscall) * ar.csd: cleared * ar.ssd: cleared */ -ENTRY(ia64_leave_syscall) +GLOBAL_ENTRY(__paravirt_leave_syscall) PT_REGS_UNWIND_INFO(0) /* * work.need_resched etc. mustn't get changed by this CPU before it returns to @@ -692,11 +707,11 @@ ENTRY(ia64_leave_syscall) * extra work. We always check for extra work when returning to user-level. * With CONFIG_PREEMPT, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check + * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. 
*/ #ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts + RSM_PSR_I(p0, r2, r18) // disable interrupts cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; @@ -706,11 +721,12 @@ ENTRY(ia64_leave_syscall) ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) #else /* !CONFIG_PREEMPT */ -(pUStk) rsm psr.i + RSM_PSR_I(pUStk, r2, r18) cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif -.work_processed_syscall: +.global __paravirt_work_processed_syscall; +__paravirt_work_processed_syscall: #ifdef CONFIG_VIRT_CPU_ACCOUNTING adds r2=PT(LOADRS)+16,r12 (pUStk) mov.m r22=ar.itc // fetch time at leave @@ -744,7 +760,7 @@ ENTRY(ia64_leave_syscall) (pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! ;; invala // M0|1 invalidate ALAT - rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection + RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs ld8 r29=[r2],16 // M0|1 load cr.ipsr @@ -765,7 +781,7 @@ ENTRY(ia64_leave_syscall) ;; #endif ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled nop 0 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 @@ -798,7 +814,7 @@ ENTRY(ia64_leave_syscall) srlz.d // M0 ensure interruption collection is off (for cover) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - cover // B add current frame into dirty partition & set cr.ifs + COVER // B add current frame into dirty partition & set cr.ifs ;; #ifdef CONFIG_VIRT_CPU_ACCOUNTING mov r19=ar.bsp // M2 get new backing store pointer @@ -823,8 +839,9 @@ ENTRY(ia64_leave_syscall) mov.m ar.ssd=r0 // M2 clear ar.ssd mov f11=f0 // F clear f11 br.cond.sptk.many rbs_switch // B 
-END(ia64_leave_syscall) +END(__paravirt_leave_syscall) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE #ifdef CONFIG_IA32_SUPPORT GLOBAL_ENTRY(ia64_ret_from_ia32_execve) PT_REGS_UNWIND_INFO(0) @@ -835,10 +852,20 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit .mem.offset 8,0 st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +#ifdef CONFIG_PARAVIRT + ;; + // don't fall through, ia64_leave_kernel may be #define'd + br.cond.sptk.few ia64_leave_kernel + ;; +#endif /* CONFIG_PARAVIRT */ END(ia64_ret_from_ia32_execve) +#ifndef CONFIG_PARAVIRT // fall through +#endif #endif /* CONFIG_IA32_SUPPORT */ -GLOBAL_ENTRY(ia64_leave_kernel) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + +GLOBAL_ENTRY(__paravirt_leave_kernel) PT_REGS_UNWIND_INFO(0) /* * work.need_resched etc. mustn't get changed by this CPU before it returns to @@ -852,7 +879,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) * needs to be redone. */ #ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts + RSM_PSR_I(p0, r17, r31) // disable interrupts cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; @@ -862,7 +889,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) #else -(pUStk) rsm psr.i + RSM_PSR_I(pUStk, r17, r31) cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif @@ -910,7 +937,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov ar.csd=r30 mov ar.ssd=r31 ;; - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection invala // invalidate ALAT ;; ld8.fill r22=[r2],24 @@ -942,7 +969,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov ar.ccv=r15 ;; ldf.fill f11=[r2] - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
+ BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) ;; (pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) adds r16=PT(CR_IPSR)+16,r12 @@ -950,12 +977,12 @@ GLOBAL_ENTRY(ia64_leave_kernel) #ifdef CONFIG_VIRT_CPU_ACCOUNTING .pred.rel.mutex pUStk,pKStk -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled (pUStk) mov.m r22=ar.itc // M fetch time at leave nop.i 0 ;; #else -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled nop.i 0 nop.i 0 ;; @@ -1027,7 +1054,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) * NOTE: alloc, loadrs, and cover can't be predicated. */ (pNonSys) br.cond.dpnt dont_preserve_current_frame - cover // add current frame into dirty partition and set cr.ifs + COVER // add current frame into dirty partition and set cr.ifs ;; mov r19=ar.bsp // get new backing store pointer rbs_switch: @@ -1130,16 +1157,16 @@ skip_rbs_switch: (pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp (pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise ;; - mov cr.ipsr=r29 // M2 + MOV_TO_IPSR(p0, r29, r25) // M2 mov ar.pfs=r26 // I0 (pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise -(p9) mov cr.ifs=r30 // M2 + MOV_TO_IFS(p9, r30, r25)// M2 mov b0=r21 // I0 (pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise mov ar.fpsr=r20 // M2 - mov cr.iip=r28 // M2 + MOV_TO_IIP(r28, r25) // M2 nop 0 ;; (pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode @@ -1148,7 +1175,7 @@ skip_rbs_switch: mov ar.rsc=r27 // M2 mov pr=r31,-1 // I0 - rfi // B + RFI // B /* * On entry: @@ -1174,35 +1201,36 @@ skip_rbs_switch: ;; (pKStk) st4 [r20]=r21 #endif - ssm psr.i // enable interrupts + SSM_PSR_I(p0, p6, r2) // enable interrupts br.call.spnt.many rp=schedule .ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) - rsm 
psr.i // disable interrupts + RSM_PSR_I(p0, r2, r20) // disable interrupts ;; #ifdef CONFIG_PREEMPT (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; (pKStk) st4 [r20]=r0 // preempt_count() <- 0 #endif -(pLvSys)br.cond.sptk.few .work_pending_syscall_end +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end br.cond.sptk.many .work_processed_kernel .notify: (pUStk) br.call.spnt.many rp=notify_resume_user .ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) -(pLvSys)br.cond.sptk.few .work_pending_syscall_end +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end br.cond.sptk.many .work_processed_kernel -.work_pending_syscall_end: +.global __paravirt_pending_syscall_end; +__paravirt_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 ;; ld8 r8=[r2] ld8 r10=[r3] - br.cond.sptk.many .work_processed_syscall - -END(ia64_leave_kernel) + br.cond.sptk.many __paravirt_work_processed_syscall_target +END(__paravirt_leave_kernel) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE ENTRY(handle_syscall_error) /* * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could @@ -1244,7 +1272,7 @@ END(ia64_invoke_schedule_tail) * We declare 8 input registers so the system call args get preserved, * in case we need to restart a system call. */ -ENTRY(notify_resume_user) +GLOBAL_ENTRY(notify_resume_user) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! 
mov r9=ar.unat @@ -1306,7 +1334,7 @@ ENTRY(sys_rt_sigreturn) adds sp=16,sp ;; ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,ia64_leave_kernel + mov.sptk b7=r8,ia64_native_leave_kernel ;; mov ar.unat=r9 br.many b7 @@ -1665,3 +1693,4 @@ sys_call_table: data8 sys_timerfd_gettime .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index e5482bb6841..7126ea8f7ec 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -286,3 +286,22 @@ struct pv_cpu_ops pv_cpu_ops = { = ia64_native_intrin_local_irq_restore_func, }; EXPORT_SYMBOL(pv_cpu_ops); + +/****************************************************************************** + * replacement of hand written assembly codes. + */ + +void +paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch) +{ + extern unsigned long paravirt_switch_to_targ; + extern unsigned long paravirt_leave_syscall_targ; + extern unsigned long paravirt_work_processed_syscall_targ; + extern unsigned long paravirt_leave_kernel_targ; + + paravirt_switch_to_targ = cpu_asm_switch->switch_to; + paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall; + paravirt_work_processed_syscall_targ = + cpu_asm_switch->work_processed_syscall; + paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel; +} diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S new file mode 100644 index 00000000000..2f42fcb9776 --- /dev/null +++ b/arch/ia64/kernel/paravirtentry.S @@ -0,0 +1,60 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirtentry.S + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include "entry.h" + +#define DATA8(sym, init_value) \ + .pushsection .data.read_mostly ; \ + .align 8 ; \ + .global sym ; \ + sym: ; \ + data8 init_value ; \ + .popsection + +#define BRANCH(targ, reg, breg) \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ + br.cond.sptk.many breg + +#define BRANCH_PROC(sym, reg, breg) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ + END(paravirt_ ## sym) + +#define BRANCH_PROC_UNWINFO(sym, reg, breg) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ + END(paravirt_ ## sym) + + +BRANCH_PROC(switch_to, r22, b7) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) +BRANCH_PROC(work_processed_syscall, r2, b7) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) diff --git a/include/asm-ia64/native/inst.h b/include/asm-ia64/native/inst.h index f1072ace0cf..c953a2ca4fc 100644 --- a/include/asm-ia64/native/inst.h +++ b/include/asm-ia64/native/inst.h @@ -22,6 +22,14 @@ #define DO_SAVE_MIN IA64_NATIVE_DO_SAVE_MIN +#define __paravirt_switch_to 
ia64_native_switch_to +#define __paravirt_leave_syscall ia64_native_leave_syscall +#define __paravirt_work_processed_syscall ia64_native_work_processed_syscall +#define __paravirt_leave_kernel ia64_native_leave_kernel +#define __paravirt_pending_syscall_end ia64_work_pending_syscall_end +#define __paravirt_work_processed_syscall_target \ + ia64_work_processed_syscall + #ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK # define PARAVIRT_POISON 0xdeadbeefbaadf00d # define CLOBBER(clob) \ diff --git a/include/asm-ia64/paravirt_privop.h b/include/asm-ia64/paravirt_privop.h index 7b133ae86df..52482e6940a 100644 --- a/include/asm-ia64/paravirt_privop.h +++ b/include/asm-ia64/paravirt_privop.h @@ -80,12 +80,35 @@ extern unsigned long ia64_native_getreg_func(int regnum); ia64_native_rsm(mask); \ } while (0) +/****************************************************************************** + * replacement of hand written assembly codes. + */ +struct pv_cpu_asm_switch { + unsigned long switch_to; + unsigned long leave_syscall; + unsigned long work_processed_syscall; + unsigned long leave_kernel; +}; +void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); + #endif /* __ASSEMBLY__ */ +#define IA64_PARAVIRT_ASM_FUNC(name) paravirt_ ## name + #else /* fallback for native case */ +#define IA64_PARAVIRT_ASM_FUNC(name) ia64_native_ ## name #endif /* CONFIG_PARAVIRT */ +/* these routines utilize privilege-sensitive or performance-sensitive + * privileged instructions so the code must be replaced with + * paravirtualized versions */ +#define ia64_switch_to IA64_PARAVIRT_ASM_FUNC(switch_to) +#define ia64_leave_syscall IA64_PARAVIRT_ASM_FUNC(leave_syscall) +#define ia64_work_processed_syscall \ + IA64_PARAVIRT_ASM_FUNC(work_processed_syscall) +#define ia64_leave_kernel IA64_PARAVIRT_ASM_FUNC(leave_kernel) + #endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */ -- cgit v1.2.3-70-g09d2 From 213060a4d6991a95d0b9344406d195be3464accf Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: 
Mon, 19 May 2008 22:13:40 +0900 Subject: [IA64] pvops: paravirtualize NR_IRQS Make NR_IRQS overridable by each pv instance. Each pv instance may need its own number of irqs, so NR_IRQS should be the maximum of the nr_irqs values that the pv instances need. Cc: Jes Sorensen Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/Makefile | 6 ++++++ arch/ia64/kernel/Makefile | 33 +++++++++++++++++++++++++++++++++ arch/ia64/kernel/nr-irqs.c | 24 ++++++++++++++++++++++++ include/asm-ia64/irq.h | 9 +-------- include/asm-ia64/native/irq.h | 35 +++++++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 arch/ia64/kernel/nr-irqs.c create mode 100644 include/asm-ia64/native/irq.h (limited to 'arch/ia64') diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 88f1a55c6c9..3b9c8cadfd3 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -99,3 +99,9 @@ define archhelp echo ' boot - Build vmlinux and bootloader for Ski simulator' echo '* unwcheck - Check vmlinux for invalid unwind info' endef + +archprepare: make_nr_irqs_h FORCE +PHONY += make_nr_irqs_h FORCE + +make_nr_irqs_h: FORCE + $(Q)$(MAKE) $(build)=arch/ia64/kernel include/asm-ia64/nr-irqs.h diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cea91f17d44..87fea11aecb 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -73,6 +73,39 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/gate-data.o: $(obj)/gate.so +# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) 
based on config +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef +quiet_cmd_nr_irqs = GEN $@ +define cmd_nr_irqs + (set -e; \ + echo "#ifndef __ASM_NR_IRQS_H__"; \ + echo "#define __ASM_NR_IRQS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef + +# We use internal kbuild rules to avoid the "is up to date" message from make +arch/$(SRCARCH)/kernel/nr-irqs.s: $(srctree)/arch/$(SRCARCH)/kernel/nr-irqs.c \ + $(wildcard $(srctree)/include/asm-ia64/*/irq.h) + $(Q)mkdir -p $(dir $@) + $(call if_changed_dep,cc_s_c) + +include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s + $(Q)mkdir -p $(dir $@) + $(call cmd,nr_irqs) + +clean-files += $(objtree)/include/asm-ia64/nr-irqs.h + # # native ivt.S and entry.S # diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c new file mode 100644 index 00000000000..1ae049181e8 --- /dev/null +++ b/arch/ia64/kernel/nr-irqs.c @@ -0,0 +1,24 @@ +/* + * calculate + * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...) + * depending on config. + * This must be calculated before processing asm-offset.c. 
+ */ + +#define ASM_OFFSETS_C 1 + +#include +#include +#include + +void foo(void) +{ + union paravirt_nr_irqs_max { + char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS]; +#ifdef CONFIG_XEN + char xen_nr_irqs[XEN_NR_IRQS]; +#endif + }; + + DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max)); +} diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index a66d26827cb..3627116fb0e 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -13,14 +13,7 @@ #include #include - -#define NR_VECTORS 256 - -#if (NR_VECTORS + 32 * NR_CPUS) < 1024 -#define NR_IRQS (NR_VECTORS + 32 * NR_CPUS) -#else -#define NR_IRQS 1024 -#endif +#include static __inline__ int irq_canonicalize (int irq) diff --git a/include/asm-ia64/native/irq.h b/include/asm-ia64/native/irq.h new file mode 100644 index 00000000000..efe9ff74a3c --- /dev/null +++ b/include/asm-ia64/native/irq.h @@ -0,0 +1,35 @@ +/****************************************************************************** + * include/asm-ia64/native/irq.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * moved from linux/include/asm-ia64/irq.h. 
+ */ + +#ifndef _ASM_IA64_NATIVE_IRQ_H +#define _ASM_IA64_NATIVE_IRQ_H + +#define NR_VECTORS 256 + +#if (NR_VECTORS + 32 * NR_CPUS) < 1024 +#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS) +#else +#define IA64_NATIVE_NR_IRQS 1024 +#endif + +#endif /* _ASM_IA64_NATIVE_IRQ_H */ -- cgit v1.2.3-70-g09d2 From e51835d58a5abdf82211f36f500f666ca7ef9aee Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:41 +0900 Subject: [IA64] pvops: define initialization hooks, pv_init_ops, for paravirtualized environment. define pv_init_ops hooks which represents various initialization hooks for paravirtualized environment. and add hooks. Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 7 +++++ arch/ia64/kernel/setup.c | 10 +++++++ arch/ia64/kernel/smpboot.c | 2 ++ include/asm-ia64/paravirt.h | 70 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 7126ea8f7ec..5daf659ff29 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -41,6 +41,13 @@ struct pv_info pv_info = { .name = "bare hardware" }; +/*************************************************************************** + * pv_init_ops + * initialization hooks. + */ + +struct pv_init_ops pv_init_ops; + /*************************************************************************** * pv_cpu_ops * intrinsics hooks. 
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index f48a809c686..750749551e8 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -341,6 +342,8 @@ reserve_memory (void) rsvd_region[n].end = (unsigned long) ia64_imva(_end); n++; + n += paravirt_reserve_memory(&rsvd_region[n]); + #ifdef CONFIG_BLK_DEV_INITRD if (ia64_boot_param->initrd_start) { rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); @@ -519,6 +522,8 @@ setup_arch (char **cmdline_p) { unw_init(); + paravirt_arch_setup_early(); + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); *cmdline_p = __va(ia64_boot_param->command_line); @@ -584,6 +589,9 @@ setup_arch (char **cmdline_p) acpi_boot_init(); #endif + paravirt_banner(); + paravirt_arch_setup_console(cmdline_p); + #ifdef CONFIG_VT if (!conswitchp) { # if defined(CONFIG_DUMMY_CONSOLE) @@ -603,6 +611,8 @@ setup_arch (char **cmdline_p) #endif /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (paravirt_arch_setup_nomca()) + nomca = 1; if (!nomca) ia64_mca_init(); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index d7ad42b77d4..933f3881152 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -642,6 +643,7 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callin_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + paravirt_post_smp_prepare_boot_cpu(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h index 1032b216aea..84d74c32eb9 100644 --- a/include/asm-ia64/paravirt.h +++ b/include/asm-ia64/paravirt.h @@ -55,11 +55,81 @@ static inline unsigned int get_kernel_rpl(void) return pv_info.kernel_rpl; } 
+/****************************************************************************** + * initialization hooks. + */ +struct rsvd_region; + +struct pv_init_ops { + void (*banner)(void); + + int (*reserve_memory)(struct rsvd_region *region); + + void (*arch_setup_early)(void); + void (*arch_setup_console)(char **cmdline_p); + int (*arch_setup_nomca)(void); + + void (*post_smp_prepare_boot_cpu)(void); +}; + +extern struct pv_init_ops pv_init_ops; + +static inline void paravirt_banner(void) +{ + if (pv_init_ops.banner) + pv_init_ops.banner(); +} + +static inline int paravirt_reserve_memory(struct rsvd_region *region) +{ + if (pv_init_ops.reserve_memory) + return pv_init_ops.reserve_memory(region); + return 0; +} + +static inline void paravirt_arch_setup_early(void) +{ + if (pv_init_ops.arch_setup_early) + pv_init_ops.arch_setup_early(); +} + +static inline void paravirt_arch_setup_console(char **cmdline_p) +{ + if (pv_init_ops.arch_setup_console) + pv_init_ops.arch_setup_console(cmdline_p); +} + +static inline int paravirt_arch_setup_nomca(void) +{ + if (pv_init_ops.arch_setup_nomca) + return pv_init_ops.arch_setup_nomca(); + return 0; +} + +static inline void paravirt_post_smp_prepare_boot_cpu(void) +{ + if (pv_init_ops.post_smp_prepare_boot_cpu) + pv_init_ops.post_smp_prepare_boot_cpu(); +} + #endif /* !__ASSEMBLY__ */ #else /* fallback for native case */ +#ifndef __ASSEMBLY__ + +#define paravirt_banner() do { } while (0) +#define paravirt_reserve_memory(region) 0 + +#define paravirt_arch_setup_early() do { } while (0) +#define paravirt_arch_setup_console(cmdline_p) do { } while (0) +#define paravirt_arch_setup_nomca() 0 +#define paravirt_post_smp_prepare_boot_cpu() do { } while (0) + +#endif /* __ASSEMBLY__ */ + + #endif /* CONFIG_PARAVIRT_GUEST */ #endif /* __ASM_PARAVIRT_H */ -- cgit v1.2.3-70-g09d2 From 33b39e84209b0308b572dce017df7ee9b63f086c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:42 +0900 Subject: [IA64] pvops: add hooks, 
pv_iosapic_ops, to paravirtualize iosapic. add hooks to paravirtualize iosapic which is a real hardware resource. On virtualized environment it may be replaced something virtualized friendly. Define pv_iosapic_ops and add the hooks. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 45 +++++++++++++++++++++++++++++---------------- arch/ia64/kernel/paravirt.c | 25 +++++++++++++++++++++++++ include/asm-ia64/iosapic.h | 18 ++++++++++++++++-- include/asm-ia64/paravirt.h | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 18 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 082c31dcfd9..587196dd84f 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -587,6 +587,15 @@ static inline int irq_is_shared (int irq) return (iosapic_intr_info[irq].count > 1); } +struct irq_chip* +ia64_native_iosapic_get_irq_chip(unsigned long trigger) +{ + if (trigger == IOSAPIC_EDGE) + return &irq_type_iosapic_edge; + else + return &irq_type_iosapic_level; +} + static int register_intr (unsigned int gsi, int irq, unsigned char delivery, unsigned long polarity, unsigned long trigger) @@ -637,13 +646,10 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery, iosapic_intr_info[irq].dmode = delivery; iosapic_intr_info[irq].trigger = trigger; - if (trigger == IOSAPIC_EDGE) - irq_type = &irq_type_iosapic_edge; - else - irq_type = &irq_type_iosapic_level; + irq_type = iosapic_get_irq_chip(trigger); idesc = irq_desc + irq; - if (idesc->chip != irq_type) { + if (irq_type != NULL && idesc->chip != irq_type) { if (idesc->chip != &no_irq_type) printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", @@ -975,6 +981,22 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, set_rte(gsi, irq, dest, 1); } +void __init +ia64_native_iosapic_pcat_compat_init(void) +{ + if (pcat_compat) { + /* + * Disable the compatibility mode 
interrupts (8259 style), + * needs IN/OUT support enabled. + */ + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __func__); + outb(0xff, 0xA1); + outb(0xff, 0x21); + } +} + void __init iosapic_system_init (int system_pcat_compat) { @@ -989,17 +1011,8 @@ iosapic_system_init (int system_pcat_compat) } pcat_compat = system_pcat_compat; - if (pcat_compat) { - /* - * Disable the compatibility mode interrupts (8259 style), - * needs IN/OUT support enabled. - */ - printk(KERN_INFO - "%s: Disabling PC-AT compatible 8259 interrupts\n", - __func__); - outb(0xff, 0xA1); - outb(0xff, 0x21); - } + if (pcat_compat) + iosapic_pcat_compat_init(); } static inline int diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 5daf659ff29..65c211b2f98 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -312,3 +312,28 @@ paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch) cpu_asm_switch->work_processed_syscall; paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel; } + +/*************************************************************************** + * pv_iosapic_ops + * iosapic read/write hooks. 
+ */ + +static unsigned int +ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + return __ia64_native_iosapic_read(iosapic, reg); +} + +static void +ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + __ia64_native_iosapic_write(iosapic, reg, val); +} + +struct pv_iosapic_ops pv_iosapic_ops = { + .pcat_compat_init = ia64_native_iosapic_pcat_compat_init, + .get_irq_chip = ia64_native_iosapic_get_irq_chip, + + .__read = ia64_native_iosapic_read, + .__write = ia64_native_iosapic_write, +}; diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h index a3a4288daae..b9c102e15f2 100644 --- a/include/asm-ia64/iosapic.h +++ b/include/asm-ia64/iosapic.h @@ -55,13 +55,27 @@ #define NR_IOSAPICS 256 -static inline unsigned int __iosapic_read(char __iomem *iosapic, unsigned int reg) +#ifdef CONFIG_PARAVIRT_GUEST +#include +#else +#define iosapic_pcat_compat_init ia64_native_iosapic_pcat_compat_init +#define __iosapic_read __ia64_native_iosapic_read +#define __iosapic_write __ia64_native_iosapic_write +#define iosapic_get_irq_chip ia64_native_iosapic_get_irq_chip +#endif + +extern void __init ia64_native_iosapic_pcat_compat_init(void); +extern struct irq_chip *ia64_native_iosapic_get_irq_chip(unsigned long trigger); + +static inline unsigned int +__ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg) { writel(reg, iosapic + IOSAPIC_REG_SELECT); return readl(iosapic + IOSAPIC_WINDOW); } -static inline void __iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +static inline void +__ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) { writel(reg, iosapic + IOSAPIC_REG_SELECT); writel(val, iosapic + IOSAPIC_WINDOW); diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h index 84d74c32eb9..3a40f624e86 100644 --- a/include/asm-ia64/paravirt.h +++ b/include/asm-ia64/paravirt.h @@ -112,6 +112,46 @@ static inline void paravirt_post_smp_prepare_boot_cpu(void) 
pv_init_ops.post_smp_prepare_boot_cpu(); } +/****************************************************************************** + * replacement of iosapic operations. + */ + +struct pv_iosapic_ops { + void (*pcat_compat_init)(void); + + struct irq_chip *(*get_irq_chip)(unsigned long trigger); + + unsigned int (*__read)(char __iomem *iosapic, unsigned int reg); + void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val); +}; + +extern struct pv_iosapic_ops pv_iosapic_ops; + +static inline void +iosapic_pcat_compat_init(void) +{ + if (pv_iosapic_ops.pcat_compat_init) + pv_iosapic_ops.pcat_compat_init(); +} + +static inline struct irq_chip* +iosapic_get_irq_chip(unsigned long trigger) +{ + return pv_iosapic_ops.get_irq_chip(trigger); +} + +static inline unsigned int +__iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + return pv_iosapic_ops.__read(iosapic, reg); +} + +static inline void +__iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + return pv_iosapic_ops.__write(iosapic, reg, val); +} + #endif /* !__ASSEMBLY__ */ #else -- cgit v1.2.3-70-g09d2 From 85cbc503787d577c215f9540c57294e1ec799144 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:43 +0900 Subject: [IA64] pvops: add hooks, pv_irq_ops, to paravirtualized irq related operations. introduce pv_irq_ops which adds hooks to paravirtualize irq related operations. On virtualized environment, interruption may be replaced by something virtualization friendly. So the irq related operation also may need paravirtualization. This patch adds necessary hooks to paravirtualize irq related operations. 
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/irq_ia64.c | 18 ++++++++++++----- arch/ia64/kernel/paravirt.c | 15 ++++++++++++++ include/asm-ia64/hw_irq.h | 23 ++++++++++++++++++---- include/asm-ia64/paravirt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 9 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index c48171bc796..28d3d483db9 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -196,7 +196,7 @@ static void clear_irq_vector(int irq) } int -assign_irq_vector (int irq) +ia64_native_assign_irq_vector (int irq) { unsigned long flags; int vector, cpu; @@ -222,7 +222,7 @@ assign_irq_vector (int irq) } void -free_irq_vector (int vector) +ia64_native_free_irq_vector (int vector) { if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) @@ -622,7 +622,7 @@ static struct irqaction tlb_irqaction = { #endif void -register_percpu_irq (ia64_vector vec, struct irqaction *action) +ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action) { irq_desc_t *desc; unsigned int irq; @@ -637,13 +637,21 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) } void __init -init_IRQ (void) +ia64_native_register_ipi(void) { - register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); #ifdef CONFIG_SMP register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); +#endif +} + +void __init +init_IRQ (void) +{ + ia64_register_ipi(); + register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); +#ifdef CONFIG_SMP #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG) if (vector_domain_type != VECTOR_DOMAIN_NONE) { BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR); diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 
65c211b2f98..ba5383be03c 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -337,3 +337,18 @@ struct pv_iosapic_ops pv_iosapic_ops = { .__read = ia64_native_iosapic_read, .__write = ia64_native_iosapic_write, }; + +/*************************************************************************** + * pv_irq_ops + * irq operations + */ + +struct pv_irq_ops pv_irq_ops = { + .register_ipi = ia64_native_register_ipi, + + .assign_irq_vector = ia64_native_assign_irq_vector, + .free_irq_vector = ia64_native_free_irq_vector, + .register_percpu_irq = ia64_native_register_percpu_irq, + + .resend_irq = ia64_native_resend_irq, +}; diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index 76366dc9c1a..5c99cbcb8a0 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -15,7 +15,11 @@ #include #include +#ifndef CONFIG_PARAVIRT typedef u8 ia64_vector; +#else +typedef u16 ia64_vector; +#endif /* * 0 special @@ -104,13 +108,24 @@ DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ +#ifdef CONFIG_PARAVIRT_GUEST +#include +#else +#define ia64_register_ipi ia64_native_register_ipi +#define assign_irq_vector ia64_native_assign_irq_vector +#define free_irq_vector ia64_native_free_irq_vector +#define register_percpu_irq ia64_native_register_percpu_irq +#define ia64_resend_irq ia64_native_resend_irq +#endif + +extern void ia64_native_register_ipi(void); extern int bind_irq_vector(int irq, int vector, cpumask_t domain); -extern int assign_irq_vector (int irq); /* allocate a free vector */ -extern void free_irq_vector (int vector); +extern int ia64_native_assign_irq_vector (int irq); /* allocate a free vector */ +extern void ia64_native_free_irq_vector (int vector); extern int reserve_irq_vector (int vector); extern void __setup_vector_irq(int cpu); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); -extern void 
register_percpu_irq (ia64_vector vec, struct irqaction *action); +extern void ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action); extern int check_irq_used (int irq); extern void destroy_and_reserve_irq (unsigned int irq); @@ -122,7 +137,7 @@ static inline int irq_prepare_move(int irq, int cpu) { return 0; } static inline void irq_complete_move(unsigned int irq) {} #endif -static inline void ia64_resend_irq(unsigned int vector) +static inline void ia64_native_resend_irq(unsigned int vector) { platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0); } diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h index 3a40f624e86..ee15646b6d6 100644 --- a/include/asm-ia64/paravirt.h +++ b/include/asm-ia64/paravirt.h @@ -152,6 +152,54 @@ __iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) return pv_iosapic_ops.__write(iosapic, reg, val); } +/****************************************************************************** + * replacement of irq operations. 
+ */ + +struct pv_irq_ops { + void (*register_ipi)(void); + + int (*assign_irq_vector)(int irq); + void (*free_irq_vector)(int vector); + + void (*register_percpu_irq)(ia64_vector vec, + struct irqaction *action); + + void (*resend_irq)(unsigned int vector); +}; + +extern struct pv_irq_ops pv_irq_ops; + +static inline void +ia64_register_ipi(void) +{ + pv_irq_ops.register_ipi(); +} + +static inline int +assign_irq_vector(int irq) +{ + return pv_irq_ops.assign_irq_vector(irq); +} + +static inline void +free_irq_vector(int vector) +{ + return pv_irq_ops.free_irq_vector(vector); +} + +static inline void +register_percpu_irq(ia64_vector vec, struct irqaction *action) +{ + pv_irq_ops.register_percpu_irq(vec, action); +} + +static inline void +ia64_resend_irq(unsigned int vector) +{ + pv_irq_ops.resend_irq(vector); +} + #endif /* !__ASSEMBLY__ */ #else -- cgit v1.2.3-70-g09d2 From 00d21d82b8a9e290286e09d8eedc20bfc33b0eee Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:44 +0900 Subject: [IA64] pvops: add to hooks, pv_time_ops, for steal time accounting. Introduce pv_time_ops which adds hook to steal time accounting. On virtualized environment, cpus are shared by many guests and steal time is the time which is used for other guests. On virtualized environment, steal time should be accounted. 
Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 15 +++++++++++++++ arch/ia64/kernel/time.c | 23 +++++++++++++++++++++++ include/asm-ia64/paravirt.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index ba5383be03c..afaf5b9a2cf 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -352,3 +352,18 @@ struct pv_irq_ops pv_irq_ops = { .resend_irq = ia64_native_resend_irq, }; + +/*************************************************************************** + * pv_time_ops + * time operations + */ + +static int +ia64_native_do_steal_accounting(unsigned long *new_itm) +{ + return 0; +} + +struct pv_time_ops pv_time_ops = { + .do_steal_accounting = ia64_native_do_steal_accounting, +}; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 8c73643f2d6..046ca89efc0 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,15 @@ EXPORT_SYMBOL(last_cli_ip); #endif +#ifdef CONFIG_PARAVIRT +static void +paravirt_clocksource_resume(void) +{ + if (pv_time_ops.clocksource_resume) + pv_time_ops.clocksource_resume(); +} +#endif + static struct clocksource clocksource_itc = { .name = "itc", .rating = 350, @@ -56,6 +66,9 @@ static struct clocksource clocksource_itc = { .mult = 0, /*to be calculated*/ .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, +#ifdef CONFIG_PARAVIRT + .resume = paravirt_clocksource_resume, +#endif }; static struct clocksource *itc_clocksource; @@ -156,6 +169,9 @@ timer_interrupt (int irq, void *dev_id) profile_tick(CPU_PROFILING); + if (paravirt_do_steal_accounting(&new_itm)) + goto skip_process_time_accounting; + while (1) { update_process_times(user_mode(get_irq_regs())); @@ -185,6 +201,8 @@ timer_interrupt (int irq, void *dev_id) local_irq_disable(); } 
+skip_process_time_accounting: + do { /* * If we're too close to the next clock tick for @@ -334,6 +352,11 @@ ia64_init_itm (void) */ clocksource_itc.rating = 50; + paravirt_init_missing_ticks_accounting(smp_processor_id()); + + /* avoid softlock up message when cpu is unplug and plugged again. */ + touch_softlockup_watchdog(); + /* Setup the CPU local timer tick */ ia64_cpu_local_tick(); diff --git a/include/asm-ia64/paravirt.h b/include/asm-ia64/paravirt.h index ee15646b6d6..1b4df129f57 100644 --- a/include/asm-ia64/paravirt.h +++ b/include/asm-ia64/paravirt.h @@ -200,6 +200,35 @@ ia64_resend_irq(unsigned int vector) pv_irq_ops.resend_irq(vector); } +/****************************************************************************** + * replacement of time operations. + */ + +extern struct itc_jitter_data_t itc_jitter_data; +extern volatile int time_keeper_id; + +struct pv_time_ops { + void (*init_missing_ticks_accounting)(int cpu); + int (*do_steal_accounting)(unsigned long *new_itm); + + void (*clocksource_resume)(void); +}; + +extern struct pv_time_ops pv_time_ops; + +static inline void +paravirt_init_missing_ticks_accounting(int cpu) +{ + if (pv_time_ops.init_missing_ticks_accounting) + pv_time_ops.init_missing_ticks_accounting(cpu); +} + +static inline int +paravirt_do_steal_accounting(unsigned long *new_itm) +{ + return pv_time_ops.do_steal_accounting(new_itm); +} + #endif /* !__ASSEMBLY__ */ #else @@ -215,6 +244,9 @@ ia64_resend_irq(unsigned int vector) #define paravirt_arch_setup_nomca() 0 #define paravirt_post_smp_prepare_boot_cpu() do { } while (0) +#define paravirt_init_missing_ticks_accounting(cpu) do { } while (0) +#define paravirt_do_steal_accounting(new_itm) 0 + #endif /* __ASSEMBLY__ */ -- cgit v1.2.3-70-g09d2 From 4d58bbcc89e267d52b4df572acbf209a60a8a497 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 28 May 2008 09:41:58 -0700 Subject: [IA64] pv_ops: move some functions in ivt.S to avoid lack of space. 
move interrupt, page_fault, non_syscall, dispatch_unaligned_handler and dispatch_to_fault_handler to avoid lack of instruction space. The change set 4dcc29e1574d88f4465ba865ed82800032f76418 bloated SAVE_MIN_WITH_COVER, SAVE_MIN_WITH_COVER_R19 so that it bloated the functions which use those macros. In the native case, only dispatch_illegal_op_fault had to be moved. In the paravirtualized case, all the functions which use the macros need to be moved to avoid the lack of space. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 261 +++++++++++++++++++++++++------------------------ 1 file changed, 133 insertions(+), 128 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 23749ed3cf0..c39627df3cd 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -515,27 +515,6 @@ ENTRY(ikey_miss) FAULT(6) END(ikey_miss) - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - SSM_PSR_DT_AND_SRLZ_I - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 - MOV_FROM_IFA(out0) - MOV_FROM_ISR(out1) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) - adds r3=8,r2 // set up second base pointer - SSM_PSR_I(p15, p15, r14) // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - .org ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) @@ -896,26 +875,8 @@ END(break_fault) ///////////////////////////////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save 
predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) - // ensure everybody knows psr.ic is back on - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - ;; - MCA_RECOVER_RANGE(interrupt) - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq + /* interrupt handler has become too big to fit this area. */ + br.sptk.many __interrupt END(interrupt) .org ia64_ivt+0x3400 @@ -1125,105 +1086,18 @@ END(account_sys_enter) DBG_FAULT(17) FAULT(17) -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - - alloc r14=ar.pfs,0,0,2,0 - MOV_FROM_IIM(out0) - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r15) // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - .org ia64_ivt+0x4800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved DBG_FAULT(18) FAULT(18) - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. 
If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - MOV_FROM_IFA(out0) - adds out1=16,sp - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - .org ia64_ivt+0x4c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved DBG_FAULT(19) FAULT(19) - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... 
- */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - MOV_FROM_ISR(out1) - MOV_FROM_IFA(out2) - MOV_FROM_IIM(out3) - MOV_FROM_ITIR(out4) - ;; - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) - // guarantee that interruption collection is on - mov out0=r15 - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - // // --- End of long entries, Beginning of short entries // @@ -1670,6 +1544,137 @@ END(ia32_interrupt) DBG_FAULT(67) FAULT(67) + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + SSM_PSR_DT_AND_SRLZ_I + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,3,0 + MOV_FROM_IFA(out0) + MOV_FROM_ISR(out1) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) + adds r3=8,r2 // set up second base pointer + SSM_PSR_I(p15, p15, r14) // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + +ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
+ + alloc r14=ar.pfs,0,0,2,0 + MOV_FROM_IIM(out0) + add out1=16,sp + adds r3=8,r2 // set up second base pointer for SAVE_REST + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + +ENTRY(__interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; + MCA_RECOVER_RANGE(interrupt) + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_handle_irq +END(__interrupt) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 
+ MOV_FROM_IFA(out0) + adds out1=16,sp + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_unaligned +END(dispatch_unaligned_handler) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) + ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + /* * Squatting in this space ... * -- cgit v1.2.3-70-g09d2 From 60192db82952ad56ef7bbc4a318e2041ca65ba7d Mon Sep 17 00:00:00 2001 From: "Denis V. 
Lunev" Date: Thu, 17 Jul 2008 11:11:17 -0700 Subject: [IA64] improper printk format in acpi-cpufreq When dprintk is enabled the following warnings are generated: arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_set_pstate': arch/ia64/kernel/cpufreq/acpi-cpufreq.c:54: warning: format '%x' expects type 'unsigned int', but argument 3 has type 's64' arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_get_pstate': arch/ia64/kernel/cpufreq/acpi-cpufreq.c:76: warning: format '%x' expects type 'unsigned int', but argument 2 has type 's64' Signed-off-by: Denis V. Lunev Signed-off-by: Tony Luck --- arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index b8498ea6206..7b435451b3d 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -51,7 +51,7 @@ processor_set_pstate ( retval = ia64_pal_set_pstate((u64)value); if (retval) { - dprintk("Failed to set freq to 0x%x, with error 0x%x\n", + dprintk("Failed to set freq to 0x%x, with error 0x%lx\n", value, retval); return -ENODEV; } @@ -74,7 +74,7 @@ processor_get_pstate ( if (retval) dprintk("Failed to get current freq with " - "error 0x%x, idx 0x%x\n", retval, *value); + "error 0x%lx, idx 0x%x\n", retval, *value); return (int)retval; } -- cgit v1.2.3-70-g09d2 From 740a8de0796dd12890b3c8ddcfabfcb528b78d40 Mon Sep 17 00:00:00 2001 From: "Akiyama, Nobuyuki" Date: Thu, 17 Jul 2008 11:22:01 -0700 Subject: [IA64] adding parameter check to module_free() module_free() dereferences its first parameter before checking it. But it is called as shown below (in kernel/kprobes), where the first parameter is always NULL. This happens when many probe points (>1024) are set by kprobes. I encountered this while using SystemTap, which can set many probes easily. 
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) { ... if (kip->nused == 0) { hlist_del(&kip->hlist); if (hlist_empty(&kprobe_insn_pages)) { ... } else { module_free(NULL, kip->insns); //<<< 1st param always NULL kfree(kip); } return 1; } return 0; } Signed-off-by: Akiyama, Nobuyuki Signed-off-by: Tony Luck --- arch/ia64/kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index e83e2ea3b3e..29aad349e0c 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -321,7 +321,8 @@ module_alloc (unsigned long size) void module_free (struct module *mod, void *module_region) { - if (mod->arch.init_unw_table && module_region == mod->module_init) { + if (mod && mod->arch.init_unw_table && + module_region == mod->module_init) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } -- cgit v1.2.3-70-g09d2 From efc7508c9e29944fb3d9edf166d3d584557c33d1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 16 Jul 2008 12:47:08 -0600 Subject: [IA64] Avoid overflowing ia64_cpu_to_sapicid in acpi_map_lsapic() acpi_map_lsapic tries to stuff a long into ia64_cpu_to_sapicid[], which can only hold ints, so let's fix that. We need to update the signature of acpi_map_cpu2node() too. 
Signed-off-by: Alex Chiang Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 43687cc60df..5d1eb7ee2bf 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -774,7 +774,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) */ #ifdef CONFIG_ACPI_HOTPLUG_CPU static -int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; @@ -854,8 +854,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu) union acpi_object *obj; struct acpi_madt_local_sapic *lsapic; cpumask_t tmp_map; - long physid; - int cpu; + int cpu, physid; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return -EINVAL; -- cgit v1.2.3-70-g09d2 From fb86611f8f3251865784d5938a485a0238ec1427 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Thu, 26 Jun 2008 14:53:11 +0200 Subject: [IA64] Remove experimental status of kdump This patch removes the experimental status of kdump on IA64. kdump has been available on IA64 for more than one year now and it has proven to be stable. Signed-off-by: Bernhard Walle Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 18bcc10903b..451f2ffb137 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -540,8 +540,8 @@ config KEXEC strongly in flux, so no good recommendation can be made. config CRASH_DUMP - bool "kernel crash dumps (EXPERIMENTAL)" - depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + bool "kernel crash dumps" + depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) help Generate crash dump after being started by kexec. 
-- cgit v1.2.3-70-g09d2 From 7cc8883074b040aa8c1ebd3a17463b0ea3a9ef16 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 16:29:20 +0300 Subject: KVM: Remove decache_vcpus_on_cpu() and related callbacks Obsoleted by the vmx-specific per-cpu list. Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 -------- arch/powerpc/kvm/powerpc.c | 4 ---- arch/s390/kvm/kvm-s390.c | 4 ---- arch/x86/kvm/svm.c | 5 ----- arch/x86/kvm/vmx.c | 6 ------ arch/x86/kvm/x86.c | 8 -------- include/asm-x86/kvm_host.h | 1 - include/linux/kvm_host.h | 3 --- virt/kvm/kvm_main.c | 1 - 9 files changed, 40 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 68c978be9a5..7c504be5797 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1035,14 +1035,6 @@ static void kvm_free_vmm_area(void) } } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. Leave it as blank for IA64. - */ -void decache_vcpus_on_cpu(int cpu) -{ -} - static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 777e0f34e0e..0513b359851 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -240,10 +240,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6558b09ff57..4585c8ac2b0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -79,10 +79,6 @@ void kvm_arch_hardware_disable(void *garbage) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_hardware_setup(void) { return 0; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9390a31c06f..238e8f3afaf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -709,10 +709,6 @@ static void 
svm_vcpu_put(struct kvm_vcpu *vcpu) rdtscll(vcpu->arch.host_tsc); } -static void svm_vcpu_decache(struct kvm_vcpu *vcpu) -{ -} - static void svm_cache_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1933,7 +1929,6 @@ static struct kvm_x86_ops svm_x86_ops = { .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .vcpu_decache = svm_vcpu_decache, .set_guest_debug = svm_guest_debug, .get_msr = svm_get_msr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4d179d10637..b99bb37e5de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -692,11 +692,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); } -static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) -{ - vcpu_clear(to_vmx(vcpu)); -} - static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { return vmcs_readl(GUEST_RFLAGS); @@ -3114,7 +3109,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .prepare_guest_switch = vmx_save_host_state, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .vcpu_decache = vmx_vcpu_decache, .set_guest_debug = set_guest_debug, .guest_debug_pre = kvm_guest_debug_pre, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c14ddcaba7..fd03b4465bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -817,14 +817,6 @@ out: return r; } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. 
- */ -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_dev_ioctl_check_extension(long ext) { int r; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 0df9d5fa281..4bcdc7de07b 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -380,7 +380,6 @@ struct kvm_x86_ops { void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*vcpu_decache)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index de9d1df4bba..865dcbcb891 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -135,9 +135,6 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -void decache_vcpus_on_cpu(int cpu); - - int kvm_init(void *opaque, unsigned int vcpu_size, struct module *module); void kvm_exit(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e4bf88a9ee4..83a0e5ce603 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1273,7 +1273,6 @@ static void hardware_disable(void *junk) if (!cpu_isset(cpu, cpus_hardware_enabled)) return; cpu_clear(cpu, cpus_hardware_enabled); - decache_vcpus_on_cpu(cpu); kvm_arch_hardware_disable(NULL); } -- cgit v1.2.3-70-g09d2 From 92760499d01ef91518119908eb9b8798b6c9bd3f Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:53 +0200 Subject: KVM: kvm_io_device: extend in_range() to manage len and write attribute Modify member in_range() of structure kvm_io_device to pass length and the type of the I/O (write or read). This modification allows to use kvm_io_device with coalesced MMIO. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 6 +++--- arch/x86/kvm/i8254.c | 6 ++++-- arch/x86/kvm/i8259.c | 3 ++- arch/x86/kvm/lapic.c | 3 ++- arch/x86/kvm/x86.c | 28 +++++++++++++++++----------- include/linux/kvm_host.h | 3 ++- virt/kvm/ioapic.c | 3 ++- virt/kvm/iodev.h | 8 +++++--- virt/kvm/kvm_main.c | 5 +++-- 9 files changed, 40 insertions(+), 25 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 7c504be5797..bb58df7cc41 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -195,11 +195,11 @@ int kvm_dev_ioctl_check_extension(long ext) } static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, int is_write) { struct kvm_io_device *dev; - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write); return dev; } @@ -231,7 +231,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; mmio: - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir); if (mmio_dev) { if (!p->dir) kvm_iodevice_write(mmio_dev, p->addr, p->size, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 60074dc66bd..9e3391e9a1b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -460,7 +460,8 @@ static void pit_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int pit_in_range(struct kvm_io_device *this, gpa_t addr) +static int pit_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return ((addr >= KVM_PIT_BASE_ADDRESS) && (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); @@ -501,7 +502,8 @@ static void speaker_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) +static 
int speaker_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return (addr == KVM_SPEAKER_BASE_ADDRESS); } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ab29cf2def4..5857f59ad4a 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -346,7 +346,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) return s->elcr; } -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { switch (addr) { case 0x20: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e48d1939403..180ba7316da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -785,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this, } -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) +static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr, + int len, int size) { struct kvm_lapic *apic = (struct kvm_lapic *)this->private; int ret = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c94fad7f01..ab3f5552d69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1797,13 +1797,14 @@ static void kvm_init_msr_list(void) * Only apic need an MMIO device hook, so shortcut now.. 
*/ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; if (vcpu->arch.apic) { dev = &vcpu->arch.apic->dev; - if (dev->in_range(dev, addr)) + if (dev->in_range(dev, addr, len, is_write)) return dev; } return NULL; @@ -1811,13 +1812,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; - dev = vcpu_find_pervcpu_dev(vcpu, addr); + dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); if (dev == NULL) - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, + is_write); return dev; } @@ -1885,7 +1888,7 @@ mmio: * Is this MMIO handled locally? */ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); if (mmio_dev) { kvm_iodevice_read(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -1940,7 +1943,7 @@ mmio: * Is this MMIO handled locally? 
*/ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); if (mmio_dev) { kvm_iodevice_write(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -2317,9 +2320,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev, } static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); + return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); } int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -2351,7 +2355,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); complete_pio(vcpu); @@ -2433,7 +2437,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, + vcpu->arch.pio.cur_count, + !vcpu->arch.pio.in); if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 865dcbcb891..499ff060423 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -52,7 +52,8 @@ struct kvm_io_bus { void kvm_io_bus_init(struct kvm_io_bus *bus); void kvm_io_bus_destroy(struct kvm_io_bus *bus); -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr); +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, + gpa_t addr, int len, int is_write); void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index d0c668c6959..c0d22870ee9 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -307,7 +307,8 @@ 
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) __kvm_ioapic_update_eoi(ioapic, i); } -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index c14e642027b..55e8846ac3a 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h @@ -27,7 +27,8 @@ struct kvm_io_device { gpa_t addr, int len, const void *val); - int (*in_range)(struct kvm_io_device *this, gpa_t addr); + int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, + int is_write); void (*destructor)(struct kvm_io_device *this); void *private; @@ -49,9 +50,10 @@ static inline void kvm_iodevice_write(struct kvm_io_device *dev, dev->write(dev, addr, len, val); } -static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr) +static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, + gpa_t addr, int len, int is_write) { - return dev->in_range(dev, addr); + return dev->in_range(dev, addr, len, is_write); } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 83a0e5ce603..9330fad2b91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1350,14 +1350,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) } } -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr) +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, + gpa_t addr, int len, int is_write) { int i; for (i = 0; i < bus->dev_count; i++) { struct kvm_io_device *pos = bus->devs[i]; - if (pos->in_range(pos, addr)) + if (pos->in_range(pos, addr, len, is_write)) return pos; } -- cgit v1.2.3-70-g09d2 From 7f39f8ac177db258200053074aa7a3d98656b1cf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:57 +0200 Subject: KVM: Add coalesced MMIO support (ia64 part) This 
patch enables coalesced MMIO for the ia64 architecture. It defines KVM_COALESCED_MMIO_PAGE_OFFSET and handles KVM_CAP_COALESCED_MMIO. It enables the compilation of coalesced_mmio.c. [akpm: fix compile error on ia64] Signed-off-by: Laurent Vivier Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/ia64/kvm/Makefile | 3 ++- arch/ia64/kvm/kvm-ia64.c | 3 +++ include/asm-ia64/kvm_host.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 112791dd254..bf22fb9e6dc 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -43,7 +43,8 @@ $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ + coalesced_mmio.o) kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bb58df7cc41..9408b30576d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; default: r = 0; } diff --git a/include/asm-ia64/kvm_host.h b/include/asm-ia64/kvm_host.h index 5c958b0c46b..1efe513a994 100644 --- a/include/asm-ia64/kvm_host.h +++ b/include/asm-ia64/kvm_host.h @@ -38,6 +38,7 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* define exit reasons from vmm to kvm*/ #define EXIT_REASON_VM_PANIC 0 -- cgit v1.2.3-70-g09d2 From 34d4cb8fca1f2a31be152b74797e6cd160ec9de6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 10 Jul 2008 20:49:31 -0300 Subject: KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction Flush the shadow mmu before
removing regions to avoid stale entries. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 3 +++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 3 +++ 6 files changed, 20 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9408b30576d..2672f4d278a 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850d249702..53826a5f6c0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -170,6 +170,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 399acf3f64d..1782cbcd282 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -675,6 +675,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b131f3c0cf6..9f1cdb011cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4032,6 +4032,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + kvm_mmu_zap_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d220b4926c4..07d68a8ae8e 
100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -168,6 +168,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ccaf8f5402..30b36368fcd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) + kvm_arch_flush_shadow(kvm); + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); -- cgit v1.2.3-70-g09d2 From 15648f154a8faea97cbe931e189cf0a57fd066f4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 16 Jul 2008 21:52:25 +0100 Subject: simserial: Fix up for ldisc changes Noted by Tony Luck although I've done the patches differently and also removed some other bogus oddments. 
Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- arch/ia64/hp/sim/simserial.c | 46 +++----------------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 23cafc80d2a..24b1ad5334c 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -193,18 +193,6 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id) * ------------------------------------------------------------------- */ -#if 0 -/* - * not really used in our situation so keep them commented out for now - */ -static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ -static void do_serial_bh(void) -{ - run_task_queue(&tq_serial); - printk(KERN_ERR "do_serial_bh: called\n"); -} -#endif - static void do_softint(struct work_struct *private_) { printk(KERN_ERR "simserial: do_softint called\n"); @@ -351,11 +339,7 @@ static void rs_flush_buffer(struct tty_struct *tty) info->xmit.head = info->xmit.tail = 0; local_irq_restore(flags); - wake_up_interruptible(&tty->write_wait); - - if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - tty->ldisc.write_wakeup) - (tty->ldisc.write_wakeup)(tty); + tty_wakeup(tty); } /* @@ -404,12 +388,6 @@ static void rs_unthrottle(struct tty_struct * tty) printk(KERN_INFO "simrs_unthrottle called\n"); } -/* - * rs_break() --- routine which turns the break handling on or off - */ -static void rs_break(struct tty_struct *tty, int break_state) -{ -} static int rs_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) @@ -422,14 +400,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, } switch (cmd) { - case TIOCMGET: - printk(KERN_INFO "rs_ioctl: TIOCMGET called\n"); - return -EINVAL; - case TIOCMBIS: - case TIOCMBIC: - case TIOCMSET: - printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n"); - return -EINVAL; case TIOCGSERIAL: printk(KERN_INFO "simrs_ioctl 
TIOCGSERIAL called\n"); return 0; @@ -488,14 +458,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - unsigned int cflag = tty->termios->c_cflag; - - if ( (cflag == old_termios->c_cflag) - && ( RELEVANT_IFLAG(tty->termios->c_iflag) - == RELEVANT_IFLAG(old_termios->c_iflag))) - return; - - /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { @@ -623,9 +585,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp) * the line discipline to only process XON/XOFF characters. */ shutdown(info); - if (tty->ops->flush_buffer) - tty->ops->flush_buffer(tty); - if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); + rs_flush_buffer(tty); + tty_ldisc_flush(tty); info->event = 0; info->tty = NULL; if (info->blocked_open) { @@ -955,7 +916,6 @@ static const struct tty_operations hp_ops = { .stop = rs_stop, .start = rs_start, .hangup = rs_hangup, - .break_ctl = rs_break, .wait_until_sent = rs_wait_until_sent, .read_proc = rs_read_proc, }; -- cgit v1.2.3-70-g09d2 From 4a0b2b4dbe1335b8b9886ba3dc85a145d5d938ed Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 1 Jul 2008 18:48:41 +0200 Subject: sysdev: Pass the attribute to the low level sysdev show/store function This allows attributes to be generated dynamically and show/store functions to be shared between attributes. Right now most attributes are generated by special macros, with lots of duplicated code. With the attribute passed it's instead possible to attach some data to the attribute and then use that in shared low level functions to do different things. I need this for the dynamically generated bank attributes in the x86 machine check code, but it'll allow some further cleanups. I converted all users in tree to the new show/store prototype. It's a single huge patch to avoid unbisectable sections.
Runtime tested: x86-32, x86-64 Compiled only: ia64, powerpc Not compile tested/only grep converted: sh, arm, avr32 Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/time.c | 4 ++- arch/avr32/kernel/cpu.c | 38 +++++++++++++++++--------- arch/ia64/kernel/err_inject.c | 22 ++++++++++----- arch/powerpc/kernel/sysfs.c | 15 ++++++++--- arch/powerpc/platforms/cell/cbe_thermal.c | 45 ++++++++++++++++++++----------- arch/powerpc/platforms/cell/spu_base.c | 3 ++- arch/s390/kernel/smp.c | 36 ++++++++++++++++--------- arch/s390/kernel/time.c | 35 ++++++++++++++++-------- arch/sh/drivers/dma/dma-sysfs.c | 15 ++++++++--- arch/sparc64/kernel/sysfs.c | 16 +++++++---- arch/x86/kernel/cpu/mcheck/mce_64.c | 14 +++++++--- arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 + arch/x86/kernel/microcode.c | 10 ++++--- drivers/base/cpu.c | 10 ++++--- drivers/base/memory.c | 12 ++++++--- drivers/base/node.c | 15 +++++++---- drivers/base/sys.c | 4 +-- drivers/base/topology.c | 17 ++++++++---- drivers/cpuidle/sysfs.c | 10 ++++--- drivers/xen/balloon.c | 1 + include/linux/sysdev.h | 5 ++-- kernel/rtmutex-tester.c | 7 ++--- kernel/sched.c | 8 ++++-- kernel/time/clocksource.c | 8 ++++-- 24 files changed, 239 insertions(+), 112 deletions(-) (limited to 'arch/ia64') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index cc5145b28e7..368d171754c 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -130,7 +130,9 @@ static const struct leds_evt_name evt_names[] = { { "red", led_red_on, led_red_off }, }; -static ssize_t leds_store(struct sys_device *dev, const char *buf, size_t size) +static ssize_t leds_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t size) { int ret = -EINVAL, len = strcspn(buf, " "); diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c index b8409caeb23..e84faffbbec 100644 --- a/arch/avr32/kernel/cpu.c +++ b/arch/avr32/kernel/cpu.c @@ -26,14 +26,16 @@ static DEFINE_PER_CPU(struct 
cpu, cpu_devices); * XXX: If/when a SMP-capable implementation of AVR32 will ever be * made, we must make sure that the code executes on the correct CPU. */ -static ssize_t show_pc0event(struct sys_device *dev, char *buf) +static ssize_t show_pc0event(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "0x%lx\n", (pccr >> 12) & 0x3f); } -static ssize_t store_pc0event(struct sys_device *dev, const char *buf, +static ssize_t store_pc0event(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -46,15 +48,17 @@ static ssize_t store_pc0event(struct sys_device *dev, const char *buf, sysreg_write(PCCR, val); return count; } -static ssize_t show_pc0count(struct sys_device *dev, char *buf) +static ssize_t show_pc0count(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pcnt0; pcnt0 = sysreg_read(PCNT0); return sprintf(buf, "%lu\n", pcnt0); } -static ssize_t store_pc0count(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t store_pc0count(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { unsigned long val; char *endp; @@ -67,14 +71,16 @@ static ssize_t store_pc0count(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_pc1event(struct sys_device *dev, char *buf) +static ssize_t show_pc1event(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "0x%lx\n", (pccr >> 18) & 0x3f); } -static ssize_t store_pc1event(struct sys_device *dev, const char *buf, +static ssize_t store_pc1event(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -87,14 +93,16 @@ static ssize_t store_pc1event(struct sys_device *dev, const char *buf, sysreg_write(PCCR, val); return count; } -static ssize_t 
show_pc1count(struct sys_device *dev, char *buf) +static ssize_t show_pc1count(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pcnt1; pcnt1 = sysreg_read(PCNT1); return sprintf(buf, "%lu\n", pcnt1); } -static ssize_t store_pc1count(struct sys_device *dev, const char *buf, +static ssize_t store_pc1count(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -108,14 +116,16 @@ static ssize_t store_pc1count(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_pccycles(struct sys_device *dev, char *buf) +static ssize_t show_pccycles(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pccnt; pccnt = sysreg_read(PCCNT); return sprintf(buf, "%lu\n", pccnt); } -static ssize_t store_pccycles(struct sys_device *dev, const char *buf, +static ssize_t store_pccycles(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { unsigned long val; @@ -129,14 +139,16 @@ static ssize_t store_pccycles(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_pcenable(struct sys_device *dev, char *buf) +static ssize_t show_pcenable(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned long pccr; pccr = sysreg_read(PCCR); return sprintf(buf, "%c\n", (pccr & 1)?'1':'0'); } -static ssize_t store_pcenable(struct sys_device *dev, const char *buf, +static ssize_t store_pcenable(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { unsigned long pccr, val; diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index b642648cc2a..c539c689493 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -55,7 +55,8 @@ static u64 resources[NR_CPUS]; #define show(name) \ static ssize_t \ -show_##name(struct sys_device *dev, char *buf) \ +show_##name(struct sys_device *dev, struct sysdev_attribute 
*attr, \ + char *buf) \ { \ u32 cpu=dev->id; \ return sprintf(buf, "%lx\n", name[cpu]); \ @@ -63,7 +64,8 @@ show_##name(struct sys_device *dev, char *buf) \ #define store(name) \ static ssize_t \ -store_##name(struct sys_device *dev, const char *buf, size_t size) \ +store_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ + const char *buf, size_t size) \ { \ unsigned int cpu=dev->id; \ name[cpu] = simple_strtoull(buf, NULL, 16); \ @@ -76,7 +78,8 @@ show(call_start) * processor. The cpu number in driver is only used for storing data. */ static ssize_t -store_call_start(struct sys_device *dev, const char *buf, size_t size) +store_call_start(struct sys_device *dev, struct sysdev_attribute *attr, + const char *buf, size_t size) { unsigned int cpu=dev->id; unsigned long call_start = simple_strtoull(buf, NULL, 16); @@ -124,14 +127,16 @@ show(err_type_info) store(err_type_info) static ssize_t -show_virtual_to_phys(struct sys_device *dev, char *buf) +show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { unsigned int cpu=dev->id; return sprintf(buf, "%lx\n", phys_addr[cpu]); } static ssize_t -store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size) +store_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, + const char *buf, size_t size) { unsigned int cpu=dev->id; u64 virt_addr=simple_strtoull(buf, NULL, 16); @@ -154,7 +159,8 @@ show(err_struct_info) store(err_struct_info) static ssize_t -show_err_data_buffer(struct sys_device *dev, char *buf) +show_err_data_buffer(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned int cpu=dev->id; @@ -165,7 +171,9 @@ show_err_data_buffer(struct sys_device *dev, char *buf) } static ssize_t -store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size) +store_err_data_buffer(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t size) { unsigned int cpu=dev->id; int ret; diff --git 
a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c8127f832df..aba0ba95f06 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -28,7 +28,9 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, +static ssize_t store_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -44,7 +46,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) +static ssize_t show_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -152,14 +156,17 @@ static unsigned long write_##NAME(unsigned long val) \ mtspr(ADDRESS, val); \ return 0; \ } \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ + store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + const char *buf, size_t count) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val; \ diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 4852bf312d8..4d4c8c16912 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -97,7 +97,8 @@ static u8 
spu_read_register_value(struct sys_device *sysdev, union spe_reg __iom return value.spe[spu->spe_id]; } -static ssize_t spu_show_temp(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_temp(struct sys_device *sysdev, struct sysdev_attribute *attr, + char *buf) { u8 value; struct cbe_pmd_regs __iomem *pmd_regs; @@ -146,32 +147,38 @@ static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char return size; } -static ssize_t spu_show_throttle_end(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 0); } -static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 8); } -static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev, char *buf) +static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(get_pmd_regs(sysdev), buf, 16); } -static ssize_t spu_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(get_pmd_regs(sysdev), buf, size, 0); } -static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(get_pmd_regs(sysdev), buf, size, 8); } -static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { 
return store_throttle(get_pmd_regs(sysdev), buf, size, 16); } @@ -192,43 +199,51 @@ static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos) /* shows the temperature of the DTS on the PPE, * located near the linear thermal sensor */ -static ssize_t ppe_show_temp0(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_temp0(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return ppe_show_temp(sysdev, buf, 32); } /* shows the temperature of the second DTS on the PPE */ -static ssize_t ppe_show_temp1(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_temp1(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return ppe_show_temp(sysdev, buf, 0); } -static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32); } -static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40); } -static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, char *buf) +static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48); } -static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32); } -static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, + struct 
sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40); } -static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size) +static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, + struct sysdev_attribute *attr, const char *buf, size_t size) { return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48); } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 78f905bc6a4..a5bdb89a17c 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -703,7 +703,8 @@ static unsigned long long spu_acct_time(struct spu *spu, } -static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) +static ssize_t spu_stat_show(struct sys_device *sysdev, + struct sysdev_attribute *attr, char *buf) { struct spu *spu = container_of(sysdev, struct spu, sysdev); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b6781030cfb..b795b3e24af 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -864,7 +864,8 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, char *buf) +static ssize_t cpu_configure_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t count; @@ -874,8 +875,9 @@ static ssize_t cpu_configure_show(struct sys_device *dev, char *buf) return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t cpu_configure_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { int cpu = dev->id; int val, rc; @@ -922,7 +924,8 @@ out: static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf) +static 
ssize_t cpu_polarization_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { int cpu = dev->id; ssize_t count; @@ -950,7 +953,8 @@ static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf) } static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL); -static ssize_t show_cpu_address(struct sys_device *dev, char *buf) +static ssize_t show_cpu_address(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); } @@ -970,7 +974,8 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, char *buf) +static ssize_t show_capability(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { unsigned int capability; int rc; @@ -982,7 +987,8 @@ static ssize_t show_capability(struct sys_device *dev, char *buf) } static SYSDEV_ATTR(capability, 0444, show_capability, NULL); -static ssize_t show_idle_count(struct sys_device *dev, char *buf) +static ssize_t show_idle_count(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long idle_count; @@ -995,7 +1001,8 @@ static ssize_t show_idle_count(struct sys_device *dev, char *buf) } static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, char *buf) +static ssize_t show_idle_time(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long new_time; @@ -1112,7 +1119,9 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf, +static ssize_t __ref rescan_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { int rc; @@ -1123,7 +1132,9 @@ static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf, static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store); #endif /* 
CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sys_device *dev, char *buf) +static ssize_t dispatching_show(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { ssize_t count; @@ -1133,8 +1144,9 @@ static ssize_t dispatching_show(struct sys_device *dev, char *buf) return count; } -static ssize_t dispatching_store(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t dispatching_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { int val, rc; char delim; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f2cede3947b..ab70d9bd926 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1100,7 +1100,9 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) return etr_port1_online ? &etr_port1 : NULL; } -static ssize_t etr_online_show(struct sys_device *dev, char *buf) +static ssize_t etr_online_show(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { unsigned int online; @@ -1109,7 +1111,8 @@ static ssize_t etr_online_show(struct sys_device *dev, char *buf) } static ssize_t etr_online_store(struct sys_device *dev, - const char *buf, size_t count) + struct sysdev_attribute *attr, + const char *buf, size_t count) { unsigned int value; @@ -1136,7 +1139,9 @@ static ssize_t etr_online_store(struct sys_device *dev, static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store); -static ssize_t etr_stepping_control_show(struct sys_device *dev, char *buf) +static ssize_t etr_stepping_control_show(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? 
etr_eacr.e0 : etr_eacr.e1); @@ -1144,7 +1149,8 @@ static ssize_t etr_stepping_control_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); -static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf) +static ssize_t etr_mode_code_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { if (!etr_port0_online && !etr_port1_online) /* Status word is not uptodate if both ports are offline. */ @@ -1155,7 +1161,8 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL); -static ssize_t etr_untuned_show(struct sys_device *dev, char *buf) +static ssize_t etr_untuned_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1166,7 +1173,8 @@ static ssize_t etr_untuned_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL); -static ssize_t etr_network_id_show(struct sys_device *dev, char *buf) +static ssize_t etr_network_id_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1177,7 +1185,8 @@ static ssize_t etr_network_id_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL); -static ssize_t etr_id_show(struct sys_device *dev, char *buf) +static ssize_t etr_id_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1188,7 +1197,8 @@ static ssize_t etr_id_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(id, 0400, etr_id_show, NULL); -static ssize_t etr_port_number_show(struct sys_device *dev, char *buf) +static ssize_t etr_port_number_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1199,7 +1209,8 @@ static ssize_t 
etr_port_number_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL); -static ssize_t etr_coupled_show(struct sys_device *dev, char *buf) +static ssize_t etr_coupled_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1210,7 +1221,8 @@ static ssize_t etr_coupled_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL); -static ssize_t etr_local_time_show(struct sys_device *dev, char *buf) +static ssize_t etr_local_time_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1221,7 +1233,8 @@ static ssize_t etr_local_time_show(struct sys_device *dev, char *buf) static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL); -static ssize_t etr_utc_offset_show(struct sys_device *dev, char *buf) +static ssize_t etr_utc_offset_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c index 51b57c0d1a3..347ee11351e 100644 --- a/arch/sh/drivers/dma/dma-sysfs.c +++ b/arch/sh/drivers/dma/dma-sysfs.c @@ -23,7 +23,8 @@ static struct sysdev_class dma_sysclass = { }; EXPORT_SYMBOL(dma_sysclass); -static ssize_t dma_show_devices(struct sys_device *dev, char *buf) +static ssize_t dma_show_devices(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t len = 0; int i; @@ -57,13 +58,15 @@ static int __init dma_sysclass_init(void) } postcore_initcall(dma_sysclass_init); -static ssize_t dma_show_dev_id(struct sys_device *dev, char *buf) +static ssize_t dma_show_dev_id(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct dma_channel *channel = to_dma_channel(dev); return sprintf(buf, "%s\n", channel->dev_id); } static ssize_t dma_store_dev_id(struct sys_device *dev, + struct 
sysdev_attribute *attr, const char *buf, size_t count) { struct dma_channel *channel = to_dma_channel(dev); @@ -74,6 +77,7 @@ static ssize_t dma_store_dev_id(struct sys_device *dev, static SYSDEV_ATTR(dev_id, S_IRUGO | S_IWUSR, dma_show_dev_id, dma_store_dev_id); static ssize_t dma_store_config(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { struct dma_channel *channel = to_dma_channel(dev); @@ -87,13 +91,15 @@ static ssize_t dma_store_config(struct sys_device *dev, static SYSDEV_ATTR(config, S_IWUSR, NULL, dma_store_config); -static ssize_t dma_show_mode(struct sys_device *dev, char *buf) +static ssize_t dma_show_mode(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct dma_channel *channel = to_dma_channel(dev); return sprintf(buf, "0x%08x\n", channel->mode); } static ssize_t dma_store_mode(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { struct dma_channel *channel = to_dma_channel(dev); @@ -104,7 +110,8 @@ static ssize_t dma_store_mode(struct sys_device *dev, static SYSDEV_ATTR(mode, S_IRUGO | S_IWUSR, dma_show_mode, dma_store_mode); #define dma_ro_attr(field, fmt) \ -static ssize_t dma_show_##field(struct sys_device *dev, char *buf) \ +static ssize_t dma_show_##field(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf)\ { \ struct dma_channel *channel = to_dma_channel(dev); \ return sprintf(buf, fmt, channel->field); \ diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c index e885034a6b7..84e5ce14671 100644 --- a/arch/sparc64/kernel/sysfs.c +++ b/arch/sparc64/kernel/sysfs.c @@ -14,7 +14,8 @@ static DEFINE_PER_CPU(struct hv_mmu_statistics, mmu_stats) __attribute__((aligned(64))); #define SHOW_MMUSTAT_ULONG(NAME) \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ struct hv_mmu_statistics *p = 
&per_cpu(mmu_stats, dev->id); \ return sprintf(buf, "%lu\n", p->NAME); \ @@ -135,13 +136,16 @@ static unsigned long write_mmustat_enable(unsigned long val) return sun4v_mmustat_conf(ra, &orig_ra); } -static ssize_t show_mmustat_enable(struct sys_device *s, char *buf) +static ssize_t show_mmustat_enable(struct sys_device *s, + struct sysdev_attribute *attr, char *buf) { unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0); return sprintf(buf, "%lx\n", val); } -static ssize_t store_mmustat_enable(struct sys_device *s, const char *buf, size_t count) +static ssize_t store_mmustat_enable(struct sys_device *s, + struct sysdev_attribute *attr, const char *buf, + size_t count) { unsigned long val, err; int ret = sscanf(buf, "%ld", &val); @@ -179,14 +183,16 @@ static void unregister_mmu_stats(struct sys_device *s) #endif #define SHOW_CPUDATA_ULONG_NAME(NAME, MEMBER) \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ cpuinfo_sparc *c = &cpu_data(dev->id); \ return sprintf(buf, "%lu\n", c->MEMBER); \ } #define SHOW_CPUDATA_UINT_NAME(NAME, MEMBER) \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ cpuinfo_sparc *c = &cpu_data(dev->id); \ return sprintf(buf, "%u\n", c->MEMBER); \ diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index c4a7ec31394..e6a4d5f6764 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -762,10 +762,14 @@ DEFINE_PER_CPU(struct sys_device, device_mce); /* Why are there no generic functions for this? 
*/ #define ACCESSOR(name, var, start) \ - static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ + static ssize_t show_ ## name(struct sys_device *s, \ + struct sysdev_attribute *attr, \ + char *buf) { \ return sprintf(buf, "%lx\n", (unsigned long)var); \ } \ - static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ + static ssize_t set_ ## name(struct sys_device *s, \ + struct sysdev_attribute *attr, \ + const char *buf, size_t siz) { \ char *end; \ unsigned long new = simple_strtoul(buf, &end, 0); \ if (end == buf) return -EINVAL; \ @@ -786,14 +790,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart()) ACCESSOR(bank4ctl,bank[4],mce_restart()) ACCESSOR(bank5ctl,bank[5],mce_restart()) -static ssize_t show_trigger(struct sys_device *s, char *buf) +static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, + char *buf) { strcpy(buf, trigger); strcat(buf, "\n"); return strlen(trigger) + 1; } -static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) +static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, + const char *buf,size_t siz) { char *p; int len; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1f4cc48c14c..d5ae2243f0b 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0); #define define_therm_throt_sysdev_show_func(name) \ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ char *buf) \ { \ unsigned int cpu = dev->id; \ diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 56b933119a0..fc4790638b6 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -644,7 +644,9 @@ static void microcode_fini_cpu(int cpu) mutex_unlock(&microcode_mutex); } -static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t 
sz) +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; char *end; @@ -674,14 +676,16 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) return sz; } -static ssize_t version_show(struct sys_device *dev, char *buf) +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; return sprintf(buf, "0x%x\n", uci->rev); } -static ssize_t pf_show(struct sys_device *dev, char *buf) +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index e38dfed41d8..20537d50790 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -21,15 +21,16 @@ EXPORT_SYMBOL(cpu_sysdev_class); static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices); #ifdef CONFIG_HOTPLUG_CPU -static ssize_t show_online(struct sys_device *dev, char *buf) +static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); return sprintf(buf, "%u\n", !!cpu_online(cpu->sysdev.id)); } -static ssize_t __ref store_online(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribute *attr, + const char *buf, size_t count) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); ssize_t ret; @@ -80,7 +81,8 @@ static inline void register_cpu_control(struct cpu *cpu) #ifdef CONFIG_KEXEC #include -static ssize_t show_crash_notes(struct sys_device *dev, char *buf) +static ssize_t show_crash_notes(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); ssize_t rc; diff --git a/drivers/base/memory.c 
b/drivers/base/memory.c index 937e8258981..4d4e0e7b6e9 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -92,7 +92,8 @@ unregister_memory(struct memory_block *memory, struct mem_section *section) * uses. */ -static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) +static ssize_t show_mem_phys_index(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct memory_block *mem = container_of(dev, struct memory_block, sysdev); @@ -102,7 +103,8 @@ static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) /* * online, offline, going offline, etc. */ -static ssize_t show_mem_state(struct sys_device *dev, char *buf) +static ssize_t show_mem_state(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct memory_block *mem = container_of(dev, struct memory_block, sysdev); @@ -217,7 +219,8 @@ out: } static ssize_t -store_mem_state(struct sys_device *dev, const char *buf, size_t count) +store_mem_state(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { struct memory_block *mem; unsigned int phys_section_nr; @@ -248,7 +251,8 @@ out: * s.t. if I offline all of these sections I can then * remove the physical device? 
*/ -static ssize_t show_phys_device(struct sys_device *dev, char *buf) +static ssize_t show_phys_device(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct memory_block *mem = container_of(dev, struct memory_block, sysdev); diff --git a/drivers/base/node.c b/drivers/base/node.c index 0f867a08333..5116b78c632 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -36,11 +36,13 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) return len; } -static inline ssize_t node_read_cpumask(struct sys_device *dev, char *buf) +static inline ssize_t node_read_cpumask(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { return node_read_cpumap(dev, 0, buf); } -static inline ssize_t node_read_cpulist(struct sys_device *dev, char *buf) +static inline ssize_t node_read_cpulist(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { return node_read_cpumap(dev, 1, buf); } @@ -49,7 +51,8 @@ static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); #define K(x) ((x) << (PAGE_SHIFT - 10)) -static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) +static ssize_t node_read_meminfo(struct sys_device * dev, + struct sysdev_attribute *attr, char * buf) { int n; int nid = dev->id; @@ -112,7 +115,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) #undef K static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); -static ssize_t node_read_numastat(struct sys_device * dev, char * buf) +static ssize_t node_read_numastat(struct sys_device * dev, + struct sysdev_attribute *attr, char * buf) { return sprintf(buf, "numa_hit %lu\n" @@ -130,7 +134,8 @@ static ssize_t node_read_numastat(struct sys_device * dev, char * buf) } static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); -static ssize_t node_read_distance(struct sys_device * dev, char * buf) +static ssize_t node_read_distance(struct 
sys_device * dev, + struct sysdev_attribute *attr, char * buf) { int nid = dev->id; int len = 0; diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 50690d9df24..dc7dace14e1 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -36,7 +36,7 @@ sysdev_show(struct kobject * kobj, struct attribute * attr, char * buffer) struct sysdev_attribute * sysdev_attr = to_sysdev_attr(attr); if (sysdev_attr->show) - return sysdev_attr->show(sysdev, buffer); + return sysdev_attr->show(sysdev, sysdev_attr, buffer); return -EIO; } @@ -49,7 +49,7 @@ sysdev_store(struct kobject * kobj, struct attribute * attr, struct sysdev_attribute * sysdev_attr = to_sysdev_attr(attr); if (sysdev_attr->store) - return sysdev_attr->store(sysdev, buffer, count); + return sysdev_attr->store(sysdev, sysdev_attr, buffer, count); return -EIO; } diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 3f6d9b0a6ab..199cd97e32e 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -34,7 +34,8 @@ static SYSDEV_ATTR(_name, 0444, show_##_name, NULL) #define define_id_show_func(name) \ -static ssize_t show_##name(struct sys_device *dev, char *buf) \ +static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ @@ -59,14 +60,17 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) #ifdef arch_provides_topology_pointers #define define_siblings_show_map(name) \ -static ssize_t show_##name(struct sys_device *dev, char *buf) \ +static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ return show_cpumap(0, &(topology_##name(cpu)), buf); \ } #define define_siblings_show_list(name) \ -static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +static ssize_t show_##name##_list(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ 
unsigned int cpu = dev->id; \ return show_cpumap(1, &(topology_##name(cpu)), buf); \ @@ -74,7 +78,8 @@ static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ #else #define define_siblings_show_map(name) \ -static ssize_t show_##name(struct sys_device *dev, char *buf) \ +static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ cpumask_t mask = topology_##name(cpu); \ @@ -82,7 +87,9 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \ } #define define_siblings_show_list(name) \ -static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ +static ssize_t show_##name##_list(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ unsigned int cpu = dev->id; \ cpumask_t mask = topology_##name(cpu); \ diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index e949618b9be..31a0e0b455b 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -21,7 +21,8 @@ static int __init cpuidle_sysfs_setup(char *unused) } __setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup); -static ssize_t show_available_governors(struct sys_device *dev, char *buf) +static ssize_t show_available_governors(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t i = 0; struct cpuidle_governor *tmp; @@ -39,7 +40,8 @@ out: return i; } -static ssize_t show_current_driver(struct sys_device *dev, char *buf) +static ssize_t show_current_driver(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t ret; @@ -53,7 +55,8 @@ static ssize_t show_current_driver(struct sys_device *dev, char *buf) return ret; } -static ssize_t show_current_governor(struct sys_device *dev, char *buf) +static ssize_t show_current_governor(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t ret; @@ -68,6 +71,7 @@ static ssize_t show_current_governor(struct sys_device *dev, char *buf) } static ssize_t 
store_current_governor(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { char gov_name[CPUIDLE_NAME_LEN]; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 591bc29b55f..d4427cb8697 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -610,6 +610,7 @@ static ssize_t show_target_kb(struct sys_device *dev, char *buf) } static ssize_t store_target_kb(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index f2767bc6b73..8dcf3162b21 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -99,8 +99,9 @@ extern void sysdev_unregister(struct sys_device *); struct sysdev_attribute { struct attribute attr; - ssize_t (*show)(struct sys_device *, char *); - ssize_t (*store)(struct sys_device *, const char *, size_t); + ssize_t (*show)(struct sys_device *, struct sysdev_attribute *, char *); + ssize_t (*store)(struct sys_device *, struct sysdev_attribute *, + const char *, size_t); }; diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 092e4c620af..a56f629b057 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -297,8 +297,8 @@ static int test_func(void *data) * * opcode:data */ -static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr, + const char *buf, size_t count) { struct sched_param schedpar; struct test_thread_data *td; @@ -360,7 +360,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, * @dev: thread to query * @buf: char buffer to be filled with thread status info */ -static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) +static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { struct test_thread_data *td; struct task_struct *tsk; diff --git 
a/kernel/sched.c b/kernel/sched.c index 99e6d850eca..b1104ea5d25 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7737,11 +7737,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) } #ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) +static ssize_t sched_mc_power_savings_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } static ssize_t sched_mc_power_savings_store(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); @@ -7751,11 +7753,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, #endif #ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) +static ssize_t sched_smt_power_savings_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } static ssize_t sched_smt_power_savings_store(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index dadde5361f3..b1c2da81b05 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -376,7 +376,8 @@ void clocksource_unregister(struct clocksource *cs) * Provides sysfs interface for listing current clocksource. */ static ssize_t -sysfs_show_current_clocksources(struct sys_device *dev, char *buf) +sysfs_show_current_clocksources(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { ssize_t count = 0; @@ -397,6 +398,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf) * clocksource selction. 
*/ static ssize_t sysfs_override_clocksource(struct sys_device *dev, + struct sysdev_attribute *attr, const char *buf, size_t count) { struct clocksource *ovr = NULL; @@ -449,7 +451,9 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, * Provides sysfs interface for listing registered clocksources */ static ssize_t -sysfs_show_available_clocksources(struct sys_device *dev, char *buf) +sysfs_show_available_clocksources(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { struct clocksource *src; ssize_t count = 0; -- cgit v1.2.3-70-g09d2 From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:55 -0700 Subject: mm: move bootmem descriptors definition to a single place There are a lot of places that define either a single bootmem descriptor or an array of them. Use only one central array with MAX_NUMNODES items instead. Signed-off-by: Johannes Weiner Acked-by: Ralf Baechle Cc: Ingo Molnar Cc: Richard Henderson Cc: Russell King Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Kyle McMartin Cc: Paul Mackerras Cc: Paul Mundt Cc: David S. 
Miller Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/numa.c | 8 ++++---- arch/arm/mm/discontig.c | 34 ++++++++++++++++------------------ arch/ia64/mm/discontig.c | 11 +++++------ arch/m32r/mm/discontig.c | 4 +--- arch/m68k/mm/init.c | 4 +--- arch/mips/sgi-ip27/ip27-memory.c | 4 +--- arch/parisc/mm/init.c | 3 +-- arch/powerpc/mm/numa.c | 3 +-- arch/sh/mm/numa.c | 5 ++--- arch/sparc64/mm/init.c | 3 +-- arch/x86/mm/discontig_32.c | 3 +-- arch/x86/mm/numa_64.c | 4 +--- include/linux/bootmem.h | 2 ++ mm/bootmem.c | 2 ++ mm/page_alloc.c | 4 +--- 15 files changed, 40 insertions(+), 54 deletions(-) (limited to 'arch/ia64') diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 10ab7833e83..a53fda0481c 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -19,7 +19,6 @@ #include pg_data_t node_data[MAX_NUMNODES]; -bootmem_data_t node_bdata[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); #undef DEBUG_DISCONTIG @@ -141,7 +140,7 @@ setup_memory_node(int nid, void *kernel_end) printk(" not enough mem to reserve NODE_DATA"); return; } - NODE_DATA(nid)->bdata = &node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; printk(" Detected node memory: start %8lu, end %8lu\n", node_min_pfn, node_max_pfn); @@ -304,8 +303,9 @@ void __init paging_init(void) dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; for_each_online_node(nid) { - unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT; - unsigned long end_pfn = node_bdata[nid].node_low_pfn; + bootmem_data_t *bdata = &bootmem_node_data[nid]; + unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT; + unsigned long end_pfn = bdata->node_low_pfn; if (dma_local_pfn >= end_pfn - start_pfn) zones_size[ZONE_DMA] = end_pfn - start_pfn; diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c index 1e560218950..c8c0c4b0f0a 100644 --- a/arch/arm/mm/discontig.c +++ 
b/arch/arm/mm/discontig.c @@ -21,26 +21,24 @@ * Our node_data structure for discontiguous memory. */ -static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; - pg_data_t discontig_node_data[MAX_NUMNODES] = { - { .bdata = &node_bootmem_data[0] }, - { .bdata = &node_bootmem_data[1] }, - { .bdata = &node_bootmem_data[2] }, - { .bdata = &node_bootmem_data[3] }, + { .bdata = &bootmem_node_data[0] }, + { .bdata = &bootmem_node_data[1] }, + { .bdata = &bootmem_node_data[2] }, + { .bdata = &bootmem_node_data[3] }, #if MAX_NUMNODES == 16 - { .bdata = &node_bootmem_data[4] }, - { .bdata = &node_bootmem_data[5] }, - { .bdata = &node_bootmem_data[6] }, - { .bdata = &node_bootmem_data[7] }, - { .bdata = &node_bootmem_data[8] }, - { .bdata = &node_bootmem_data[9] }, - { .bdata = &node_bootmem_data[10] }, - { .bdata = &node_bootmem_data[11] }, - { .bdata = &node_bootmem_data[12] }, - { .bdata = &node_bootmem_data[13] }, - { .bdata = &node_bootmem_data[14] }, - { .bdata = &node_bootmem_data[15] }, + { .bdata = &bootmem_node_data[4] }, + { .bdata = &bootmem_node_data[5] }, + { .bdata = &bootmem_node_data[6] }, + { .bdata = &bootmem_node_data[7] }, + { .bdata = &bootmem_node_data[8] }, + { .bdata = &bootmem_node_data[9] }, + { .bdata = &bootmem_node_data[10] }, + { .bdata = &bootmem_node_data[11] }, + { .bdata = &bootmem_node_data[12] }, + { .bdata = &bootmem_node_data[13] }, + { .bdata = &bootmem_node_data[14] }, + { .bdata = &bootmem_node_data[15] }, #endif }; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 544dc420c65..2fcf8464331 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -36,7 +36,6 @@ struct early_node_data { struct ia64_node_data *node_data; unsigned long pernode_addr; unsigned long pernode_size; - struct bootmem_data bootmem_data; unsigned long num_physpages; #ifdef CONFIG_ZONE_DMA unsigned long num_dma_physpages; @@ -76,7 +75,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, int node) { unsigned 
long cstart, epfn, end = start + len; - struct bootmem_data *bdp = &mem_data[node].bootmem_data; + struct bootmem_data *bdp = &bootmem_node_data[node]; epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; cstart = GRANULEROUNDDOWN(start); @@ -167,7 +166,7 @@ static void __init fill_pernode(int node, unsigned long pernode, { void *cpu_data; int cpus = early_nr_cpus_node(node); - struct bootmem_data *bdp = &mem_data[node].bootmem_data; + struct bootmem_data *bdp = &bootmem_node_data[node]; mem_data[node].pernode_addr = pernode; mem_data[node].pernode_size = pernodesize; @@ -224,7 +223,7 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, { unsigned long epfn; unsigned long pernodesize = 0, pernode, pages, mapsize; - struct bootmem_data *bdp = &mem_data[node].bootmem_data; + struct bootmem_data *bdp = &bootmem_node_data[node]; epfn = (start + len) >> PAGE_SHIFT; @@ -440,7 +439,7 @@ void __init find_memory(void) efi_memmap_walk(find_max_min_low_pfn, NULL); for_each_online_node(node) - if (mem_data[node].bootmem_data.node_low_pfn) { + if (bootmem_node_data[node].node_low_pfn) { node_clear(node, memory_less_mask); mem_data[node].min_pfn = ~0UL; } @@ -460,7 +459,7 @@ void __init find_memory(void) else if (node_isset(node, memory_less_mask)) continue; - bdp = &mem_data[node].bootmem_data; + bdp = &bootmem_node_data[node]; pernode = mem_data[node].pernode_addr; pernodesize = mem_data[node].pernode_size; map = pernode + pernodesize; diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index 07c1af7dc0e..aa9145ef6cc 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -20,7 +20,6 @@ extern char _end[]; struct pglist_data *node_data[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); -static bootmem_data_t node_bdata[MAX_NUMNODES] __initdata; pg_data_t m32r_node_data[MAX_NUMNODES]; @@ -81,7 +80,7 @@ unsigned long __init setup_memory(void) for_each_online_node(nid) { mp = &mem_prof[nid]; NODE_DATA(nid)=(pg_data_t *)&m32r_node_data[nid]; - 
NODE_DATA(nid)->bdata = &node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; min_pfn = mp->start_pfn; max_pfn = mp->start_pfn + mp->pages; bootmap_size = init_bootmem_node(NODE_DATA(nid), mp->free_pfn, @@ -163,4 +162,3 @@ unsigned long __init zone_sizes_init(void) return holes; } - diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index d8fb9c5303c..79f5f94d480 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -32,8 +32,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -static bootmem_data_t __initdata bootmem_data[MAX_NUMNODES]; - pg_data_t pg_data_map[MAX_NUMNODES]; EXPORT_SYMBOL(pg_data_map); @@ -58,7 +56,7 @@ void __init m68k_setup_node(int node) pg_data_table[i] = pg_data_map + node; } #endif - pg_data_map[node].bdata = bootmem_data + node; + pg_data_map[node].bdata = bootmem_node_data + node; node_set_online(node); } diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 42cd1095630..060d853d7b3 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -33,8 +33,6 @@ #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) -static struct bootmem_data __initdata plat_node_bdata[MAX_COMPACT_NODES]; - struct node_data *__node_data[MAX_COMPACT_NODES]; EXPORT_SYMBOL(__node_data); @@ -403,7 +401,7 @@ static void __init node_mem_init(cnodeid_t node) */ __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); - NODE_DATA(node)->bdata = &plat_node_bdata[node]; + NODE_DATA(node)->bdata = &bootmem_node_data[node]; NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index b4d6c8777ed..0ddf4904640 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -36,7 +36,6 @@ extern int data_start; #ifdef CONFIG_DISCONTIGMEM struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -bootmem_data_t bmem_data[MAX_NUMNODES] 
__read_mostly; unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; #endif @@ -262,7 +261,7 @@ static void __init setup_bootmem(void) #ifdef CONFIG_DISCONTIGMEM for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - NODE_DATA(i)->bdata = &bmem_data[i]; + NODE_DATA(i)->bdata = &bootmem_node_data[i]; } memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index cf4bffba6f7..d9a18135133 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(numa_cpumask_lookup_table); EXPORT_SYMBOL(node_data); -static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; @@ -816,7 +815,7 @@ void __init do_init_bootmem(void) dbg("node %d\n", nid); dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 1663199ce88..095d93bec7c 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -14,7 +14,6 @@ #include #include -static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL_GPL(node_data); @@ -35,7 +34,7 @@ void __init setup_memory(void) NODE_DATA(0) = pfn_to_kaddr(free_pfn); memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); free_pfn += PFN_UP(sizeof(struct pglist_data)); - NODE_DATA(0)->bdata = &plat_node_bdata[0]; + NODE_DATA(0)->bdata = &bootmem_node_data[0]; /* Set up node 0 */ setup_bootmem_allocator(free_pfn); @@ -66,7 +65,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) free_pfn += PFN_UP(sizeof(struct pglist_data)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - 
NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 84898c44dd4..71329747395 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -788,7 +788,6 @@ int numa_cpu_lookup_table[NR_CPUS]; cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; #ifdef CONFIG_NEED_MULTIPLE_NODES -static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; struct mdesc_mblock { u64 base; @@ -871,7 +870,7 @@ static void __init allocate_node_data(int nid) NODE_DATA(nid) = __va(paddr); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; #endif p = NODE_DATA(nid); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 5dfef9fa061..62fa440678d 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -42,7 +42,6 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -static bootmem_data_t node0_bdata; /* * numa interface - we expect the numa architecture specific code to have @@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn, for_each_online_node(nid) memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(0)->bdata = &node0_bdata; + NODE_DATA(0)->bdata = &bootmem_node_data[0]; setup_bootmem_allocator(); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9782f42dd31..a4dd793d600 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -23,8 +23,6 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; - struct memnode memnode; s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { @@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, 
nodedata_phys + pgdat_size - 1); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); - NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; + NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index a1d9b79078e..2599c741405 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -38,6 +38,8 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; +extern bootmem_data_t bootmem_node_data[]; + extern unsigned long bootmem_bootmap_pages(unsigned long); extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern void free_bootmem(unsigned long addr, unsigned long size); diff --git a/mm/bootmem.c b/mm/bootmem.c index 9f4bbc5da73..35b3cb66703 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -36,6 +36,8 @@ static LIST_HEAD(bdata_list); unsigned long saved_max_pfn; #endif +bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; + /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages(unsigned long pages) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9ece07ce65b..e089b92cdff 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4040,9 +4040,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) } #ifndef CONFIG_NEED_MULTIPLE_NODES -static bootmem_data_t contig_bootmem_data; -struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; - +struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] }; EXPORT_SYMBOL(contig_page_data); #endif -- cgit v1.2.3-70-g09d2 From 42b7772812d15b86543a23b82bd6070eef9a08b1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 23 Jul 2008 21:27:10 -0700 Subject: mm: remove double indirection on tlb parameter to free_pgd_range() & Co The double indirection here is not needed anywhere and hence (at least) 
confusing. Signed-off-by: Jan Beulich Cc: Hugh Dickins Cc: Nick Piggin Cc: Christoph Lameter Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Luck, Tony" Cc: Paul Mundt Cc: "David S. Miller" Acked-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 8 ++++---- fs/exec.c | 4 ++-- include/asm-ia64/hugetlb.h | 2 +- include/asm-powerpc/hugetlb.h | 2 +- include/asm-sh/hugetlb.h | 2 +- include/asm-sparc/hugetlb.h | 2 +- include/asm-x86/hugetlb.h | 2 +- include/linux/mm.h | 4 +--- mm/internal.h | 3 +++ mm/memory.c | 10 ++++++---- mm/mmap.c | 6 ++++-- 12 files changed, 26 insertions(+), 21 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index d3ce8f3bcaa..cd49e2860ee 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -112,7 +112,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri return NULL; } -void hugetlb_free_pgd_range(struct mmu_gather **tlb, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0d12fba31bc..1a96cc891cf 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -255,7 +255,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, * * Must be called with pagetable lock held. 
*/ -void hugetlb_free_pgd_range(struct mmu_gather **tlb, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -315,13 +315,13 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, return; start = addr; - pgd = pgd_offset((*tlb)->mm, addr); + pgd = pgd_offset(tlb->mm, addr); do { - BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); + BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } diff --git a/fs/exec.c b/fs/exec.c index fd9234379e8..190ed1f9277 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -541,7 +541,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(&tlb, new_end, old_end, new_end, + free_pgd_range(tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -550,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(&tlb, old_start, old_end, new_end, + free_pgd_range(tlb, old_start, old_end, new_end, vma->vm_next ? 
vma->vm_next->vm_start : 0); } tlb_finish_mmu(tlb, new_end, old_end); diff --git a/include/asm-ia64/hugetlb.h b/include/asm-ia64/hugetlb.h index f28a9701f1c..e9d1e5e2382 100644 --- a/include/asm-ia64/hugetlb.h +++ b/include/asm-ia64/hugetlb.h @@ -4,7 +4,7 @@ #include -void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h index be32ff02f4a..0a37aa5ecaa 100644 --- a/include/asm-powerpc/hugetlb.h +++ b/include/asm-powerpc/hugetlb.h @@ -7,7 +7,7 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); -void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h index 02402303d89..fb30018938c 100644 --- a/include/asm-sh/hugetlb.h +++ b/include/asm-sh/hugetlb.h @@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { } -static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h index 412af58926a..aeb92374ca3 100644 --- a/include/asm-sparc/hugetlb.h +++ b/include/asm-sparc/hugetlb.h @@ -31,7 +31,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) return 0; } -static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, 
unsigned long floor, unsigned long ceiling) diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h index 14171a4924f..7eed6e0883b 100644 --- a/include/asm-x86/hugetlb.h +++ b/include/asm-x86/hugetlb.h @@ -26,7 +26,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { } -static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5c7f8f64f70..f8071097302 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -769,10 +769,8 @@ struct mm_walk { int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk); -void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, +void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); -void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, - unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); void unmap_mapping_range(struct address_space *mapping, diff --git a/mm/internal.h b/mm/internal.h index 50807e12490..858ad01864d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -13,6 +13,9 @@ #include +void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, + unsigned long floor, unsigned long ceiling); + static inline void set_page_count(struct page *page, int v) { atomic_set(&page->_count, v); diff --git a/mm/memory.c b/mm/memory.c index 87350321e66..82f3f1c5cf1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -61,6 +61,8 @@ #include #include +#include "internal.h" + #ifndef CONFIG_NEED_MULTIPLE_NODES /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; 
@@ -211,7 +213,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, * * Must be called with pagetable lock held. */ -void free_pgd_range(struct mmu_gather **tlb, +void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -262,16 +264,16 @@ void free_pgd_range(struct mmu_gather **tlb, return; start = addr; - pgd = pgd_offset((*tlb)->mm, addr); + pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - free_pud_range(*tlb, pgd, addr, next, floor, ceiling); + free_pud_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } -void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, +void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling) { while (vma) { diff --git a/mm/mmap.c b/mm/mmap.c index 1d102b956fd..75e0d0673d7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -32,6 +32,8 @@ #include #include +#include "internal.h" + #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif @@ -1763,7 +1765,7 @@ static void unmap_region(struct mm_struct *mm, update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, + free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, next? 
next->vm_start: 0); tlb_finish_mmu(tlb, start, end); } @@ -2063,7 +2065,7 @@ void exit_mmap(struct mm_struct *mm) /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); + free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); /* -- cgit v1.2.3-70-g09d2 From a5516438959d90b071ff0a484ce4f3f523dc3152 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:41 -0700 Subject: hugetlb: modular state for hugetlb page size The goal of this patchset is to support multiple hugetlb page sizes. This is achieved by introducing a new struct hstate structure, which encapsulates the important hugetlb state and constants (eg. huge page size, number of huge pages currently allocated, etc). The hstate structure is then passed around the code which requires these fields, they will do the right thing regardless of the exact hstate they are operating on. This patch adds the hstate structure, with a single global instance of it (default_hstate), and does the basic work of converting hugetlb to use the hstate. Future patches will add more hstate structures to allow for different hugetlbfs mounts to have different page sizes. 
[akpm@linux-foundation.org: coding-style fixes] Acked-by: Adam Litke Acked-by: Nishanth Aravamudan Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/hugetlbpage.c | 7 +- arch/powerpc/mm/hugetlbpage.c | 3 +- arch/s390/mm/hugetlbpage.c | 3 +- arch/sh/mm/hugetlbpage.c | 3 +- arch/sparc64/mm/hugetlbpage.c | 5 +- arch/x86/mm/hugetlbpage.c | 5 +- fs/hugetlbfs/inode.c | 52 +++--- include/asm-ia64/hugetlb.h | 3 +- include/asm-powerpc/hugetlb.h | 3 +- include/asm-s390/hugetlb.h | 3 +- include/asm-sh/hugetlb.h | 3 +- include/asm-sparc/hugetlb.h | 3 +- include/asm-x86/hugetlb.h | 8 +- include/linux/hugetlb.h | 88 +++++++++- ipc/shm.c | 3 +- mm/hugetlb.c | 368 +++++++++++++++++++++++------------------- mm/memory.c | 2 +- mm/mempolicy.c | 9 +- mm/mmap.c | 3 +- 19 files changed, 356 insertions(+), 218 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index cd49e2860ee..6170f097d25 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -24,7 +24,7 @@ unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT; pte_t * -huge_pte_alloc (struct mm_struct *mm, unsigned long addr) +huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { unsigned long taddr = htlbpage_to_page(addr); pgd_t *pgd; @@ -75,7 +75,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) * Don't actually need to do any preparation, but need to make sure * the address is in the right region. 
*/ -int prepare_hugepage_range(unsigned long addr, unsigned long len) +int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; @@ -149,7 +150,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u /* Handle MAP_FIXED */ if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) + if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1a96cc891cf..c94dc71af98 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -128,7 +128,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return NULL; } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pg; pud_t *pu; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f4b6124fdb7..9162dc84f77 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -72,7 +72,8 @@ void arch_release_hugepage(struct page *page) page[1].index = 0; } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgdp; pud_t *pudp; diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index ae8c321d6e2..2f9dbe0ef4a 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -22,7 +22,8 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgd; pud_t *pud; diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index ebefd2a1437..1307b23f6a7 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -175,7 +175,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned 
long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) + if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } @@ -195,7 +195,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, pgoff, flags); } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgd; pud_t *pud; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 0b3d567e686..52476fde899 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 1; } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pgd; pud_t *pud; @@ -368,7 +369,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) + if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 428eff5b73f..516c581b537 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -80,6 +80,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file->f_path.dentry->d_inode; loff_t len, vma_len; int ret; + struct hstate *h = hstate_file(file); /* * vma address alignment (but not the pgoff alignment) has @@ -92,7 +93,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) + if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); @@ -104,8 +105,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct 
vm_area_struct *vma) len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); if (hugetlb_reserve_pages(inode, - vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), - len >> HPAGE_SHIFT, vma)) + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma)) goto out; ret = 0; @@ -130,20 +131,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long start_addr; + struct hstate *h = hstate_file(file); - if (len & ~HPAGE_MASK) + if (len & ~huge_page_mask(h)) return -EINVAL; if (len > TASK_SIZE) return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) + if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); + addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -156,7 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, start_addr = TASK_UNMAPPED_BASE; full_search: - addr = ALIGN(start_addr, HPAGE_SIZE); + addr = ALIGN(start_addr, huge_page_size(h)); for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ @@ -174,7 +176,7 @@ full_search: if (!vma || addr + len <= vma->vm_start) return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); + addr = ALIGN(vma->vm_end, huge_page_size(h)); } } #endif @@ -225,10 +227,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset, static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct hstate *h = hstate_file(filp); struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; - unsigned long index = *ppos >> HPAGE_SHIFT; - unsigned long offset = *ppos & ~HPAGE_MASK; + unsigned long index = *ppos >> huge_page_shift(h); + unsigned long offset = *ppos & ~huge_page_mask(h); unsigned long end_index; loff_t isize; ssize_t retval = 0; @@ -243,17 +246,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, if (!isize) goto out; - end_index = (isize - 1) >> HPAGE_SHIFT; + end_index = (isize - 1) >> huge_page_shift(h); for (;;) { struct page *page; - int nr, ret; + unsigned long nr, ret; /* nr is the maximum number of bytes to copy from this page */ - nr = HPAGE_SIZE; + nr = huge_page_size(h); if (index >= end_index) { if (index > end_index) goto out; - nr = ((isize - 1) & ~HPAGE_MASK) + 1; + nr = ((isize - 1) & ~huge_page_mask(h)) + 1; if (nr <= offset) { goto out; } @@ -287,8 +290,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, offset += ret; retval += ret; len -= ret; - index += offset >> HPAGE_SHIFT; - offset &= ~HPAGE_MASK; + index += offset >> huge_page_shift(h); + offset &= ~huge_page_mask(h); if (page) page_cache_release(page); @@ -298,7 +301,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, break; } out: - *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; + *ppos = ((loff_t)index << huge_page_shift(h)) + offset; mutex_unlock(&inode->i_mutex); return retval; } @@ -339,8 +342,9 @@ static void truncate_huge_page(struct page *page) static void truncate_hugepages(struct inode *inode, loff_t 
lstart) { + struct hstate *h = hstate_inode(inode); struct address_space *mapping = &inode->i_data; - const pgoff_t start = lstart >> HPAGE_SHIFT; + const pgoff_t start = lstart >> huge_page_shift(h); struct pagevec pvec; pgoff_t next; int i, freed = 0; @@ -449,8 +453,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) { pgoff_t pgoff; struct address_space *mapping = inode->i_mapping; + struct hstate *h = hstate_inode(inode); - BUG_ON(offset & ~HPAGE_MASK); + BUG_ON(offset & ~huge_page_mask(h)); pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); @@ -465,6 +470,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct hstate *h = hstate_inode(inode); int error; unsigned int ia_valid = attr->ia_valid; @@ -476,7 +482,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) if (ia_valid & ATTR_SIZE) { error = -EINVAL; - if (!(attr->ia_size & ~HPAGE_MASK)) + if (!(attr->ia_size & ~huge_page_mask(h))) error = hugetlb_vmtruncate(inode, attr->ia_size); if (error) goto out; @@ -610,9 +616,10 @@ static int hugetlbfs_set_page_dirty(struct page *page) static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); + struct hstate *h = hstate_inode(dentry->d_inode); buf->f_type = HUGETLBFS_MAGIC; - buf->f_bsize = HPAGE_SIZE; + buf->f_bsize = huge_page_size(h); if (sbinfo) { spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used @@ -942,7 +949,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size) goto out_dentry; error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT, NULL)) + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL)) goto out_inode; d_instantiate(dentry, inode); diff --git a/include/asm-ia64/hugetlb.h 
b/include/asm-ia64/hugetlb.h index e9d1e5e2382..da55c63728e 100644 --- a/include/asm-ia64/hugetlb.h +++ b/include/asm-ia64/hugetlb.h @@ -8,7 +8,8 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); -int prepare_hugepage_range(unsigned long addr, unsigned long len); +int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len); static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h index 0a37aa5ecaa..ca37c4af27b 100644 --- a/include/asm-powerpc/hugetlb.h +++ b/include/asm-powerpc/hugetlb.h @@ -21,7 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; diff --git a/include/asm-s390/hugetlb.h b/include/asm-s390/hugetlb.h index 600a776f8f7..670a1d1745d 100644 --- a/include/asm-s390/hugetlb.h +++ b/include/asm-s390/hugetlb.h @@ -22,7 +22,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. 
*/ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; diff --git a/include/asm-sh/hugetlb.h b/include/asm-sh/hugetlb.h index fb30018938c..967068fb79a 100644 --- a/include/asm-sh/hugetlb.h +++ b/include/asm-sh/hugetlb.h @@ -14,7 +14,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; diff --git a/include/asm-sparc/hugetlb.h b/include/asm-sparc/hugetlb.h index aeb92374ca3..177061064ee 100644 --- a/include/asm-sparc/hugetlb.h +++ b/include/asm-sparc/hugetlb.h @@ -22,7 +22,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { if (len & ~HPAGE_MASK) return -EINVAL; diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h index 7eed6e0883b..439a9acc132 100644 --- a/include/asm-x86/hugetlb.h +++ b/include/asm-x86/hugetlb.h @@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. 
*/ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index abbc187193a..ad2271e11f9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -8,7 +8,6 @@ #include #include #include -#include struct ctl_table; @@ -45,7 +44,8 @@ extern int sysctl_hugetlb_shm_group; /* arch callbacks */ -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -80,7 +80,7 @@ static inline unsigned long hugetlb_total_pages(void) #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL -#define prepare_hugepage_range(addr,len) (-EINVAL) +#define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) @@ -134,8 +134,6 @@ struct file *hugetlb_file_setup(const char *name, size_t); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); -#define BLOCKS_PER_HUGEPAGE (HPAGE_SIZE / 512) - static inline int is_file_hugepages(struct file *file) { if (file->f_op == &hugetlbfs_file_operations) @@ -164,4 +162,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 
unsigned long flags); #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ +#ifdef CONFIG_HUGETLB_PAGE + +/* Defines one hugetlb page size */ +struct hstate { + int hugetlb_next_nid; + unsigned int order; + unsigned long mask; + unsigned long max_huge_pages; + unsigned long nr_huge_pages; + unsigned long free_huge_pages; + unsigned long resv_huge_pages; + unsigned long surplus_huge_pages; + unsigned long nr_overcommit_huge_pages; + struct list_head hugepage_freelists[MAX_NUMNODES]; + unsigned int nr_huge_pages_node[MAX_NUMNODES]; + unsigned int free_huge_pages_node[MAX_NUMNODES]; + unsigned int surplus_huge_pages_node[MAX_NUMNODES]; +}; + +extern struct hstate default_hstate; + +static inline struct hstate *hstate_vma(struct vm_area_struct *vma) +{ + return &default_hstate; +} + +static inline struct hstate *hstate_file(struct file *f) +{ + return &default_hstate; +} + +static inline struct hstate *hstate_inode(struct inode *i) +{ + return &default_hstate; +} + +static inline unsigned long huge_page_size(struct hstate *h) +{ + return (unsigned long)PAGE_SIZE << h->order; +} + +static inline unsigned long huge_page_mask(struct hstate *h) +{ + return h->mask; +} + +static inline unsigned int huge_page_order(struct hstate *h) +{ + return h->order; +} + +static inline unsigned huge_page_shift(struct hstate *h) +{ + return h->order + PAGE_SHIFT; +} + +static inline unsigned int pages_per_huge_page(struct hstate *h) +{ + return 1 << h->order; +} + +static inline unsigned int blocks_per_huge_page(struct hstate *h) +{ + return huge_page_size(h) / 512; +} + +#include + +#else +struct hstate {}; +#define hstate_file(f) NULL +#define hstate_vma(v) NULL +#define hstate_inode(i) NULL +#define huge_page_size(h) PAGE_SIZE +#define huge_page_mask(h) PAGE_MASK +#define huge_page_order(h) 0 +#define huge_page_shift(h) PAGE_SHIFT +#define pages_per_huge_page(h) 1 +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/ipc/shm.c b/ipc/shm.c index 790240cd067..a726aebce7d 100644 --- a/ipc/shm.c 
+++ b/ipc/shm.c @@ -577,7 +577,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; - *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + struct hstate *h = hstate_file(shp->shm_file); + *rss += pages_per_huge_page(h) * mapping->nrpages; } else { struct shmem_inode_info *info = SHMEM_I(inode); spin_lock(&info->lock); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 32dff4290c6..0d8153e25f0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -22,18 +22,12 @@ #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; -static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages; -static unsigned long surplus_huge_pages; -static unsigned long nr_overcommit_huge_pages; unsigned long max_huge_pages; unsigned long sysctl_overcommit_huge_pages; -static struct list_head hugepage_freelists[MAX_NUMNODES]; -static unsigned int nr_huge_pages_node[MAX_NUMNODES]; -static unsigned int free_huge_pages_node[MAX_NUMNODES]; -static unsigned int surplus_huge_pages_node[MAX_NUMNODES]; static gfp_t htlb_alloc_mask = GFP_HIGHUSER; unsigned long hugepages_treat_as_movable; -static int hugetlb_next_nid; + +struct hstate default_hstate; /* * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages @@ -203,11 +197,11 @@ static long region_count(struct list_head *head, long f, long t) * Convert the address within this vma to the page offset within * the mapping, in pagecache page units; huge pages here. 
*/ -static pgoff_t vma_hugecache_offset(struct vm_area_struct *vma, - unsigned long address) +static pgoff_t vma_hugecache_offset(struct hstate *h, + struct vm_area_struct *vma, unsigned long address) { - return ((address - vma->vm_start) >> HPAGE_SHIFT) + - (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + return ((address - vma->vm_start) >> huge_page_shift(h)) + + (vma->vm_pgoff >> huge_page_order(h)); } /* @@ -309,20 +303,21 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) } /* Decrement the reserved pages in the hugepage pool by one */ -static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) +static void decrement_hugepage_resv_vma(struct hstate *h, + struct vm_area_struct *vma) { if (vma->vm_flags & VM_NORESERVE) return; if (vma->vm_flags & VM_SHARED) { /* Shared mappings always use reserves */ - resv_huge_pages--; + h->resv_huge_pages--; } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { /* * Only the process that called mmap() has reserves for * private mappings. 
*/ - resv_huge_pages--; + h->resv_huge_pages--; } } @@ -344,12 +339,13 @@ static int vma_has_private_reserves(struct vm_area_struct *vma) return 1; } -static void clear_huge_page(struct page *page, unsigned long addr) +static void clear_huge_page(struct page *page, + unsigned long addr, unsigned long sz) { int i; might_sleep(); - for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) { + for (i = 0; i < sz/PAGE_SIZE; i++) { cond_resched(); clear_user_highpage(page + i, addr + i * PAGE_SIZE); } @@ -359,41 +355,43 @@ static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; + struct hstate *h = hstate_vma(vma); might_sleep(); - for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) { + for (i = 0; i < pages_per_huge_page(h); i++) { cond_resched(); copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); } } -static void enqueue_huge_page(struct page *page) +static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); - list_add(&page->lru, &hugepage_freelists[nid]); - free_huge_pages++; - free_huge_pages_node[nid]++; + list_add(&page->lru, &h->hugepage_freelists[nid]); + h->free_huge_pages++; + h->free_huge_pages_node[nid]++; } -static struct page *dequeue_huge_page(void) +static struct page *dequeue_huge_page(struct hstate *h) { int nid; struct page *page = NULL; for (nid = 0; nid < MAX_NUMNODES; ++nid) { - if (!list_empty(&hugepage_freelists[nid])) { - page = list_entry(hugepage_freelists[nid].next, + if (!list_empty(&h->hugepage_freelists[nid])) { + page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); - free_huge_pages--; - free_huge_pages_node[nid]--; + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; break; } } return page; } -static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, +static struct page *dequeue_huge_page_vma(struct hstate *h, + struct vm_area_struct *vma, unsigned long address, int avoid_reserve) { int 
nid; @@ -411,26 +409,26 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, * not "stolen". The child may still get SIGKILLed */ if (!vma_has_private_reserves(vma) && - free_huge_pages - resv_huge_pages == 0) + h->free_huge_pages - h->resv_huge_pages == 0) return NULL; /* If reserves cannot be used, ensure enough pages are in the pool */ - if (avoid_reserve && free_huge_pages - resv_huge_pages == 0) + if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) return NULL; for_each_zone_zonelist_nodemask(zone, z, zonelist, MAX_NR_ZONES - 1, nodemask) { nid = zone_to_nid(zone); if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && - !list_empty(&hugepage_freelists[nid])) { - page = list_entry(hugepage_freelists[nid].next, + !list_empty(&h->hugepage_freelists[nid])) { + page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); - free_huge_pages--; - free_huge_pages_node[nid]--; + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; if (!avoid_reserve) - decrement_hugepage_resv_vma(vma); + decrement_hugepage_resv_vma(h, vma); break; } @@ -439,12 +437,13 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, return page; } -static void update_and_free_page(struct page *page) +static void update_and_free_page(struct hstate *h, struct page *page) { int i; - nr_huge_pages--; - nr_huge_pages_node[page_to_nid(page)]--; - for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { + + h->nr_huge_pages--; + h->nr_huge_pages_node[page_to_nid(page)]--; + for (i = 0; i < pages_per_huge_page(h); i++) { page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 1 << PG_private | 1<< PG_writeback); @@ -452,11 +451,16 @@ static void update_and_free_page(struct page *page) set_compound_page_dtor(page, NULL); set_page_refcounted(page); arch_release_hugepage(page); - __free_pages(page, HUGETLB_PAGE_ORDER); + __free_pages(page, 
huge_page_order(h)); } static void free_huge_page(struct page *page) { + /* + * Can't pass hstate in here because it is called from the + * compound page destructor. + */ + struct hstate *h = &default_hstate; int nid = page_to_nid(page); struct address_space *mapping; @@ -466,12 +470,12 @@ static void free_huge_page(struct page *page) INIT_LIST_HEAD(&page->lru); spin_lock(&hugetlb_lock); - if (surplus_huge_pages_node[nid]) { - update_and_free_page(page); - surplus_huge_pages--; - surplus_huge_pages_node[nid]--; + if (h->surplus_huge_pages_node[nid]) { + update_and_free_page(h, page); + h->surplus_huge_pages--; + h->surplus_huge_pages_node[nid]--; } else { - enqueue_huge_page(page); + enqueue_huge_page(h, page); } spin_unlock(&hugetlb_lock); if (mapping) @@ -483,7 +487,7 @@ static void free_huge_page(struct page *page) * balanced by operating on them in a round-robin fashion. * Returns 1 if an adjustment was made. */ -static int adjust_pool_surplus(int delta) +static int adjust_pool_surplus(struct hstate *h, int delta) { static int prev_nid; int nid = prev_nid; @@ -496,15 +500,15 @@ static int adjust_pool_surplus(int delta) nid = first_node(node_online_map); /* To shrink on this node, there must be a surplus page */ - if (delta < 0 && !surplus_huge_pages_node[nid]) + if (delta < 0 && !h->surplus_huge_pages_node[nid]) continue; /* Surplus cannot exceed the total number of pages */ - if (delta > 0 && surplus_huge_pages_node[nid] >= - nr_huge_pages_node[nid]) + if (delta > 0 && h->surplus_huge_pages_node[nid] >= + h->nr_huge_pages_node[nid]) continue; - surplus_huge_pages += delta; - surplus_huge_pages_node[nid] += delta; + h->surplus_huge_pages += delta; + h->surplus_huge_pages_node[nid] += delta; ret = 1; break; } while (nid != prev_nid); @@ -513,46 +517,46 @@ static int adjust_pool_surplus(int delta) return ret; } -static void prep_new_huge_page(struct page *page, int nid) +static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { 
set_compound_page_dtor(page, free_huge_page); spin_lock(&hugetlb_lock); - nr_huge_pages++; - nr_huge_pages_node[nid]++; + h->nr_huge_pages++; + h->nr_huge_pages_node[nid]++; spin_unlock(&hugetlb_lock); put_page(page); /* free it into the hugepage allocator */ } -static struct page *alloc_fresh_huge_page_node(int nid) +static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) { struct page *page; page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, - HUGETLB_PAGE_ORDER); + huge_page_order(h)); if (page) { if (arch_prepare_hugepage(page)) { __free_pages(page, HUGETLB_PAGE_ORDER); return NULL; } - prep_new_huge_page(page, nid); + prep_new_huge_page(h, page, nid); } return page; } -static int alloc_fresh_huge_page(void) +static int alloc_fresh_huge_page(struct hstate *h) { struct page *page; int start_nid; int next_nid; int ret = 0; - start_nid = hugetlb_next_nid; + start_nid = h->hugetlb_next_nid; do { - page = alloc_fresh_huge_page_node(hugetlb_next_nid); + page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid); if (page) ret = 1; /* @@ -566,11 +570,11 @@ static int alloc_fresh_huge_page(void) * if we just successfully allocated a hugepage so that * the next caller gets hugepages on the next node. 
*/ - next_nid = next_node(hugetlb_next_nid, node_online_map); + next_nid = next_node(h->hugetlb_next_nid, node_online_map); if (next_nid == MAX_NUMNODES) next_nid = first_node(node_online_map); - hugetlb_next_nid = next_nid; - } while (!page && hugetlb_next_nid != start_nid); + h->hugetlb_next_nid = next_nid; + } while (!page && h->hugetlb_next_nid != start_nid); if (ret) count_vm_event(HTLB_BUDDY_PGALLOC); @@ -580,8 +584,8 @@ static int alloc_fresh_huge_page(void) return ret; } -static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, - unsigned long address) +static struct page *alloc_buddy_huge_page(struct hstate *h, + struct vm_area_struct *vma, unsigned long address) { struct page *page; unsigned int nid; @@ -610,18 +614,18 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, * per-node value is checked there. */ spin_lock(&hugetlb_lock); - if (surplus_huge_pages >= nr_overcommit_huge_pages) { + if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { spin_unlock(&hugetlb_lock); return NULL; } else { - nr_huge_pages++; - surplus_huge_pages++; + h->nr_huge_pages++; + h->surplus_huge_pages++; } spin_unlock(&hugetlb_lock); page = alloc_pages(htlb_alloc_mask|__GFP_COMP| __GFP_REPEAT|__GFP_NOWARN, - HUGETLB_PAGE_ORDER); + huge_page_order(h)); spin_lock(&hugetlb_lock); if (page) { @@ -636,12 +640,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, /* * We incremented the global counters already */ - nr_huge_pages_node[nid]++; - surplus_huge_pages_node[nid]++; + h->nr_huge_pages_node[nid]++; + h->surplus_huge_pages_node[nid]++; __count_vm_event(HTLB_BUDDY_PGALLOC); } else { - nr_huge_pages--; - surplus_huge_pages--; + h->nr_huge_pages--; + h->surplus_huge_pages--; __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); } spin_unlock(&hugetlb_lock); @@ -653,16 +657,16 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, * Increase the hugetlb pool such that it can accomodate a reservation * of size 
'delta'. */ -static int gather_surplus_pages(int delta) +static int gather_surplus_pages(struct hstate *h, int delta) { struct list_head surplus_list; struct page *page, *tmp; int ret, i; int needed, allocated; - needed = (resv_huge_pages + delta) - free_huge_pages; + needed = (h->resv_huge_pages + delta) - h->free_huge_pages; if (needed <= 0) { - resv_huge_pages += delta; + h->resv_huge_pages += delta; return 0; } @@ -673,7 +677,7 @@ static int gather_surplus_pages(int delta) retry: spin_unlock(&hugetlb_lock); for (i = 0; i < needed; i++) { - page = alloc_buddy_huge_page(NULL, 0); + page = alloc_buddy_huge_page(h, NULL, 0); if (!page) { /* * We were not able to allocate enough pages to @@ -694,7 +698,8 @@ retry: * because either resv_huge_pages or free_huge_pages may have changed. */ spin_lock(&hugetlb_lock); - needed = (resv_huge_pages + delta) - (free_huge_pages + allocated); + needed = (h->resv_huge_pages + delta) - + (h->free_huge_pages + allocated); if (needed > 0) goto retry; @@ -707,7 +712,7 @@ retry: * before they are reserved. */ needed += allocated; - resv_huge_pages += delta; + h->resv_huge_pages += delta; ret = 0; free: /* Free the needed pages to the hugetlb pool */ @@ -715,7 +720,7 @@ free: if ((--needed) < 0) break; list_del(&page->lru); - enqueue_huge_page(page); + enqueue_huge_page(h, page); } /* Free unnecessary surplus pages to the buddy allocator */ @@ -743,7 +748,8 @@ free: * allocated to satisfy the reservation must be explicitly freed if they were * never used. 
*/ -static void return_unused_surplus_pages(unsigned long unused_resv_pages) +static void return_unused_surplus_pages(struct hstate *h, + unsigned long unused_resv_pages) { static int nid = -1; struct page *page; @@ -758,27 +764,27 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) unsigned long remaining_iterations = num_online_nodes(); /* Uncommit the reservation */ - resv_huge_pages -= unused_resv_pages; + h->resv_huge_pages -= unused_resv_pages; - nr_pages = min(unused_resv_pages, surplus_huge_pages); + nr_pages = min(unused_resv_pages, h->surplus_huge_pages); while (remaining_iterations-- && nr_pages) { nid = next_node(nid, node_online_map); if (nid == MAX_NUMNODES) nid = first_node(node_online_map); - if (!surplus_huge_pages_node[nid]) + if (!h->surplus_huge_pages_node[nid]) continue; - if (!list_empty(&hugepage_freelists[nid])) { - page = list_entry(hugepage_freelists[nid].next, + if (!list_empty(&h->hugepage_freelists[nid])) { + page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); list_del(&page->lru); - update_and_free_page(page); - free_huge_pages--; - free_huge_pages_node[nid]--; - surplus_huge_pages--; - surplus_huge_pages_node[nid]--; + update_and_free_page(h, page); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; + h->surplus_huge_pages--; + h->surplus_huge_pages_node[nid]--; nr_pages--; remaining_iterations = num_online_nodes(); } @@ -794,13 +800,14 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) * an instantiated the change should be committed via vma_commit_reservation. * No action is required on failure. 
*/ -static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) +static int vma_needs_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) { struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; if (vma->vm_flags & VM_SHARED) { - pgoff_t idx = vma_hugecache_offset(vma, addr); + pgoff_t idx = vma_hugecache_offset(h, vma, addr); return region_chg(&inode->i_mapping->private_list, idx, idx + 1); @@ -809,7 +816,7 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) } else { int err; - pgoff_t idx = vma_hugecache_offset(vma, addr); + pgoff_t idx = vma_hugecache_offset(h, vma, addr); struct resv_map *reservations = vma_resv_map(vma); err = region_chg(&reservations->regions, idx, idx + 1); @@ -818,18 +825,18 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) return 0; } } -static void vma_commit_reservation(struct vm_area_struct *vma, - unsigned long addr) +static void vma_commit_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) { struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; if (vma->vm_flags & VM_SHARED) { - pgoff_t idx = vma_hugecache_offset(vma, addr); + pgoff_t idx = vma_hugecache_offset(h, vma, addr); region_add(&inode->i_mapping->private_list, idx, idx + 1); } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { - pgoff_t idx = vma_hugecache_offset(vma, addr); + pgoff_t idx = vma_hugecache_offset(h, vma, addr); struct resv_map *reservations = vma_resv_map(vma); /* Mark this page used in the map. 
*/ @@ -840,6 +847,7 @@ static void vma_commit_reservation(struct vm_area_struct *vma, static struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { + struct hstate *h = hstate_vma(vma); struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; @@ -852,7 +860,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, * MAP_NORESERVE mappings may also need pages and quota allocated * if no reserve mapping overlaps. */ - chg = vma_needs_reservation(vma, addr); + chg = vma_needs_reservation(h, vma, addr); if (chg < 0) return ERR_PTR(chg); if (chg) @@ -860,11 +868,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); spin_lock(&hugetlb_lock); - page = dequeue_huge_page_vma(vma, addr, avoid_reserve); + page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); spin_unlock(&hugetlb_lock); if (!page) { - page = alloc_buddy_huge_page(vma, addr); + page = alloc_buddy_huge_page(h, vma, addr); if (!page) { hugetlb_put_quota(inode->i_mapping, chg); return ERR_PTR(-VM_FAULT_OOM); @@ -874,7 +882,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, set_page_refcounted(page); set_page_private(page, (unsigned long) mapping); - vma_commit_reservation(vma, addr); + vma_commit_reservation(h, vma, addr); return page; } @@ -882,21 +890,28 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, static int __init hugetlb_init(void) { unsigned long i; + struct hstate *h = &default_hstate; if (HPAGE_SHIFT == 0) return 0; + if (!h->order) { + h->order = HPAGE_SHIFT - PAGE_SHIFT; + h->mask = HPAGE_MASK; + } + for (i = 0; i < MAX_NUMNODES; ++i) - INIT_LIST_HEAD(&hugepage_freelists[i]); + INIT_LIST_HEAD(&h->hugepage_freelists[i]); - hugetlb_next_nid = first_node(node_online_map); + h->hugetlb_next_nid = first_node(node_online_map); for (i = 0; i < max_huge_pages; ++i) { - if (!alloc_fresh_huge_page()) + if 
(!alloc_fresh_huge_page(h)) break; } - max_huge_pages = free_huge_pages = nr_huge_pages = i; - printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); + max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; + printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", + h->free_huge_pages); return 0; } module_init(hugetlb_init); @@ -922,34 +937,36 @@ static unsigned int cpuset_mems_nr(unsigned int *array) #ifdef CONFIG_SYSCTL #ifdef CONFIG_HIGHMEM -static void try_to_free_low(unsigned long count) +static void try_to_free_low(struct hstate *h, unsigned long count) { int i; for (i = 0; i < MAX_NUMNODES; ++i) { struct page *page, *next; - list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { - if (count >= nr_huge_pages) + struct list_head *freel = &h->hugepage_freelists[i]; + list_for_each_entry_safe(page, next, freel, lru) { + if (count >= h->nr_huge_pages) return; if (PageHighMem(page)) continue; list_del(&page->lru); update_and_free_page(page); - free_huge_pages--; - free_huge_pages_node[page_to_nid(page)]--; + h->free_huge_pages--; + h->free_huge_pages_node[page_to_nid(page)]--; } } } #else -static inline void try_to_free_low(unsigned long count) +static inline void try_to_free_low(struct hstate *h, unsigned long count) { } #endif -#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages) +#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages) static unsigned long set_max_huge_pages(unsigned long count) { unsigned long min_count, ret; + struct hstate *h = &default_hstate; /* * Increase the pool size @@ -963,19 +980,19 @@ static unsigned long set_max_huge_pages(unsigned long count) * within all the constraints specified by the sysctls. 
*/ spin_lock(&hugetlb_lock); - while (surplus_huge_pages && count > persistent_huge_pages) { - if (!adjust_pool_surplus(-1)) + while (h->surplus_huge_pages && count > persistent_huge_pages(h)) { + if (!adjust_pool_surplus(h, -1)) break; } - while (count > persistent_huge_pages) { + while (count > persistent_huge_pages(h)) { /* * If this allocation races such that we no longer need the * page, free_huge_page will handle it by freeing the page * and reducing the surplus. */ spin_unlock(&hugetlb_lock); - ret = alloc_fresh_huge_page(); + ret = alloc_fresh_huge_page(h); spin_lock(&hugetlb_lock); if (!ret) goto out; @@ -997,21 +1014,21 @@ static unsigned long set_max_huge_pages(unsigned long count) * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. */ - min_count = resv_huge_pages + nr_huge_pages - free_huge_pages; + min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; min_count = max(count, min_count); - try_to_free_low(min_count); - while (min_count < persistent_huge_pages) { - struct page *page = dequeue_huge_page(); + try_to_free_low(h, min_count); + while (min_count < persistent_huge_pages(h)) { + struct page *page = dequeue_huge_page(h); if (!page) break; - update_and_free_page(page); + update_and_free_page(h, page); } - while (count < persistent_huge_pages) { - if (!adjust_pool_surplus(1)) + while (count < persistent_huge_pages(h)) { + if (!adjust_pool_surplus(h, 1)) break; } out: - ret = persistent_huge_pages; + ret = persistent_huge_pages(h); spin_unlock(&hugetlb_lock); return ret; } @@ -1041,9 +1058,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { + struct hstate *h = &default_hstate; proc_doulongvec_minmax(table, write, file, buffer, length, ppos); spin_lock(&hugetlb_lock); - nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; + h->nr_overcommit_huge_pages = 
sysctl_overcommit_huge_pages; spin_unlock(&hugetlb_lock); return 0; } @@ -1052,37 +1070,40 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, int hugetlb_report_meminfo(char *buf) { + struct hstate *h = &default_hstate; return sprintf(buf, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" "HugePages_Rsvd: %5lu\n" "HugePages_Surp: %5lu\n" "Hugepagesize: %5lu kB\n", - nr_huge_pages, - free_huge_pages, - resv_huge_pages, - surplus_huge_pages, - HPAGE_SIZE/1024); + h->nr_huge_pages, + h->free_huge_pages, + h->resv_huge_pages, + h->surplus_huge_pages, + 1UL << (huge_page_order(h) + PAGE_SHIFT - 10)); } int hugetlb_report_node_meminfo(int nid, char *buf) { + struct hstate *h = &default_hstate; return sprintf(buf, "Node %d HugePages_Total: %5u\n" "Node %d HugePages_Free: %5u\n" "Node %d HugePages_Surp: %5u\n", - nid, nr_huge_pages_node[nid], - nid, free_huge_pages_node[nid], - nid, surplus_huge_pages_node[nid]); + nid, h->nr_huge_pages_node[nid], + nid, h->free_huge_pages_node[nid], + nid, h->surplus_huge_pages_node[nid]); } /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { - return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); + struct hstate *h = &default_hstate; + return h->nr_huge_pages * pages_per_huge_page(h); } -static int hugetlb_acct_memory(long delta) +static int hugetlb_acct_memory(struct hstate *h, long delta) { int ret = -ENOMEM; @@ -1105,18 +1126,18 @@ static int hugetlb_acct_memory(long delta) * semantics that cpuset has. 
*/ if (delta > 0) { - if (gather_surplus_pages(delta) < 0) + if (gather_surplus_pages(h, delta) < 0) goto out; - if (delta > cpuset_mems_nr(free_huge_pages_node)) { - return_unused_surplus_pages(delta); + if (delta > cpuset_mems_nr(h->free_huge_pages_node)) { + return_unused_surplus_pages(h, delta); goto out; } } ret = 0; if (delta < 0) - return_unused_surplus_pages((unsigned long) -delta); + return_unused_surplus_pages(h, (unsigned long) -delta); out: spin_unlock(&hugetlb_lock); @@ -1141,14 +1162,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) static void hugetlb_vm_op_close(struct vm_area_struct *vma) { + struct hstate *h = hstate_vma(vma); struct resv_map *reservations = vma_resv_map(vma); unsigned long reserve; unsigned long start; unsigned long end; if (reservations) { - start = vma_hugecache_offset(vma, vma->vm_start); - end = vma_hugecache_offset(vma, vma->vm_end); + start = vma_hugecache_offset(h, vma, vma->vm_start); + end = vma_hugecache_offset(h, vma, vma->vm_end); reserve = (end - start) - region_count(&reservations->regions, start, end); @@ -1156,7 +1178,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) kref_put(&reservations->refs, resv_map_release); if (reserve) - hugetlb_acct_memory(-reserve); + hugetlb_acct_memory(h, -reserve); } } @@ -1214,14 +1236,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct page *ptepage; unsigned long addr; int cow; + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; - dst_pte = huge_pte_alloc(dst, addr); + dst_pte = huge_pte_alloc(dst, addr, sz); if (!dst_pte) goto nomem; @@ -1257,6 +1281,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, 
pte_t pte; struct page *page; struct page *tmp; + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); + /* * A page gathering list, protected by per file i_mmap_lock. The * lock is used to avoid list corruption from multiple unmapping @@ -1265,11 +1292,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, LIST_HEAD(page_list); WARN_ON(!is_vm_hugetlb_page(vma)); - BUG_ON(start & ~HPAGE_MASK); - BUG_ON(end & ~HPAGE_MASK); + BUG_ON(start & ~huge_page_mask(h)); + BUG_ON(end & ~huge_page_mask(h)); spin_lock(&mm->page_table_lock); - for (address = start; address < end; address += HPAGE_SIZE) { + for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; @@ -1383,6 +1410,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte, struct page *pagecache_page) { + struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; int avoidcopy; int outside_reserve = 0; @@ -1443,7 +1471,7 @@ retry_avoidcopy: __SetPageUptodate(new_page); spin_lock(&mm->page_table_lock); - ptep = huge_pte_offset(mm, address & HPAGE_MASK); + ptep = huge_pte_offset(mm, address & huge_page_mask(h)); if (likely(pte_same(huge_ptep_get(ptep), pte))) { /* Break COW */ huge_ptep_clear_flush(vma, address, ptep); @@ -1458,14 +1486,14 @@ retry_avoidcopy: } /* Return the pagecache page at a given address within a VMA */ -static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma, - unsigned long address) +static struct page *hugetlbfs_pagecache_page(struct hstate *h, + struct vm_area_struct *vma, unsigned long address) { struct address_space *mapping; pgoff_t idx; mapping = vma->vm_file->f_mapping; - idx = vma_hugecache_offset(vma, address); + idx = vma_hugecache_offset(h, vma, address); return find_lock_page(mapping, idx); } @@ -1473,6 +1501,7 @@ static struct page *hugetlbfs_pagecache_page(struct vm_area_struct *vma, 
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, int write_access) { + struct hstate *h = hstate_vma(vma); int ret = VM_FAULT_SIGBUS; pgoff_t idx; unsigned long size; @@ -1493,7 +1522,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, } mapping = vma->vm_file->f_mapping; - idx = vma_hugecache_offset(vma, address); + idx = vma_hugecache_offset(h, vma, address); /* * Use page lock to guard against racing truncation @@ -1502,7 +1531,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, retry: page = find_lock_page(mapping, idx); if (!page) { - size = i_size_read(mapping->host) >> HPAGE_SHIFT; + size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto out; page = alloc_huge_page(vma, address, 0); @@ -1510,7 +1539,7 @@ retry: ret = -PTR_ERR(page); goto out; } - clear_huge_page(page, address); + clear_huge_page(page, address, huge_page_size(h)); __SetPageUptodate(page); if (vma->vm_flags & VM_SHARED) { @@ -1526,14 +1555,14 @@ retry: } spin_lock(&inode->i_lock); - inode->i_blocks += BLOCKS_PER_HUGEPAGE; + inode->i_blocks += blocks_per_huge_page(h); spin_unlock(&inode->i_lock); } else lock_page(page); } spin_lock(&mm->page_table_lock); - size = i_size_read(mapping->host) >> HPAGE_SHIFT; + size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto backout; @@ -1569,8 +1598,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_t entry; int ret; static DEFINE_MUTEX(hugetlb_instantiation_mutex); + struct hstate *h = hstate_vma(vma); - ptep = huge_pte_alloc(mm, address); + ptep = huge_pte_alloc(mm, address, huge_page_size(h)); if (!ptep) return VM_FAULT_OOM; @@ -1594,7 +1624,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (likely(pte_same(entry, huge_ptep_get(ptep)))) if (write_access && !pte_write(entry)) { struct page *page; - page = hugetlbfs_pagecache_page(vma, 
address); + page = hugetlbfs_pagecache_page(h, vma, address); ret = hugetlb_cow(mm, vma, address, ptep, entry, page); if (page) { unlock_page(page); @@ -1615,6 +1645,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long pfn_offset; unsigned long vaddr = *position; int remainder = *length; + struct hstate *h = hstate_vma(vma); spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { @@ -1626,7 +1657,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * each hugepage. We have to make * sure we get the * first, for the page indexing below to work. */ - pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); + pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); if (!pte || huge_pte_none(huge_ptep_get(pte)) || (write && !pte_write(huge_ptep_get(pte)))) { @@ -1644,7 +1675,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } - pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; + pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; page = pte_page(huge_ptep_get(pte)); same_page: if (pages) { @@ -1660,7 +1691,7 @@ same_page: --remainder; ++i; if (vaddr < vma->vm_end && remainder && - pfn_offset < HPAGE_SIZE/PAGE_SIZE) { + pfn_offset < pages_per_huge_page(h)) { /* * We use pfn_offset to avoid touching the pageframes * of this compound page. 
@@ -1682,13 +1713,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma, unsigned long start = address; pte_t *ptep; pte_t pte; + struct hstate *h = hstate_vma(vma); BUG_ON(address >= end); flush_cache_range(vma, address, end); spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); spin_lock(&mm->page_table_lock); - for (; address < end; address += HPAGE_SIZE) { + for (; address < end; address += huge_page_size(h)) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; @@ -1711,6 +1743,7 @@ int hugetlb_reserve_pages(struct inode *inode, struct vm_area_struct *vma) { long ret, chg; + struct hstate *h = hstate_inode(inode); if (vma && vma->vm_flags & VM_NORESERVE) return 0; @@ -1739,7 +1772,7 @@ int hugetlb_reserve_pages(struct inode *inode, if (hugetlb_get_quota(inode->i_mapping, chg)) return -ENOSPC; - ret = hugetlb_acct_memory(chg); + ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugetlb_put_quota(inode->i_mapping, chg); return ret; @@ -1751,12 +1784,13 @@ int hugetlb_reserve_pages(struct inode *inode, void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { + struct hstate *h = hstate_inode(inode); long chg = region_truncate(&inode->i_mapping->private_list, offset); spin_lock(&inode->i_lock); - inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed; + inode->i_blocks -= blocks_per_huge_page(h); spin_unlock(&inode->i_lock); hugetlb_put_quota(inode->i_mapping, (chg - freed)); - hugetlb_acct_memory(-(chg - freed)); + hugetlb_acct_memory(h, -(chg - freed)); } diff --git a/mm/memory.c b/mm/memory.c index 72932489a08..c1c1d6d8c22 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -903,7 +903,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, if (unlikely(is_vm_hugetlb_page(vma))) { unmap_hugepage_range(vma, start, end, NULL); zap_work -= (end - start) / - (HPAGE_SIZE / PAGE_SIZE); + pages_per_huge_page(hstate_vma(vma)); start = end; } else start = unmap_page_range(*tlbp, vma, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 
c94e58b192c..e550bec2058 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1481,7 +1481,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) { zl = node_zonelist(interleave_nid(*mpol, vma, addr, - HPAGE_SHIFT), gfp_flags); + huge_page_shift(hstate_vma(vma))), gfp_flags); } else { zl = policy_zonelist(gfp_flags, *mpol); if ((*mpol)->mode == MPOL_BIND) @@ -2220,9 +2220,12 @@ static void check_huge_range(struct vm_area_struct *vma, { unsigned long addr; struct page *page; + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); - for (addr = start; addr < end; addr += HPAGE_SIZE) { - pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK); + for (addr = start; addr < end; addr += sz) { + pte_t *ptep = huge_pte_offset(vma->vm_mm, + addr & huge_page_mask(h)); pte_t pte; if (!ptep) diff --git a/mm/mmap.c b/mm/mmap.c index 57d3b6097de..5e0cc99e9cd 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1812,7 +1812,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, struct mempolicy *pol; struct vm_area_struct *new; - if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK)) + if (is_vm_hugetlb_page(vma) && (addr & + ~(huge_page_mask(hstate_vma(vma))))) return -EINVAL; if (mm->map_count >= sysctl_max_map_count) -- cgit v1.2.3-70-g09d2 From ceb868796181dc95ea01a110e123afd391639873 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:50 -0700 Subject: hugetlb: introduce pud_huge Straight forward extensions for huge pages located in the PUD instead of PMDs. 
Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/hugetlbpage.c | 6 ++++++ arch/powerpc/mm/hugetlbpage.c | 5 +++++ arch/s390/mm/hugetlbpage.c | 5 +++++ arch/sh/mm/hugetlbpage.c | 5 +++++ arch/sparc64/mm/hugetlbpage.c | 5 +++++ arch/x86/mm/hugetlbpage.c | 25 ++++++++++++++++++++++++- include/linux/hugetlb.h | 5 +++++ mm/hugetlb.c | 9 +++++++++ mm/memory.c | 15 +++++++++++---- 9 files changed, 75 insertions(+), 5 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 6170f097d25..c45fc7f5a97 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -107,6 +107,12 @@ int pmd_huge(pmd_t pmd) { return 0; } + +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index c94dc71af98..63db7adce71 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -369,6 +369,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 9162dc84f77..f28c43d2f61 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -120,6 +120,11 @@ int pmd_huge(pmd_t pmd) return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int write) { diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 2f9dbe0ef4a..9304117039c 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -79,6 +79,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ 
+ return 0; +} + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 1307b23f6a7..f27d10369e0 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -295,6 +295,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 52476fde899..a4789e87a31 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -189,6 +189,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -209,6 +214,11 @@ int pmd_huge(pmd_t pmd) return !!(pmd_val(pmd) & _PAGE_PSE); } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -217,9 +227,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, page = pte_page(*(pte_t *)pmd); if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); return page; } + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + #endif /* x86_64 also uses this file */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 58c0de32e7f..b2c17f62cac 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -50,7 +50,10 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write); +struct page 
*follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write); int pmd_huge(pmd_t pmd); +int pud_huge(pud_t pmd); void hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -78,8 +81,10 @@ static inline unsigned long hugetlb_total_pages(void) #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL +#define follow_huge_pud(mm, addr, pud, write) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 +#define pud_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0c74c14dd2f..107c1ce223c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1996,6 +1996,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, diff --git a/mm/memory.c b/mm/memory.c index 02fc6b1047b..262e3eb6601 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -998,19 +998,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto no_page_table; pud = pud_offset(pgd, address); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) + if (pud_none(*pud)) + goto no_page_table; + if (pud_huge(*pud)) { + BUG_ON(flags & FOLL_GET); + page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); + goto out; + } + if (unlikely(pud_bad(*pud))) goto no_page_table; - + pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) goto 
no_page_table; - if (pmd_huge(*pmd)) { BUG_ON(flags & FOLL_GET); page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); goto out; } - if (unlikely(pmd_bad(*pmd))) goto no_page_table; @@ -1567,6 +1572,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long next; int err; + BUG_ON(pud_huge(*pud)); + pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 3560e249abda6bee41a07a7bf0383a6e193e2839 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:28:09 -0700 Subject: bootmem: replace node_boot_start in struct bootmem_data Almost all users of this field need a PFN instead of a physical address, so replace node_boot_start with node_min_pfn. [Lee.Schermerhorn@hp.com: fix spurious BUG_ON() in mark_bootmem()] Signed-off-by: Johannes Weiner Cc: Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/numa.c | 2 +- arch/arm/plat-omap/fb.c | 4 +--- arch/avr32/mm/init.c | 3 +-- arch/ia64/mm/discontig.c | 19 ++++++++++--------- arch/m32r/mm/discontig.c | 3 +-- arch/m32r/mm/init.c | 4 +--- arch/mn10300/mm/init.c | 6 +++--- arch/sh/mm/init.c | 2 +- include/linux/bootmem.h | 2 +- mm/bootmem.c | 40 +++++++++++++++++++++------------------- 10 files changed, 41 insertions(+), 44 deletions(-) (limited to 'arch/ia64') diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index def0c74a78a..d8c4ceaf00b 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -304,7 +304,7 @@ void __init paging_init(void) for_each_online_node(nid) { bootmem_data_t *bdata = &bootmem_node_data[nid]; - unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT; + unsigned long start_pfn = bdata->node_min_pfn; unsigned long end_pfn = bdata->node_low_pfn; if (dma_local_pfn >= end_pfn - start_pfn) diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index 7854f19b77c..96d6f061973 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ 
-182,7 +182,7 @@ void __init omapfb_reserve_sdram(void) return; bdata = NODE_DATA(0)->bdata; - sdram_start = bdata->node_boot_start; + sdram_start = bdata->node_min_pfn << PAGE_SHIFT; sdram_size = (bdata->node_low_pfn << PAGE_SHIFT) - sdram_start; reserved = 0; for (i = 0; ; i++) { @@ -340,5 +340,3 @@ unsigned long omapfb_reserve_sram(unsigned long sram_pstart, #endif - - diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index 786de88a82a..3c85fdaa948 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -119,8 +119,7 @@ void __init paging_init(void) unsigned long zones_size[MAX_NR_ZONES]; unsigned long low, start_pfn; - start_pfn = pgdat->bdata->node_boot_start; - start_pfn >>= PAGE_SHIFT; + start_pfn = pgdat->bdata->node_min_pfn; low = pgdat->bdata->node_low_pfn; memset(zones_size, 0, sizeof(zones_size)); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 2fcf8464331..d83125e1ed2 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -74,17 +74,17 @@ pg_data_t *pgdat_list[MAX_NUMNODES]; static int __init build_node_maps(unsigned long start, unsigned long len, int node) { - unsigned long cstart, epfn, end = start + len; + unsigned long spfn, epfn, end = start + len; struct bootmem_data *bdp = &bootmem_node_data[node]; epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; - cstart = GRANULEROUNDDOWN(start); + spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; if (!bdp->node_low_pfn) { - bdp->node_boot_start = cstart; + bdp->node_min_pfn = spfn; bdp->node_low_pfn = epfn; } else { - bdp->node_boot_start = min(cstart, bdp->node_boot_start); + bdp->node_min_pfn = min(spfn, bdp->node_min_pfn); bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); } @@ -221,20 +221,21 @@ static void __init fill_pernode(int node, unsigned long pernode, static int __init find_pernode_space(unsigned long start, unsigned long len, int node) { - unsigned long epfn; + unsigned long spfn, epfn; unsigned long pernodesize = 0, pernode, pages, mapsize; struct 
bootmem_data *bdp = &bootmem_node_data[node]; + spfn = start >> PAGE_SHIFT; epfn = (start + len) >> PAGE_SHIFT; - pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT); + pages = bdp->node_low_pfn - bdp->node_min_pfn; mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; /* * Make sure this memory falls within this node's usable memory * since we may have thrown some away in build_maps(). */ - if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn) + if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn) return 0; /* Don't setup this node's local space twice... */ @@ -296,7 +297,7 @@ static void __init reserve_pernode_space(void) bdp = pdp->bdata; /* First the bootmem_map itself */ - pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT); + pages = bdp->node_low_pfn - bdp->node_min_pfn; size = bootmem_bootmap_pages(pages) << PAGE_SHIFT; base = __pa(bdp->node_bootmem_map); reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT); @@ -466,7 +467,7 @@ void __init find_memory(void) init_bootmem_node(pgdat_list[node], map>>PAGE_SHIFT, - bdp->node_boot_start>>PAGE_SHIFT, + bdp->node_min_pfn, bdp->node_low_pfn); } diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index cc23934bc41..cbc3c4c5456 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -123,8 +123,7 @@ unsigned long __init setup_memory(void) return max_low_pfn; } -#define START_PFN(nid) \ - (NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT) +#define START_PFN(nid) (NODE_DATA(nid)->bdata->node_min_pfn) #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) unsigned long __init zone_sizes_init(void) diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 28799af15e9..2554eb59cfe 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -93,8 +93,7 @@ void free_initrd_mem(unsigned long, unsigned long); #endif /* It'd be good if these lines were in the standard header file. 
*/ -#define START_PFN(nid) \ - (NODE_DATA(nid)->bdata->node_boot_start >> PAGE_SHIFT) +#define START_PFN(nid) (NODE_DATA(nid)->bdata->node_min_pfn) #define MAX_LOW_PFN(nid) (NODE_DATA(nid)->bdata->node_low_pfn) #ifndef CONFIG_DISCONTIGMEM @@ -252,4 +251,3 @@ void free_initrd_mem(unsigned long start, unsigned long end) printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); } #endif - diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index 8c5d88c7b90..8cee387a24f 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -67,8 +67,8 @@ void __init paging_init(void) /* declare the sizes of the RAM zones (only use the normal zone) */ zones_size[ZONE_NORMAL] = - (contig_page_data.bdata->node_low_pfn) - - (contig_page_data.bdata->node_boot_start >> PAGE_SHIFT); + contig_page_data.bdata->node_low_pfn - + contig_page_data.bdata->node_min_pfn; /* pass the memory from the bootmem allocator to the main allocator */ free_area_init(zones_size); @@ -87,7 +87,7 @@ void __init mem_init(void) if (!mem_map) BUG(); -#define START_PFN (contig_page_data.bdata->node_boot_start >> PAGE_SHIFT) +#define START_PFN (contig_page_data.bdata->node_min_pfn) #define MAX_LOW_PFN (contig_page_data.bdata->node_low_pfn) max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d7df26bd1e5..d652d375eb1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -191,7 +191,7 @@ void __init paging_init(void) pg_data_t *pgdat = NODE_DATA(nid); unsigned long low, start_pfn; - start_pfn = pgdat->bdata->node_boot_start >> PAGE_SHIFT; + start_pfn = pgdat->bdata->node_min_pfn; low = pgdat->bdata->node_low_pfn; if (max_zone_pfns[ZONE_NORMAL] < low) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 90921d10ffa..4ddf2922fc8 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -28,7 +28,7 @@ extern unsigned long saved_max_pfn; * memory pages (including holes) on the node. 
*/ typedef struct bootmem_data { - unsigned long node_boot_start; + unsigned long node_min_pfn; unsigned long node_low_pfn; void *node_bootmem_map; unsigned long last_end_off; diff --git a/mm/bootmem.c b/mm/bootmem.c index 282b786c2b1..4af15d0340a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -80,7 +80,7 @@ static void __init link_bootmem(bootmem_data_t *bdata) bootmem_data_t *ent; ent = list_entry(iter, bootmem_data_t, list); - if (bdata->node_boot_start < ent->node_boot_start) + if (bdata->node_min_pfn < ent->node_min_pfn) break; } list_add_tail(&bdata->list, iter); @@ -96,7 +96,7 @@ static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, mminit_validate_memmodel_limits(&start, &end); bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); - bdata->node_boot_start = PFN_PHYS(start); + bdata->node_min_pfn = start; bdata->node_low_pfn = end; link_bootmem(bdata); @@ -151,7 +151,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) if (!bdata->node_bootmem_map) return 0; - start = PFN_DOWN(bdata->node_boot_start); + start = bdata->node_min_pfn; end = bdata->node_low_pfn; /* @@ -167,7 +167,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) unsigned long *map, idx, vec; map = bdata->node_bootmem_map; - idx = start - PFN_DOWN(bdata->node_boot_start); + idx = start - bdata->node_min_pfn; vec = ~map[idx / BITS_PER_LONG]; if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { @@ -192,7 +192,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) } page = virt_to_page(bdata->node_bootmem_map); - pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); + pages = bdata->node_low_pfn - bdata->node_min_pfn; pages = bootmem_bootmap_pages(pages); count += pages; while (pages--) @@ -231,8 +231,8 @@ static void __init __free(bootmem_data_t *bdata, unsigned long idx; bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx 
+ PFN_DOWN(bdata->node_boot_start)); + sidx + bdata->node_min_pfn, + eidx + bdata->node_min_pfn); if (bdata->hint_idx > sidx) bdata->hint_idx = sidx; @@ -250,8 +250,8 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, bdebug("nid=%td start=%lx end=%lx flags=%x\n", bdata - bootmem_node_data, - sidx + PFN_DOWN(bdata->node_boot_start), - eidx + PFN_DOWN(bdata->node_boot_start), + sidx + bdata->node_min_pfn, + eidx + bdata->node_min_pfn, flags); for (idx = sidx; idx < eidx; idx++) @@ -261,7 +261,7 @@ static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, return -EBUSY; } bdebug("silent double reserve of PFN %lx\n", - idx + PFN_DOWN(bdata->node_boot_start)); + idx + bdata->node_min_pfn); } return 0; } @@ -275,11 +275,11 @@ static int __init mark_bootmem_node(bootmem_data_t *bdata, bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", bdata - bootmem_node_data, start, end, reserve, flags); - BUG_ON(start < PFN_DOWN(bdata->node_boot_start)); + BUG_ON(start < bdata->node_min_pfn); BUG_ON(end > bdata->node_low_pfn); - sidx = start - PFN_DOWN(bdata->node_boot_start); - eidx = end - PFN_DOWN(bdata->node_boot_start); + sidx = start - bdata->node_min_pfn; + eidx = end - bdata->node_min_pfn; if (reserve) return __reserve(bdata, sidx, eidx, flags); @@ -299,7 +299,8 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, int err; unsigned long max; - if (pos < PFN_DOWN(bdata->node_boot_start)) { + if (pos < bdata->node_min_pfn || + pos >= bdata->node_low_pfn) { BUG_ON(pos != start); continue; } @@ -422,7 +423,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, align, goal, limit); - min = PFN_DOWN(bdata->node_boot_start); + min = bdata->node_min_pfn; max = bdata->node_low_pfn; goal >>= PAGE_SHIFT; @@ -440,8 +441,8 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, else start = ALIGN(min, step); - sidx = start - 
PFN_DOWN(bdata->node_boot_start); - midx = max - PFN_DOWN(bdata->node_boot_start); + sidx = start - bdata->node_min_pfn;; + midx = max - bdata->node_min_pfn; if (bdata->hint_idx > sidx) { /* @@ -491,7 +492,8 @@ find_block: PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) BUG(); - region = phys_to_virt(bdata->node_boot_start + start_off); + region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + + start_off); memset(region, 0, size); return region; } @@ -518,7 +520,7 @@ restart: if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) continue; - if (limit && bdata->node_boot_start >= limit) + if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) break; region = alloc_bootmem_core(bdata, size, align, goal, limit); -- cgit v1.2.3-70-g09d2 From ed8cae8ba01348bfd83333f4648dd807b04d7f08 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:30 -0700 Subject: flag parameters: pipe This patch introduces the new syscall pipe2 which is like pipe but it also takes an additional parameter which takes a flag value. This patch implements the handling of O_CLOEXEC for the flag. I did not add support for the new syscall for the architectures which have a special sys_pipe implementation. I think the maintainers of those archs have the chance to go with the unified implementation but that's up to them. The implementation introduces do_pipe_flags. I did that instead of changing all callers of do_pipe because some of the callers are written in assembler. I would probably screw up changing the assembly code. To avoid breaking code do_pipe is now a small wrapper around do_pipe_flags. Once all callers are changed over to do_pipe_flags the old do_pipe function can be removed. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include #include #include #include #ifndef __NR_pipe2 # ifdef __x86_64__ # define __NR_pipe2 293 # elif defined __i386__ # define __NR_pipe2 331 # else # error "need __NR_pipe2" # endif #endif int main (void) { int fd[2]; if (syscall (__NR_pipe2, fd, 0) != 0) { puts ("pipe2(0) failed"); return 1; } for (int i = 0; i < 2; ++i) { int coe = fcntl (fd[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { printf ("pipe2(0) set close-on-exit for fd[%d]\n", i); return 1; } } close (fd[0]); close (fd[1]); if (syscall (__NR_pipe2, fd, O_CLOEXEC) != 0) { puts ("pipe2(O_CLOEXEC) failed"); return 1; } for (int i = 0; i < 2; ++i) { int coe = fcntl (fd[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { printf ("pipe2(O_CLOEXEC) does not set close-on-exit for fd[%d]\n", i); return 1; } } close (fd[0]); close (fd[1]); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/sys_ia32.c | 2 +- arch/ia64/kernel/sys_ia64.c | 2 +- arch/mips/kernel/syscall.c | 2 +- arch/parisc/hpux/sys_hpux.c | 2 +- arch/sh/kernel/sys_sh32.c | 2 +- arch/sparc/kernel/sys_sparc.c | 2 +- arch/sparc64/kernel/sys_sparc.c | 2 +- arch/x86/ia32/ia32entry.S | 1 + arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/kernel/syscall_table_32.S | 1 + arch/xtensa/kernel/syscall.c | 2 +- fs/pipe.c | 23 ++++++++++++++++++----- include/asm-x86/unistd_32.h | 1 + include/asm-x86/unistd_64.h | 2 ++ include/linux/fs.h | 1 + 15 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 7e028ceb93b..465116aecb8 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1139,7 +1139,7 @@ 
sys32_pipe (int __user *fd) int retval; int fds[2]; - retval = do_pipe(fds); + retval = do_pipe_flags(fds, 0); if (retval) goto out; if (copy_to_user(fd, fds, sizeof(fds))) diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 1eda194b955..bcbb6d8792d 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -160,7 +160,7 @@ sys_pipe (void) int fd[2]; int retval; - retval = do_pipe(fd); + retval = do_pipe_flags(fd, 0); if (retval) goto out; retval = fd[0]; diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3523c8d12ed..343015a2f41 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -52,7 +52,7 @@ asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs) int fd[2]; int error, res; - error = do_pipe(fd); + error = do_pipe_flags(fd, 0); if (error) { res = error; goto out; diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index 0c5b9dabb47..be255ebb609 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -448,7 +448,7 @@ int hpux_pipe(int *kstack_fildes) int error; lock_kernel(); - error = do_pipe(kstack_fildes); + error = do_pipe_flags(kstack_fildes, 0); unlock_kernel(); return error; } diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 125e493ead8..f0aa5c39865 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -29,7 +29,7 @@ asmlinkage int sys_pipe(unsigned long r4, unsigned long r5, int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, 0); if (!error) { regs->regs[1] = fd[1]; return fd[0]; diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c index 3c6b49a53ae..4d73421559c 100644 --- a/arch/sparc/kernel/sys_sparc.c +++ b/arch/sparc/kernel/sys_sparc.c @@ -97,7 +97,7 @@ asmlinkage int sparc_pipe(struct pt_regs *regs) int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, 0); if (error) goto out; regs->u_regs[UREG_I1] = 
fd[1]; diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index e1f4eba2e57..39749e32dc7 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -418,7 +418,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs) int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, 0); if (error) goto out; regs->u_regs[UREG_I1] = fd[1]; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 5614a8f7bed..18808b16457 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -830,4 +830,5 @@ ia32_sys_call_table: .quad sys_eventfd2 .quad sys_epoll_create2 .quad sys_dup3 /* 330 */ + .quad sys_pipe2 ia32_syscall_end: diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index f00afdf61e6..d3c64088b98 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -238,7 +238,7 @@ asmlinkage long sys32_pipe(int __user *fd) int retval; int fds[2]; - retval = do_pipe(fds); + retval = do_pipe_flags(fds, 0); if (retval) goto out; if (copy_to_user(fd, fds, sizeof(fds))) diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 24a3f1ea6a0..66154769d52 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -330,3 +330,4 @@ ENTRY(sys_call_table) .long sys_eventfd2 .long sys_epoll_create2 .long sys_dup3 /* 330 */ + .long sys_pipe2 diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index f3e16efcd47..ac15ecbdf91 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -49,7 +49,7 @@ asmlinkage long xtensa_pipe(int __user *userfds) int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, 0); if (!error) { if (copy_to_user(userfds, fd, 2 * sizeof(int))) error = -EFAULT; diff --git a/fs/pipe.c b/fs/pipe.c index 700f4e0d957..68e82061070 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1027,12 +1027,15 @@ struct file *create_read_pipe(struct file *wrf) 
return f; } -int do_pipe(int *fd) +int do_pipe_flags(int *fd, int flags) { struct file *fw, *fr; int error; int fdw, fdr; + if (flags & ~O_CLOEXEC) + return -EINVAL; + fw = create_write_pipe(); if (IS_ERR(fw)) return PTR_ERR(fw); @@ -1041,12 +1044,12 @@ int do_pipe(int *fd) if (IS_ERR(fr)) goto err_write_pipe; - error = get_unused_fd(); + error = get_unused_fd_flags(flags); if (error < 0) goto err_read_pipe; fdr = error; - error = get_unused_fd(); + error = get_unused_fd_flags(flags); if (error < 0) goto err_fdr; fdw = error; @@ -1074,16 +1077,21 @@ int do_pipe(int *fd) return error; } +int do_pipe(int *fd) +{ + return do_pipe_flags(fd, 0); +} + /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ -asmlinkage long __weak sys_pipe(int __user *fildes) +asmlinkage long __weak sys_pipe2(int __user *fildes, int flags) { int fd[2]; int error; - error = do_pipe(fd); + error = do_pipe_flags(fd, flags); if (!error) { if (copy_to_user(fildes, fd, sizeof(fd))) { sys_close(fd[0]); @@ -1094,6 +1102,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes) return error; } +asmlinkage long __weak sys_pipe(int __user *fildes) +{ + return sys_pipe2(fildes, 0); +} + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. 
So we don't need diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index a1f6383bf69..748a05c77da 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -336,6 +336,7 @@ #define __NR_eventfd2 328 #define __NR_epoll_create2 329 #define __NR_dup3 330 +#define __NR_pipe2 331 #ifdef __KERNEL__ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index f0fb2bd40cd..d2284b43ad5 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -649,6 +649,8 @@ __SYSCALL(__NR_eventfd2, sys_eventfd2) __SYSCALL(__NR_epoll_create2, sys_epoll_create2) #define __NR_dup3 292 __SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 293 +__SYSCALL(__NR_pipe2, sys_pipe2) #ifndef __NO_STUBS diff --git a/include/linux/fs.h b/include/linux/fs.h index e5e6a244096..0e80cd717d3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1777,6 +1777,7 @@ static inline void allow_write_access(struct file *file) atomic_inc(&file->f_path.dentry->d_inode->i_writecount); } extern int do_pipe(int *); +extern int do_pipe_flags(int *, int); extern struct file *create_read_pipe(struct file *f); extern struct file *create_write_pipe(void); extern void free_write_pipe(struct file *); -- cgit v1.2.3-70-g09d2 From 3e4d0cab61c88a9ae3e61151a857960397e26403 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Jul 2008 10:10:28 -0700 Subject: [IA64] Wire up new system calls Six new system calls: signalfd4, eventfd2, epoll_create1, dup3, pipe2 and inotify_init1. 
Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 6 ++++++ include/asm-ia64/unistd.h | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 56ab156c48a..0dd6c1419d8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1691,6 +1691,12 @@ sys_call_table: data8 sys_timerfd_create // 1310 data8 sys_timerfd_settime data8 sys_timerfd_gettime + data8 sys_signalfd4 + data8 sys_eventfd2 + data8 sys_epoll_create1 // 1315 + data8 sys_dup3 + data8 sys_pipe2 + data8 sys_inotify_init1 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index e6031471612..d535833aab5 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -302,11 +302,17 @@ #define __NR_timerfd_create 1310 #define __NR_timerfd_settime 1311 #define __NR_timerfd_gettime 1312 +#define __NR_signalfd4 1313 +#define __NR_eventfd2 1314 +#define __NR_epoll_create1 1315 +#define __NR_dup3 1316 +#define __NR_pipe2 1317 +#define __NR_inotify_init1 1318 #ifdef __KERNEL__ -#define NR_syscalls 289 /* length of syscall table */ +#define NR_syscalls 295 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about -- cgit v1.2.3-70-g09d2 From ef53d9c5e4da147ecaa43c44c5e5945eb83970a2 Mon Sep 17 00:00:00 2001 From: Srinivasa D S Date: Fri, 25 Jul 2008 01:46:04 -0700 Subject: kprobes: improve kretprobe scalability with hashed locking Currently list of kretprobe instances are stored in kretprobe object (as used_instances,free_instances) and in kretprobe hash table. We have one global kretprobe lock to serialise the access to these lists. This causes only one kretprobe handler to execute at a time. 
Hence this affects system performance, particularly on SMP systems and when return probes are set on a lot of functions (like on all system calls). The solution proposed here gives fine-grained locks that perform better on SMP systems compared to the present kretprobe implementation. Solution: 1) Instead of having one global lock to protect kretprobe instances present in the kretprobe object and the kretprobe hash table, we will have two locks, one lock for protecting the kretprobe hash table and another lock for the kretprobe object. 2) We hold the lock present in the kretprobe object while we modify a kretprobe instance in the kretprobe object and we hold the per-hash-list lock while modifying kretprobe instances present in that hash list. To prevent deadlock, we never grab a per-hash-list lock while holding a kretprobe lock. 3) We can remove used_instances from struct kretprobe, as we can track used kretprobe instances using the kretprobe hash table. Time duration for kernel compilation ("make -j 8") on an 8-way ppc64 system with return probes set on all system calls looks like this. cacheline non-cacheline Un-patched kernel aligned patch aligned patch =============================================================================== real 9m46.784s 9m54.412s 10m2.450s user 40m5.715s 40m7.142s 40m4.273s sys 2m57.754s 2m58.583s 3m17.430s =========================================================== Time duration for kernel compilation ("make -j 8") on the same system, when kernel is not probed. ========================= real 9m26.389s user 40m8.775s sys 2m7.283s ========================= Signed-off-by: Srinivasa DS Signed-off-by: Jim Keniston Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S.
Miller Cc: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/kprobes.c | 6 +- arch/ia64/kernel/kprobes.c | 6 +- arch/powerpc/kernel/kprobes.c | 6 +- arch/s390/kernel/kprobes.c | 6 +- arch/sparc64/kernel/kprobes.c | 11 ++-- arch/x86/kernel/kprobes.c | 6 +- include/linux/kprobes.h | 7 ++- kernel/kprobes.c | 127 +++++++++++++++++++++++++++++------------- 8 files changed, 108 insertions(+), 67 deletions(-) (limited to 'arch/ia64') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5ee39e10c8d..d28513f14d0 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); @@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) return (void *)orig_ret_address; } -/* Called with kretprobe_lock held. 
*/ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 233434f4f88..f07688da947 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ((struct fnptr *)kretprobe_trampoline)->ip; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { @@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 4ba2af12545..de79915452c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_msr = regs->msr; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + 
kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->nip = orig_ret_address; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 288ad490a6d..4f82e5b5f87 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index f43b5d75535..201a6e547e4 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -/* Called with kretprobe_lock held. 
The value stored in the return - * address register is actually 2 instructions before where the - * callee will return to. Sequences usually look something like this +/* The value stored in the return address register is actually 2 + * instructions before where the callee will return to. + * Sequences usually look something like this * * call some_function <--- return register points here * nop <--- call delay slot @@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->tnpc = orig_ret_address + 4; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0..6c27679ec6a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)p->ainsn.insn; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = 
__KERNEL_CS; @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04a3556bdea..0be7795655f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -157,11 +157,10 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; - struct hlist_head used_instances; + spinlock_t lock; }; struct kretprobe_instance { - struct hlist_node uflist; /* either on free list or used list */ struct hlist_node hlist; struct kretprobe *rp; kprobe_opcode_t *ret_addr; @@ -201,7 +200,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); @@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags); +void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); /* kprobe_running() will just return the current_kprobe on this CPU */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1485ca8d0e0..cb0b3bde361 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -62,6 +62,7 @@ addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) #endif +static int kprobes_initialized; static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -69,8 
+70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; static bool kprobe_enabled; DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ -DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +static struct { + spinlock_t lock ____cacheline_aligned; +} kretprobe_table_locks[KPROBE_TABLE_SIZE]; + +static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) +{ + return &(kretprobe_table_locks[hash].lock); +} /* * Normally, functions that we'd want to prohibit kprobes in, are marked @@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) return; } -/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head) { + struct kretprobe *rp = ri->rp; + /* remove rp inst off the rprobe_inst_table */ hlist_del(&ri->hlist); - if (ri->rp) { - /* remove rp inst off the used list */ - hlist_del(&ri->uflist); - /* put rp inst back onto the free list */ - INIT_HLIST_NODE(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->free_instances); + INIT_HLIST_NODE(&ri->hlist); + if (likely(rp)) { + spin_lock(&rp->lock); + hlist_add_head(&ri->hlist, &rp->free_instances); + spin_unlock(&rp->lock); } else /* Unregistering */ hlist_add_head(&ri->hlist, head); } -struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags) +{ + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + *head = &kretprobe_inst_table[hash]; + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void kretprobe_table_lock(unsigned long hash, unsigned long *flags) { - return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void 
kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +{ + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); +} + +void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +{ + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); } /* @@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; struct hlist_node *node, *tmp; - unsigned long flags = 0; + unsigned long hash, flags = 0; - INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(tk); + if (unlikely(!kprobes_initialized)) + /* Early boot. kretprobe_table_locks not yet initialized. */ + return; + + hash = hash_ptr(tk, KPROBE_HASH_BITS); + head = &kretprobe_inst_table[hash]; + kretprobe_table_lock(hash, &flags); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri, &empty_rp); } - spin_unlock_irqrestore(&kretprobe_lock, flags); - + kretprobe_table_unlock(hash, &flags); + INIT_HLIST_HEAD(&empty_rp); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); @@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp) struct kretprobe_instance *ri; struct hlist_node *pos, *next; - hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) { - hlist_del(&ri->uflist); + hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { + hlist_del(&ri->hlist); kfree(ri); } } static void __kprobes cleanup_rp_inst(struct kretprobe *rp) { - unsigned long flags; + unsigned long flags, hash; struct kretprobe_instance *ri; struct hlist_node *pos, *next; + struct hlist_head *head; + /* No race here */ - spin_lock_irqsave(&kretprobe_lock, flags); - hlist_for_each_entry_safe(ri, 
pos, next, &rp->used_instances, uflist) { - ri->rp = NULL; - hlist_del(&ri->uflist); + for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { + kretprobe_table_lock(hash, &flags); + head = &kretprobe_inst_table[hash]; + hlist_for_each_entry_safe(ri, pos, next, head, hlist) { + if (ri->rp == rp) + ri->rp = NULL; + } + kretprobe_table_unlock(hash, &flags); } - spin_unlock_irqrestore(&kretprobe_lock, flags); free_rp_inst(rp); } @@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); - unsigned long flags = 0; + unsigned long hash, flags = 0; + struct kretprobe_instance *ri; /*TODO: consider to only swap the RA after the last pre_handler fired */ - spin_lock_irqsave(&kretprobe_lock, flags); + hash = hash_ptr(current, KPROBE_HASH_BITS); + spin_lock_irqsave(&rp->lock, flags); if (!hlist_empty(&rp->free_instances)) { - struct kretprobe_instance *ri; - ri = hlist_entry(rp->free_instances.first, - struct kretprobe_instance, uflist); + struct kretprobe_instance, hlist); + hlist_del(&ri->hlist); + spin_unlock_irqrestore(&rp->lock, flags); + ri->rp = rp; ri->task = current; if (rp->entry_handler && rp->entry_handler(ri, regs)) { - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); return 0; } arch_prepare_kretprobe(ri, regs); /* XXX(hch): why is there no hlist_move_head? 
*/ - hlist_del(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->used_instances); - hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task)); - } else + INIT_HLIST_NODE(&ri->hlist); + kretprobe_table_lock(hash, &flags); + hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]); + kretprobe_table_unlock(hash, &flags); + } else { rp->nmissed++; - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); + } return 0; } @@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, rp->maxactive = NR_CPUS; #endif } - INIT_HLIST_HEAD(&rp->used_instances); + spin_lock_init(&rp->lock); INIT_HLIST_HEAD(&rp->free_instances); for (i = 0; i < rp->maxactive; i++) { inst = kmalloc(sizeof(struct kretprobe_instance) + @@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, free_rp_inst(rp); return -ENOMEM; } - INIT_HLIST_NODE(&inst->uflist); - hlist_add_head(&inst->uflist, &rp->free_instances); + INIT_HLIST_NODE(&inst->hlist); + hlist_add_head(&inst->hlist, &rp->free_instances); } rp->nmissed = 0; @@ -1009,6 +1058,7 @@ static int __init init_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { INIT_HLIST_HEAD(&kprobe_table[i]); INIT_HLIST_HEAD(&kretprobe_inst_table[i]); + spin_lock_init(&(kretprobe_table_locks[i].lock)); } /* @@ -1050,6 +1100,7 @@ static int __init init_kprobes(void) err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + kprobes_initialized = (err == 0); if (!err) init_test_probes(); -- cgit v1.2.3-70-g09d2 From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Jul 2008 19:44:49 -0700 Subject: dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). 
I think that per-device dma_mapping_ops support would also be helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to the KVM camp. Comments are appreciated. A pointer to dma_mapping_ops is added to struct dev_archdata. If the pointer is non-NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new PCI (or DMA-capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architectures. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use the device argument.
[akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 4 +- arch/arm/common/dmabounce.c | 2 +- arch/ia64/hp/common/hwsw_iommu.c | 5 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/sn/pci/pci_dma.c | 2 +- arch/mips/mm/dma-default.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_pciex.c | 2 +- arch/powerpc/platforms/cell/spider-pci.c | 2 +- arch/powerpc/platforms/iseries/mf.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 27 ++++--- arch/x86/kernel/pci-gart_64.c | 3 +- arch/x86/kernel/pci-nommu.c | 14 +--- arch/x86/kernel/pci-swiotlb_64.c | 2 +- drivers/firewire/fw-iso.c | 2 +- drivers/firewire/fw-ohci.c | 2 +- drivers/firewire/fw-sbp2.c | 8 +-- drivers/infiniband/hw/ipath/ipath_sdma.c | 2 +- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 6 +- drivers/infiniband/hw/mthca/mthca_eq.c | 2 +- drivers/media/dvb/pluto2/pluto2.c | 2 +- drivers/mmc/host/sdhci.c | 4 +- drivers/net/arm/ep93xx_eth.c | 4 +- drivers/net/bnx2x_main.c | 4 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/e100.c | 2 +- drivers/net/e1000e/ethtool.c | 4 +- drivers/net/e1000e/netdev.c | 11 +-- drivers/net/ibmveth.c | 38 +++++----- drivers/net/iseries_veth.c | 4 +- drivers/net/mlx4/eq.c | 2 +- drivers/net/pasemi_mac.c | 6 +- drivers/net/qla3xxx.c | 12 ++-- drivers/net/s2io.c | 48 +++++++------ drivers/net/sfc/rx.c | 4 +- drivers/net/sfc/tx.c | 7 +- drivers/net/spider_net.c | 4 +- drivers/net/tc35815.c | 4 +- 
drivers/net/wireless/ath5k/base.c | 4 +- drivers/scsi/ibmvscsi/ibmvfc.c | 4 +- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 +- drivers/scsi/ibmvscsi/ibmvstgt.c | 2 +- drivers/scsi/ibmvscsi/rpa_vscsi.c | 2 +- drivers/spi/atmel_spi.c | 4 +- drivers/spi/au1550_spi.c | 6 +- drivers/spi/omap2_mcspi.c | 4 +- drivers/spi/pxa2xx_spi.c | 4 +- drivers/spi/spi_imx.c | 6 +- include/asm-alpha/dma-mapping.h | 6 +- include/asm-alpha/pci.h | 2 +- include/asm-arm/dma-mapping.h | 2 +- include/asm-avr32/dma-mapping.h | 2 +- include/asm-cris/dma-mapping.h | 2 +- include/asm-frv/dma-mapping.h | 2 +- include/asm-generic/dma-mapping-broken.h | 2 +- include/asm-generic/dma-mapping.h | 4 +- include/asm-generic/pci-dma-compat.h | 4 +- include/asm-ia64/machvec.h | 2 +- include/asm-m68k/dma-mapping.h | 2 +- include/asm-mips/dma-mapping.h | 2 +- include/asm-mn10300/dma-mapping.h | 2 +- include/asm-parisc/dma-mapping.h | 2 +- include/asm-powerpc/dma-mapping.h | 2 +- include/asm-sh/dma-mapping.h | 2 +- include/asm-sparc/dma-mapping_64.h | 2 +- include/asm-sparc/pci_32.h | 3 +- include/asm-sparc/pci_64.h | 5 +- include/asm-x86/device.h | 3 + include/asm-x86/dma-mapping.h | 99 ++++++++++++++++++-------- include/asm-x86/swiotlb.h | 2 +- include/asm-xtensa/dma-mapping.h | 2 +- include/linux/i2o.h | 2 +- include/linux/ssb/ssb.h | 4 +- include/rdma/ib_verbs.h | 2 +- lib/swiotlb.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +- 76 files changed, 256 insertions(+), 210 deletions(-) (limited to 'arch/ia64') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 80d150458c8..d8b63d164e4 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -298,10 +298,10 @@ recommended that you never use these unless you really know what the cache width is. 
int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr) In some circumstances dma_map_single and dma_map_page will fail to create a mapping. A driver can check for these errors by testing the returned diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index dd294734260..69130f36590 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -280,7 +280,7 @@ unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, /* * Trying to unmap an invalid mapping */ - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(dev, dma_addr)) { dev_err(dev, "Trying to unmap invalid mapping\n"); return; } diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 1c44ec2a1d5..88b6e6f3fd8 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -186,9 +186,10 @@ hwsw_dma_supported (struct device *dev, u64 mask) } int -hwsw_dma_mapping_error (dma_addr_t dma_addr) +hwsw_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr); + return hwiommu_dma_mapping_error(dev, dma_addr) || + swiotlb_dma_mapping_error(dev, dma_addr); } EXPORT_SYMBOL(hwsw_dma_mapping_error); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 34421aed1e2..4956be40d7b 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2147,7 +2147,7 @@ sba_dma_supported (struct device *dev, u64 mask) } int -sba_dma_mapping_error (dma_addr_t dma_addr) +sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 52175af299a..53ebb648449 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -350,7 
+350,7 @@ void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(sn_dma_sync_sg_for_device); -int sn_dma_mapping_error(dma_addr_t dma_addr) +int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index ae39dd88b9a..891312f8e5a 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -348,7 +348,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele EXPORT_SYMBOL(dma_sync_sg_for_device); -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 0e04f8fb152..3e7e0f1568e 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -281,7 +281,7 @@ static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(bus->phb->parent, dummy_page_da)) { pr_err("PCIEX:Map dummy page failed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 418b605ac35..5122ec14527 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -111,7 +111,7 @@ static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, dummy_page_da = dma_map_single(phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(phb->parent, dummy_page_da)) { pr_err("SPIDER-IOWA:Map dummy page filed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 
1dc7295746d..731d7b15774 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -871,7 +871,7 @@ static int proc_mf_dump_cmdline(char *page, char **start, off_t off, count = 256 - off; dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error(NULL, dma_addr)) return -ENOMEM; memset(page, 0, off + count); memset(&vsp_cmd, 0, sizeof(vsp_cmd)); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 19e7fc7c2c4..1eb86be93d7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -544,7 +544,7 @@ error: return ret; } -static const struct dma_mapping_ops calgary_dma_ops = { +static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551b..37544123896 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,7 +11,7 @@ static int forbid_dac __read_mostly; -const struct dma_mapping_ops *dma_ops; +struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -312,6 +312,8 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) int dma_supported(struct device *dev, u64 mask) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -319,8 +321,8 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. 
@@ -367,6 +369,7 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { + struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory = NULL; struct page *page; unsigned long dma_mask = 0; @@ -435,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Let low level make its own zone decisions */ gfp &= ~(GFP_DMA32|GFP_DMA); - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, dma_handle, gfp); return NULL; } @@ -448,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } } - if (dma_ops->alloc_coherent) { + if (ops->alloc_coherent) { free_pages((unsigned long)memory, get_order(size)); gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + return ops->alloc_coherent(dev, size, dma_handle, gfp); } - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + if (ops->map_simple) { + *dma_handle = ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -477,12 +480,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); + if (ops->unmap_single) + ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d..744126e6495 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -692,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) 
extern int agp_amd64_init(void); -static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, +static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff..3f91f71cdc3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { +struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c2..c4ce0332759 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } -const struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c index bcbe794a3ea..e14c03dc006 100644 --- a/drivers/firewire/fw-iso.c +++ b/drivers/firewire/fw-iso.c @@ -50,7 +50,7 @@ fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, address = dma_map_page(card->device, buffer->pages[i], 0, PAGE_SIZE, direction); - if (dma_mapping_error(address)) { + if (dma_mapping_error(card->device, address)) { __free_page(buffer->pages[i]); goto out_pages; } 
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 333b12544dd..566672e0bcf 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -953,7 +953,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet) payload_bus = dma_map_single(ohci->card.device, packet->payload, packet->payload_length, DMA_TO_DEVICE); - if (dma_mapping_error(payload_bus)) { + if (dma_mapping_error(ohci->card.device, payload_bus)) { packet->ack = RCODE_SEND_ERROR; return -1; } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 53fc5a641e6..aaff50ebba1 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -543,7 +543,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->response_bus = dma_map_single(device->card->device, &orb->response, sizeof(orb->response), DMA_FROM_DEVICE); - if (dma_mapping_error(orb->response_bus)) + if (dma_mapping_error(device->card->device, orb->response_bus)) goto fail_mapping_response; orb->request.response.high = 0; @@ -577,7 +577,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->base.request_bus = dma_map_single(device->card->device, &orb->request, sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto fail_mapping_request; sbp2_send_orb(&orb->base, lu, node_id, generation, @@ -1424,7 +1424,7 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device, orb->page_table_bus = dma_map_single(device->card->device, orb->page_table, sizeof(orb->page_table), DMA_TO_DEVICE); - if (dma_mapping_error(orb->page_table_bus)) + if (dma_mapping_error(device->card->device, orb->page_table_bus)) goto fail_page_table; /* @@ -1509,7 +1509,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->base.request_bus = dma_map_single(device->card->device, &orb->request, 
sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto out; sbp2_send_orb(&orb->base, lu, lu->tgt->node_id, lu->generation, diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4..284c9bca517 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..82d9a0b5ca2 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d..cc6858f0b65 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ 
b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -780,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/media/dvb/pluto2/pluto2.c b/drivers/media/dvb/pluto2/pluto2.c index 1360403b88b..a9653c63f4d 100644 --- a/drivers/media/dvb/pluto2/pluto2.c +++ b/drivers/media/dvb/pluto2/pluto2.c @@ -242,7 +242,7 @@ static int __devinit pluto_dma_map(struct pluto *pluto) pluto->dma_addr = pci_map_single(pluto->pdev, pluto->dma_buf, TS_DMA_BYTES, PCI_DMA_FROMDEVICE); - return pci_dma_mapping_error(pluto->dma_addr); + return pci_dma_mapping_error(pluto->pdev, pluto->dma_addr); } static void pluto_dma_unmap(struct pluto *pluto) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c3a5db72ddd..5f95e10229b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -337,7 +337,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->align_addr = dma_map_single(mmc_dev(host->mmc), host->align_buffer, 128 * 4, direction); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr)) goto fail; BUG_ON(host->align_addr & 0x3); @@ -439,7 +439,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->adma_addr = dma_map_single(mmc_dev(host->mmc), host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr)) goto unmap_entries; BUG_ON(host->adma_addr & 0x3); diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index 7a14980f347..18d3eeb7eab 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -482,7 +482,7 @@ static int
ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } @@ -505,7 +505,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 0263bef9cc6..c7cc760a177 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1020,7 +1020,7 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp, mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { __free_pages(page, PAGES_PER_SGE_SHIFT); return -ENOMEM; } @@ -1048,7 +1048,7 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { dev_kfree_skb(skb); return -ENOMEM; } diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index a96331c875e..1b0861d73ab 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -386,7 +386,7 @@ static inline int add_one_rx_buf(void *va, unsigned int len, dma_addr_t mapping; mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(mapping))) + if (unlikely(pci_dma_mapping_error(pdev, mapping))) return -ENOMEM; pci_unmap_addr_set(sd, dma_addr, mapping); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 1037b133231..19d32a227be 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1790,7 +1790,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) 
rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(rx->dma_addr)) { + if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) { dev_kfree_skb_any(rx->skb); rx->skb = NULL; rx->dma_addr = 0; diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index a14561f40db..9350564065e 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -1090,7 +1090,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(tx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, tx_ring->buffer_info[i].dma)) { ret_val = 4; goto err_nomem; } @@ -1153,7 +1153,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, 2048, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, rx_ring->buffer_info[i].dma)) { ret_val = 8; goto err_nomem; } diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 9c0f56b3c51..d1367789976 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -195,7 +195,7 @@ map_skb: buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; break; @@ -265,7 +265,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ps_page->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(ps_page->dma)) { + if (pci_dma_mapping_error(pdev, ps_page->dma)) { dev_err(&adapter->pdev->dev, "RX DMA page map failed\n"); adapter->rx_dma_failed++; @@ -300,7 +300,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, 
buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; /* cleanup skb */ @@ -3344,7 +3344,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, skb->data + offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); adapter->tx_dma_failed++; return -1; @@ -3382,7 +3382,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter, offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, + buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA page map failed\n"); adapter->tx_dma_failed++; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index e5a6e2e8454..91ec9fdc718 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -260,7 +260,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto failure; pool->free_map[free_index] = IBM_VETH_INVALID_MAP; @@ -294,7 +294,7 @@ failure: pool->consumer_index = pool->size - 1; else pool->consumer_index--; - if (!dma_mapping_error(dma_addr)) + if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -448,11 +448,11 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) static void ibmveth_cleanup(struct ibmveth_adapter *adapter) { int i; + struct device *dev = &adapter->vdev->dev; if(adapter->buffer_list_addr != NULL) { -
if(!dma_mapping_error(adapter->buffer_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->buffer_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->buffer_list_dma = DMA_ERROR_CODE; } @@ -461,9 +461,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->filter_list_addr != NULL) { - if(!dma_mapping_error(adapter->filter_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->filter_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->filter_list_dma)) { + dma_unmap_single(dev, adapter->filter_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->filter_list_dma = DMA_ERROR_CODE; } @@ -472,8 +471,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->rx_queue.queue_addr != NULL) { - if(!dma_mapping_error(adapter->rx_queue.queue_dma)) { - dma_unmap_single(&adapter->vdev->dev, + if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { + dma_unmap_single(dev, adapter->rx_queue.queue_dma, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); @@ -535,6 +534,7 @@ static int ibmveth_open(struct net_device *netdev) int rc; union ibmveth_buf_desc rxq_desc; int i; + struct device *dev; ibmveth_debug_printk("open starting\n"); @@ -563,17 +563,19 @@ static int ibmveth_open(struct net_device *netdev) return -ENOMEM; } - adapter->buffer_list_dma = dma_map_single(&adapter->vdev->dev, + dev = &adapter->vdev->dev; + + adapter->buffer_list_dma = dma_map_single(dev, adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->filter_list_dma = dma_map_single(&adapter->vdev->dev, + adapter->filter_list_dma = dma_map_single(dev, adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->rx_queue.queue_dma = dma_map_single(&adapter->vdev->dev, + adapter->rx_queue.queue_dma = dma_map_single(dev, adapter->rx_queue.queue_addr, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); - 
if((dma_mapping_error(adapter->buffer_list_dma) ) || - (dma_mapping_error(adapter->filter_list_dma)) || - (dma_mapping_error(adapter->rx_queue.queue_dma))) { + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || + (dma_mapping_error(dev, adapter->filter_list_dma)) || + (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { ibmveth_error_printk("unable to map filter or buffer list pages\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -645,7 +647,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->bounce_buffer_dma = dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(adapter->bounce_buffer_dma)) { + if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { ibmveth_error_printk("unable to map bounce buffer\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -922,7 +924,7 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(data_dma_addr)) { + if (dma_mapping_error(&adapter->vdev->dev, data_dma_addr)) { if (!firmware_has_feature(FW_FEATURE_CMO)) ibmveth_error_printk("tx: unable to map xmit buffer\n"); skb_copy_from_linear_data(skb, adapter->bounce_buffer, diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index b8d0639c1cd..c46864d626b 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1128,7 +1128,7 @@ static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, msg->data.addr[0] = dma_map_single(port->dev, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(msg->data.addr[0])) + if (dma_mapping_error(port->dev, msg->data.addr[0])) goto recycle_and_drop; msg->dev = port->dev; @@ -1226,7 +1226,7 @@ static void veth_recycle_msg(struct veth_lpar_connection *cnx, dma_address = msg->data.addr[0]; dma_length = msg->data.len[0]; - if (!dma_mapping_error(dma_address)) + if (!dma_mapping_error(msg->dev, dma_address))
dma_unmap_single(msg->dev, dma_address, dma_length, DMA_TO_DEVICE); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index ea3a09aaa84..7df928d3a3d 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -526,7 +526,7 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) return -ENOMEM; priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(priv->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) { __free_page(priv->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 993d87c9296..edc0fd58898 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -650,7 +650,7 @@ static void pasemi_mac_replenish_rx_ring(const struct net_device *dev, mac->bufsz - LOCAL_SKB_ALIGN, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(dma))) { + if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) { dev_kfree_skb_irq(info->skb); break; } @@ -1519,7 +1519,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb), PCI_DMA_TODEVICE); map_size[0] = skb_headlen(skb); - if (dma_mapping_error(map[0])) + if (pci_dma_mapping_error(mac->dma_pdev, map[0])) goto out_err_nolock; for (i = 0; i < nfrags; i++) { @@ -1529,7 +1529,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) frag->page_offset, frag->size, PCI_DMA_TODEVICE); map_size[i+1] = frag->size; - if (dma_mapping_error(map[i+1])) { + if (pci_dma_mapping_error(mac->dma_pdev, map[i+1])) { nfrags = i; goto out_err_nolock; } diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index e7d48a352be..e82b37bbd6c 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -328,7 +328,7 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev, qdev->lrg_buffer_len - QL_HEADER_SPACE, 
PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -1919,7 +1919,7 @@ static int ql_populate_free_queue(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2454,7 +2454,7 @@ static int ql_send_map(struct ql3_adapter *qdev, */ map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2487,7 +2487,7 @@ static int ql_send_map(struct ql3_adapter *qdev, sizeof(struct oal), PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping outbound address list with error: %d\n", @@ -2514,7 +2514,7 @@ static int ql_send_map(struct ql3_adapter *qdev, frag->page_offset, frag->size, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping frags failed with error: %d\n", qdev->ndev->name, err); @@ -2916,7 +2916,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 9dae40ccf04..86d77d05190 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2512,8 +2512,8 @@ static void stop_nic(struct s2io_nic *nic) * Return Value: * SUCCESS on success or an appropriate -ve value on failure. 
*/ - -static int fill_rx_buffers(struct ring_info *ring, int from_card_up) +static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring, + int from_card_up) { struct sk_buff *skb; struct RxD_t *rxdp; @@ -2602,7 +2602,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp1->Buffer0_ptr = pci_map_single (ring->pdev, skb->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if(pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp1->Buffer0_ptr)) goto pci_map_failed; rxdp->Control_2 = @@ -2636,7 +2637,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp3->Buffer0_ptr = pci_map_single(ring->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer0_ptr)) goto pci_map_failed; } else pci_dma_sync_single_for_device(ring->pdev, @@ -2655,7 +2657,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) (ring->pdev, skb->data, ring->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer2_ptr)) goto pci_map_failed; if (from_card_up) { @@ -2664,8 +2667,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error - (rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (ring->pdev, (dma_addr_t)(unsigned long) @@ -2806,9 +2809,9 @@ static void free_rx_buffers(struct s2io_nic *sp) } } -static int s2io_chk_rx_buffers(struct ring_info *ring) +static int s2io_chk_rx_buffers(struct s2io_nic *nic, struct ring_info *ring) { - if (fill_rx_buffers(ring, 0) == -ENOMEM) { + if (fill_rx_buffers(nic, ring, 0) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); } @@ -2848,7 +2851,7 @@ static int s2io_poll_msix(struct napi_struct 
*napi, int budget) return 0; pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); if (pkts_processed < budget_org) { netif_rx_complete(dev, napi); @@ -2882,7 +2885,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget) for (i = 0; i < config->rx_ring_num; i++) { ring = &mac_control->rings[i]; ring_pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); pkts_processed += ring_pkts_processed; budget -= ring_pkts_processed; if (budget <= 0) @@ -2939,7 +2942,8 @@ static void s2io_netpoll(struct net_device *dev) rx_intr_handler(&mac_control->rings[i], 0); for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(&mac_control->rings[i], 0) == -ENOMEM) { + if (fill_rx_buffers(nic, &mac_control->rings[i], 0) == + -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n"); break; @@ -4235,14 +4239,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Buffer_Pointer = pci_map_single(sp->pdev, fifo->ufo_in_band_v, sizeof(u64), PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp++; } txdp->Buffer_Pointer = pci_map_single (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp->Host_Control = (unsigned long) skb; @@ -4345,7 +4349,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id) netif_rx_schedule(dev, &ring->napi); } else { rx_intr_handler(ring, 0); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(sp, ring); } return IRQ_HANDLED; @@ -4826,7 +4830,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) */ if (!config->napi) { for (i = 0; i < config->rx_ring_num; i++) - s2io_chk_rx_buffers(&mac_control->rings[i]); + 
s2io_chk_rx_buffers(sp, &mac_control->rings[i]); } writeq(sp->general_int_mask, &bar0->general_int_mask); readl(&bar0->general_int_status); @@ -6859,7 +6863,7 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single( sp->pdev, (*skb)->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp1->Buffer0_ptr)) goto memalloc_failed; rxdp->Host_Control = (unsigned long) (*skb); } @@ -6886,12 +6890,13 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single(sp->pdev, (*skb)->data, dev->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp3->Buffer2_ptr)) goto memalloc_failed; rxdp3->Buffer0_ptr = *temp0 = pci_map_single( sp->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer0_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer2_ptr, dev->mtu + 4, PCI_DMA_FROMDEVICE); @@ -6903,7 +6908,8 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, rxdp3->Buffer1_ptr = *temp1 = pci_map_single(sp->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); @@ -7187,7 +7193,7 @@ static int s2io_card_up(struct s2io_nic * sp) for (i = 0; i < config->rx_ring_num; i++) { mac_control->rings[i].mtu = dev->mtu; - ret = fill_rx_buffers(&mac_control->rings[i], 1); + ret = fill_rx_buffers(sp, &mac_control->rings[i], 1); if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 601b001437c..0d27dd39bc0 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -233,7 +233,7 
@@ static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, rx_buf->data, rx_buf->len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) { dev_kfree_skb_any(rx_buf->skb); rx_buf->skb = NULL; return -EIO; @@ -275,7 +275,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, 0, efx_rx_buf_size(efx), PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) { __free_pages(rx_buf->page, efx->rx_buffer_order); rx_buf->page = NULL; return -EIO; diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 5cdd082ab8f..5e8374ab28e 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -172,7 +172,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, /* Process all fragments */ while (1) { - if (unlikely(pci_dma_mapping_error(dma_addr))) + if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr))) goto pci_err; /* Store fields for marking in the per-fragment final @@ -661,7 +661,8 @@ efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev, TSOH_BUFFER(tsoh), header_len, PCI_DMA_TODEVICE); - if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) { + if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev, + tsoh->dma_addr))) { kfree(tsoh); return NULL; } @@ -863,7 +864,7 @@ static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off, len, PCI_DMA_TODEVICE); - if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) { + if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) { st->ifc.unmap_len = len; st->ifc.len = len; st->ifc.dma_addr = st->ifc.unmap_addr; diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 00aa0b108cb..b6435d0d71f 100644 --- 
a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -452,7 +452,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, /* iommu-map the skb */ buf = pci_map_single(card->pdev, descr->skb->data, SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; if (netif_msg_rx_err(card) && net_ratelimit()) @@ -691,7 +691,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, unsigned long flags; buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { if (netif_msg_tx_err(card) && net_ratelimit()) dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). " "Dropping packet\n", skb->data, skb->len); diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index a645e5028c1..8487ace9d2e 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -506,7 +506,7 @@ static void *alloc_rxbuf_page(struct pci_dev *hwdev, dma_addr_t *dma_handle) return NULL; *dma_handle = pci_map_single(hwdev, buf, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { free_page((unsigned long)buf); return NULL; } @@ -536,7 +536,7 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev, return NULL; *dma_handle = pci_map_single(hwdev, skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { dev_kfree_skb_any(skb); return NULL; } diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 217d506527a..d9769c52734 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1166,7 +1166,7 @@ ath5k_rxbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) bf->skb = skb; bf->skbaddr = pci_map_single(sc->pdev, skb->data, 
sc->rxbufsize, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(bf->skbaddr))) { + if (unlikely(pci_dma_mapping_error(sc->pdev, bf->skbaddr))) { ATH5K_ERR(sc, "%s: DMA mapping failed\n", __func__); dev_kfree_skb(skb); bf->skb = NULL; @@ -1918,7 +1918,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) ATH5K_DBG(sc, ATH5K_DEBUG_BEACON, "skb %p [data %p len %u] " "skbaddr %llx\n", skb, skb->data, skb->len, (unsigned long long)bf->skbaddr); - if (pci_dma_mapping_error(bf->skbaddr)) { + if (pci_dma_mapping_error(sc->pdev, bf->skbaddr)) { ATH5K_ERR(sc, "beacon DMA mapping failed\n"); return -EIO; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index c4a7c06793c..61f8fdea2d9 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3525,7 +3525,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost) crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(crq->msg_token)) + if (dma_mapping_error(dev, crq->msg_token)) goto map_failed; retrc = rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, @@ -3618,7 +3618,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost) async_q->size * sizeof(*async_q->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(async_q->msg_token)) { + if (dma_mapping_error(dev, async_q->msg_token)) { dev_err(dev, "Failed to map async queue\n"); goto free_async_crq; } diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 20000ec79b0..6b24b9cdb04 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -859,7 +859,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL); - if (dma_mapping_error(req->buffer)) { + if (dma_mapping_error(hostdata->dev, req->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "Unable to map request_buffer for " @@ -1407,7 +1407,7 
@@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, length, DMA_BIDIRECTIONAL); - if (dma_mapping_error(host_config->buffer)) { + if (dma_mapping_error(hostdata->dev, host_config->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "dma_mapping error getting host config\n"); diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c index 3b9514c8f1f..2e13ec00172 100644 --- a/drivers/scsi/ibmvscsi/ibmvstgt.c +++ b/drivers/scsi/ibmvscsi/ibmvstgt.c @@ -564,7 +564,7 @@ static int crq_queue_create(struct crq_queue *queue, struct srp_target *target) queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(target->dev, queue->msg_token)) goto map_failed; err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token, diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 182146100dc..462a8574dad 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c @@ -253,7 +253,7 @@ static int rpavscsi_init_crq_queue(struct crq_queue *queue, queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(hostdata->dev, queue->msg_token)) goto map_failed; gather_partition_info(); diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index e81d59d7891..0c716566085 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -313,14 +313,14 @@ atmel_spi_dma_map_xfer(struct atmel_spi *as, struct spi_transfer *xfer) xfer->tx_dma = dma_map_single(dev, (void *) xfer->tx_buf, xfer->len, DMA_TO_DEVICE); - if (dma_mapping_error(xfer->tx_dma)) + if (dma_mapping_error(dev, xfer->tx_dma)) return -ENOMEM; } if (xfer->rx_buf) { xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE); - if (dma_mapping_error(xfer->rx_dma)) { + if (dma_mapping_error(dev, xfer->rx_dma)) { if (xfer->tx_buf) 
dma_unmap_single(dev, xfer->tx_dma, xfer->len, diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c index 9149689c79d..87b73e0169c 100644 --- a/drivers/spi/au1550_spi.c +++ b/drivers/spi/au1550_spi.c @@ -334,7 +334,7 @@ static int au1550_spi_dma_rxtmp_alloc(struct au1550_spi *hw, unsigned size) hw->dma_rx_tmpbuf_size = size; hw->dma_rx_tmpbuf_addr = dma_map_single(hw->dev, hw->dma_rx_tmpbuf, size, DMA_FROM_DEVICE); - if (dma_mapping_error(hw->dma_rx_tmpbuf_addr)) { + if (dma_mapping_error(hw->dev, hw->dma_rx_tmpbuf_addr)) { kfree(hw->dma_rx_tmpbuf); hw->dma_rx_tmpbuf = 0; hw->dma_rx_tmpbuf_size = 0; @@ -378,7 +378,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_rx_addr = dma_map_single(hw->dev, (void *)t->rx_buf, t->len, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_rx_addr)) + if (dma_mapping_error(hw->dev, dma_rx_addr)) dev_err(hw->dev, "rx dma map error\n"); } } else { @@ -401,7 +401,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_tx_addr = dma_map_single(hw->dev, (void *)t->tx_buf, t->len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_tx_addr)) + if (dma_mapping_error(hw->dev, dma_tx_addr)) dev_err(hw->dev, "tx dma map error\n"); } } else { diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c index b1cc148036c..f6f987bb71c 100644 --- a/drivers/spi/omap2_mcspi.c +++ b/drivers/spi/omap2_mcspi.c @@ -836,7 +836,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (tx_buf != NULL) { t->tx_dma = dma_map_single(&spi->dev, (void *) tx_buf, len, DMA_TO_DEVICE); - if (dma_mapping_error(t->tx_dma)) { + if (dma_mapping_error(&spi->dev, t->tx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'T', len); return -EINVAL; @@ -845,7 +845,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (rx_buf != NULL) { t->rx_dma = dma_map_single(&spi->dev, rx_buf, t->len, DMA_FROM_DEVICE); - if 
(dma_mapping_error(t->rx_dma)) { + if (dma_mapping_error(&spi->dev, t->rx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'R', len); if (tx_buf != NULL) diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 0c452c46ab0..067299d6d19 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -353,7 +353,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->rx_dma = dma_map_single(dev, drv_data->rx, drv_data->rx_map_len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return 0; /* Stream map the tx buffer */ @@ -361,7 +361,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, drv_data->tx_dma)) { dma_unmap_single(dev, drv_data->rx_dma, drv_data->rx_map_len, DMA_FROM_DEVICE); return 0; diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c index 54ac7bea5f8..6fb77fcc497 100644 --- a/drivers/spi/spi_imx.c +++ b/drivers/spi/spi_imx.c @@ -491,7 +491,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) + if (dma_mapping_error(dev, drv_data->tx_dma)) return -1; drv_data->tx_dma_needs_unmap = 1; @@ -516,7 +516,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return -1; drv_data->rx_dma_needs_unmap = 1; } @@ -534,7 +534,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, drv_data->tx_dma)) { if (drv_data->rx_dma) { dma_unmap_single(dev, drv_data->rx_dma, diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index db351d1296f..a5801ae02e4 100644 --- 
a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -24,8 +24,8 @@ pci_unmap_sg(alpha_gendev_to_pci(dev), sg, nents, dir) #define dma_supported(dev, mask) \ pci_dma_supported(alpha_gendev_to_pci(dev), mask) -#define dma_mapping_error(addr) \ - pci_dma_mapping_error(addr) +#define dma_mapping_error(dev, addr) \ + pci_dma_mapping_error(alpha_gendev_to_pci(dev), addr) #else /* no PCI - no IOMMU. */ @@ -45,7 +45,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #define dma_unmap_page(dev, addr, size, dir) ((void)0) #define dma_unmap_sg(dev, sg, nents, dir) ((void)0) -#define dma_mapping_error(addr) (0) +#define dma_mapping_error(dev, addr) (0) #endif /* !CONFIG_PCI */ diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index d31fd49ff79..2a14302c17a 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -106,7 +106,7 @@ extern dma_addr_t pci_map_page(struct pci_dev *, struct page *, /* Test for pci_map_single or pci_map_page having generated an error. */ static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index e99406a7bec..f41335ba633 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -56,7 +56,7 @@ static inline int dma_is_consistent(struct device *dev, dma_addr_t handle) /* * DMA errors are defined by all-bits-set in the DMA address. 
*/ -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == ~0; } diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 57dc672bab8..0399359ab5d 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -35,7 +35,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) /* * dma_map_single can't fail as it is implemented now. */ -static inline int dma_mapping_error(dma_addr_t addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t addr) { return 0; } diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index edc8d1bfaae..cb2fb25ff8d 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -120,7 +120,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index 2e8966ca030..b2898877c07 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -126,7 +126,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h index e2468f894d2..82cd0cb1c3f 100644 --- a/include/asm-generic/dma-mapping-broken.h +++ b/include/asm-generic/dma-mapping-broken.h @@ -61,7 +61,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, #define dma_sync_sg_for_device dma_sync_sg_for_cpu extern int -dma_mapping_error(dma_addr_t dma_addr); +dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int 
dma_supported(struct device *dev, u64 mask); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 783ab9944d7..189486c3f92 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -144,9 +144,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return pci_dma_mapping_error(dma_addr); + return pci_dma_mapping_error(to_pci_dev(dev), dma_addr); } diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h index 25c10e96b2b..37b3706226e 100644 --- a/include/asm-generic/pci-dma-compat.h +++ b/include/asm-generic/pci-dma-compat.h @@ -99,9 +99,9 @@ pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, } static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #endif diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 0721a5e8271..a6d50c77b6b 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -54,7 +54,7 @@ typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_ typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int); typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int); typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int); -typedef int ia64_mv_dma_mapping_error (dma_addr_t dma_addr); +typedef int ia64_mv_dma_mapping_error(struct device *, dma_addr_t dma_addr); typedef int ia64_mv_dma_supported (struct device *, u64); typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *); diff --git 
a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index a26cdeb46a5..91f7944333d 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -84,7 +84,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *s { } -static inline int dma_mapping_error(dma_addr_t handle) +static inline int dma_mapping_error(struct device *dev, dma_addr_t handle) { return 0; } diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index 230b3f1b69b..c64afb40cd0 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -42,7 +42,7 @@ extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); -extern int dma_mapping_error(dma_addr_t dma_addr); +extern int dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int dma_supported(struct device *dev, u64 mask); static inline int diff --git a/include/asm-mn10300/dma-mapping.h b/include/asm-mn10300/dma-mapping.h index 7c882fca9ec..ccae8f6c632 100644 --- a/include/asm-mn10300/dma-mapping.h +++ b/include/asm-mn10300/dma-mapping.h @@ -182,7 +182,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index c6c0e9ff6bd..53af696f23d 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -248,6 +248,6 @@ void * sba_get_iommu(struct parisc_device *dev); #endif /* At the moment, we panic on error for IOMMU resource exaustion */ -#define dma_mapping_error(x) 0 +#define dma_mapping_error(dev, x) 0 #endif diff --git a/include/asm-powerpc/dma-mapping.h 
b/include/asm-powerpc/dma-mapping.h index 74c54978098..c7ca45f97dd 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -415,7 +415,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { #ifdef CONFIG_PPC64 return (dma_addr == DMA_ERROR_CODE); diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 22cc419389f..6c0b8a2de14 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -171,7 +171,7 @@ static inline int dma_get_cache_alignment(void) return L1_CACHE_BYTES; } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-sparc/dma-mapping_64.h b/include/asm-sparc/dma-mapping_64.h index 38cbec76a33..bfa64f9702d 100644 --- a/include/asm-sparc/dma-mapping_64.h +++ b/include/asm-sparc/dma-mapping_64.h @@ -135,7 +135,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, /* No flushing needed to sync cpu writes to the device. 
*/ } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (dma_addr == DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_32.h b/include/asm-sparc/pci_32.h index b93b6c79e08..0ee949d220c 100644 --- a/include/asm-sparc/pci_32.h +++ b/include/asm-sparc/pci_32.h @@ -154,7 +154,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, #define PCI_DMA_ERROR_CODE (~(dma_addr_t)0x0) -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { return (dma_addr == PCI_DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_64.h b/include/asm-sparc/pci_64.h index f59f2571295..4f79a54948f 100644 --- a/include/asm-sparc/pci_64.h +++ b/include/asm-sparc/pci_64.h @@ -140,9 +140,10 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); #define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #ifdef CONFIG_PCI diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 87a715367a1..3c034f48fdb 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -5,6 +5,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_X86_64 +struct dma_mapping_ops *dma_ops; +#endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index c2ddd3d1b88..0eaa9bf6011 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -17,7 +17,8 @@ extern int panic_on_overflow; extern int force_iommu; struct dma_mapping_ops { - int (*mapping_error)(dma_addr_t 
dma_addr); + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, @@ -56,14 +57,32 @@ struct dma_mapping_ops { int is_phys; }; -extern const struct dma_mapping_ops *dma_ops; +extern struct dma_mapping_ops *dma_ops; -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) { - if (dma_ops->mapping_error) - return dma_ops->mapping_error(dma_addr); +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif +} + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + struct dma_mapping_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); return (dma_addr == bad_dma_address); +#endif } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -83,44 +102,53 @@ static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); } static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, addr, size, direction); + if (ops->unmap_single) + ops->unmap_single(dev, addr, size, direction); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops 
*ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_sg(hwdev, sg, nents, direction); + return ops->map_sg(hwdev, sg, nents, direction); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_sg) - dma_ops->unmap_sg(hwdev, sg, nents, direction); + if (ops->unmap_sg) + ops->unmap_sg(hwdev, sg, nents, direction); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_cpu) - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -128,10 +156,11 @@ static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_device) + ops->sync_single_for_device(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -139,11 +168,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_cpu) - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); 
flush_write_buffers(); } @@ -152,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_device) - dma_ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -164,9 +195,11 @@ static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_cpu) - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -174,9 +207,11 @@ static inline void dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -185,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(dev, page_to_phys(page)+offset, - size, direction); + return ops->map_single(dev, page_to_phys(page) + offset, + size, direction); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, diff --git a/include/asm-x86/swiotlb.h 
b/include/asm-x86/swiotlb.h index c706a744263..2730b351afc 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index 3c7d537dd15..51882ae3db4 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -139,7 +139,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, consistent_sync(sg_virt(sg), sg->length, dir); } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 7d51cbca49a..75ae6d8aba4 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -758,7 +758,7 @@ static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, } dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); - if (!dma_mapping_error(dma_addr)) { + if (!dma_mapping_error(&c->pdev->dev, dma_addr)) { #ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 if ((sizeof(dma_addr_t) > 4) && c->pae_support) { *mptr++ = cpu_to_le32(0x7C020002); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4bf8cade9db..e530026eedf 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -427,9 +427,9 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr) { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: - return 
pci_dma_mapping_error(addr); + return pci_dma_mapping_error(dev->bus->host_pci, addr); case SSB_BUSTYPE_SSB: - return dma_mapping_error(addr); + return dma_mapping_error(dev->dev, addr); default: __ssb_dma_not_implemented(dev); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 90b529f7a15..936e333e7ce 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1590,7 +1590,7 @@ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); - return dma_mapping_error(dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d568894df8c..977edbdbc1d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -492,7 +492,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ dma_addr_t handle; handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); - if (swiotlb_dma_mapping_error(handle)) + if (swiotlb_dma_mapping_error(hwdev, handle)) return NULL; ret = bus_to_virt(handle); @@ -824,7 +824,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, } int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a19b22b452a..84d328329d9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -169,7 +169,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, (void *) vec->sge[xdr_sge_no].iov_base + sge_off, sge_bytes, DMA_TO_DEVICE); - if (dma_mapping_error(sge[sge_no].addr)) + if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, + sge[sge_no].addr)) goto err; sge_off = 0; sge_no++; -- cgit v1.2.3-70-g09d2 From 85ba2d862e521375a8ee01526c5c46b1f24bb4af Mon Sep 17 00:00:00 2001 From: 
Roland McGrath Date: Fri, 25 Jul 2008 19:45:58 -0700 Subject: tracehook: wait_task_inactive This extends wait_task_inactive() with a new argument so it can be used in a "soft" mode where it will check for the task changing state unexpectedly and back off. There is no change to existing callers. This lays the groundwork to allow robust, noninvasive tracing that can try to sample a blocked thread but back off safely if it wakes up. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 ++-- include/linux/sched.h | 8 ++++++-- kernel/kthread.c | 2 +- kernel/ptrace.c | 2 +- kernel/sched.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 37 insertions(+), 8 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 19d4493c619..fc8f3509df2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2626,7 +2626,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) /* * make sure the task is off any CPU */ - wait_task_inactive(task); + wait_task_inactive(task, 0); /* more to come... 
*/ @@ -4774,7 +4774,7 @@ recheck: UNPROTECT_CTX(ctx, flags); - wait_task_inactive(task); + wait_task_inactive(task, 0); PROTECT_CTX(ctx, flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index a95d84d0da9..f59318a0099 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1882,9 +1882,13 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(struct task_struct * p); +extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else -#define wait_task_inactive(p) do { } while (0) +static inline unsigned long wait_task_inactive(struct task_struct *p, + long match_state) +{ + return 1; +} #endif #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) diff --git a/kernel/kthread.c b/kernel/kthread.c index 6111c27491b..96cff2f8710 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -176,7 +176,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) return; } /* Must have done schedule() in kthread() before we set_task_cpu */ - wait_task_inactive(k); + wait_task_inactive(k, 0); set_task_cpu(k, cpu); k->cpus_allowed = cpumask_of_cpu(cpu); k->rt.nr_cpus_allowed = 1; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8392a9da645..082b3fcb32a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -107,7 +107,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) read_unlock(&tasklist_lock); if (!ret && !kill) - wait_task_inactive(child); + ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; /* All systems go.. */ return ret; diff --git a/kernel/sched.c b/kernel/sched.c index fde1a102635..0236958addc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1867,16 +1867,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * wait_task_inactive - wait for a thread to unschedule. 
* + * If @match_state is nonzero, it's the @p->state value just checked and + * not expected to change. If it changes, i.e. @p might have woken up, + * then return zero. When we succeed in waiting for @p to be off its CPU, + * we return a positive number (its total switch count). If a second call + * a short while later returns the same number, the caller can be sure that + * @p has remained unscheduled the whole time. + * * The caller must ensure that the task *will* unschedule sometime soon, * else this function might spin for a *long* time. This function can't * be called with interrupts off, or it may introduce deadlock with * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(struct task_struct *p) +unsigned long wait_task_inactive(struct task_struct *p, long match_state) { unsigned long flags; int running, on_rq; + unsigned long ncsw; struct rq *rq; for (;;) { @@ -1899,8 +1907,11 @@ void wait_task_inactive(struct task_struct *p) * return false if the runqueue has changed and p * is actually now running somewhere else! */ - while (task_running(rq, p)) + while (task_running(rq, p)) { + if (match_state && unlikely(p->state != match_state)) + return 0; cpu_relax(); + } /* * Ok, time to look more closely! We need the rq @@ -1910,8 +1921,20 @@ void wait_task_inactive(struct task_struct *p) rq = task_rq_lock(p, &flags); running = task_running(rq, p); on_rq = p->se.on_rq; + ncsw = 0; + if (!match_state || p->state == match_state) { + ncsw = p->nivcsw + p->nvcsw; + if (unlikely(!ncsw)) + ncsw = 1; + } task_rq_unlock(rq, &flags); + /* + * If it changed from the expected state, bail out now. + */ + if (unlikely(!ncsw)) + break; + /* * Was it really running after all now that we * checked with the proper locks actually held? @@ -1944,6 +1967,8 @@ void wait_task_inactive(struct task_struct *p) */ break; } + + return ncsw; } /*** -- cgit v1.2.3-70-g09d2