From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away the .discard section, but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- arch/m68knommu/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index b7fe505e358..68111a61a77 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -188,6 +188,7 @@ SECTIONS { EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .bss : { -- cgit v1.2.3-70-g09d2 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to the linker script for each arch implementing its own /DISCARD/ definition, which makes maintenance tedious and adding new entries error-prone. This patch makes all linker scripts move their discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, an arch can still keep a section that DISCARDS would throw away by including it earlier in the linker script. ia64 is notable because it first throws away some ia64-specific subsections and then includes the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze.
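For example, the tail of an arch linker script now ends with the default discards, optionally followed by arch-specific extras (a sketch modelled on the x86 hunk below; most archs simply end with DISCARDS):

	SECTIONS
	{
		...
		STABS_DEBUG
		DWARF_DEBUG

		DISCARDS
		/DISCARD/ : { *(.eh_frame) }	/* arch-specific extra */
	}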
Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/alpha/kernel/vmlinux.lds.S | 10 ++-------- arch/avr32/kernel/vmlinux.lds.S | 10 +++------- arch/blackfin/kernel/vmlinux.lds.S | 6 +----- arch/cris/kernel/vmlinux.lds.S | 10 ++-------- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/h8300/kernel/vmlinux.lds.S | 6 ++---- arch/ia64/kernel/vmlinux.lds.S | 17 ++++++++--------- arch/m32r/kernel/vmlinux.lds.S | 11 +++-------- arch/m68k/kernel/vmlinux-std.lds | 11 +++-------- arch/m68k/kernel/vmlinux-sun3.lds | 10 ++-------- arch/m68knommu/kernel/vmlinux.lds.S | 8 +------- arch/microblaze/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 22 ++++++++++------------ arch/mn10300/kernel/vmlinux.lds.S | 9 +++------ arch/parisc/kernel/vmlinux.lds.S | 9 ++++----- arch/powerpc/kernel/vmlinux.lds.S | 10 +++------- arch/s390/kernel/vmlinux.lds.S | 10 +++------- arch/sh/kernel/vmlinux.lds.S | 11 ++++------- arch/sparc/kernel/vmlinux.lds.S | 9 ++------- arch/um/include/asm/common.lds.S | 5 ----- arch/um/kernel/dyn.lds.S | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 11 ++++------- arch/xtensa/kernel/vmlinux.lds.S | 14 ++++---------- include/asm-generic/vmlinux.lds.h | 18 ++++++++++++------ 25 files changed, 80 insertions(+), 155 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 75fe1d6877e..6dc03c35caa 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -134,14 +134,6 @@ SECTIONS __bss_stop = .; _end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .mdebug 0 : { *(.mdebug) } @@ -151,4 +143,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index b8324608ec0..c4b56654349 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -124,15 +124,11 @@ SECTIONS _end = .; } + DWARF_DEBUG + /* When something in the kernel is NOT compiled as a module, the module * cleanup code and data are put into these segments. Both can then be * thrown away, as cleanup code is never called unless it's a module. 
*/ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - - DWARF_DEBUG + DISCARDS } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6e8eabd8f0a..d7ffe299b97 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -277,9 +277,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : - { - *(.exitcall.exit) - *(.discard) - } + DISCARDS } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index a3175ebb38c..6c81836b922 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ SECTIONS _end = .; __end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; + + DISCARDS } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 64b5a5e4d35..7dbf41f68b5 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -178,7 +178,7 @@ SECTIONS .comment 0 : { *(.comment) } - /DISCARD/ : { *(.discard) } + DISCARDS } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d6c0df33d..662b02ecb86 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -152,10 +152,6 @@ SECTIONS __end = . ; __ramstart = .; } - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } .romfs : { *(.romfs*) @@ -166,4 +162,6 @@ SECTIONS COMMAND_START = . - 0x200 ; __ramend = . ; } + + DISCARDS } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 13d95897587..eb4214d1c5a 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,15 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -317,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 480a49944cf..de5e21cca6a 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -120,14 +120,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -136,4 +128,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 905a797ada9..47eac19e8f6 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -82,14 +82,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. 
*/ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -98,4 +90,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 47d04be322a..03efaf04d7d 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -77,14 +77,6 @@ __init_begin = .; _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .crap : { /* Stabs debugging sections. */ *(.stab) @@ -97,4 +89,6 @@ __init_begin = .; *(.note) } + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 68111a61a77..2736a5e309c 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -184,13 +184,6 @@ SECTIONS { __init_end = .; } > INIT - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .bss : { . = ALIGN(4); _sbss = . ; @@ -201,5 +194,6 @@ SECTIONS { _end = . ; } > BSS + DISCARDS } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 81bebdcb18f..ec5fa91a48d 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -163,5 +163,5 @@ SECTIONS { . = ALIGN(4096); _end = .; - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 45901609b74..1474c18fb77 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,18 +176,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - - /* ABI crap starts here */ - *(.MIPS.options) - *(.options) - *(.pdr) - *(.reginfo) - } - /* These mark the ABI of the kernel for debuggers. */ .mdebug.abi32 : { KEEP(*(.mdebug.abi32)) @@ -213,4 +201,14 @@ SECTIONS *(.gptab.bss) *(.gptab.sbss) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + /* ABI crap starts here */ + *(.MIPS.options) + *(.options) + *(.pdr) + *(.reginfo) + } } diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 5609d4962a5..8fcd0f1e21d 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -115,13 +115,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); pg0 = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_CALL - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index ccf58341845..aea1784edbd 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -237,10 +237,12 @@ SECTIONS /* freed after init ends here */ _end = . ; + STABS_DEBUG + .note 0 : { *(.note) } + /* Sections to be discarded */ + DISCARDS /DISCARD/ : { - *(.exitcall.exit) - *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries @@ -253,7 +255,4 @@ SECTIONS *(.gnu.hash) #endif } - - STABS_DEBUG - .note 0 : { *(.note) } } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fca9355fd3..244e3658983 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,13 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. 
*/ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - EXIT_DATA - } - . = KERNELBASE; /* @@ -299,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. */ + DISCARDS } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 98867dfea46..82415c75b99 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -157,14 +157,10 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 766976d27b2..0ce254bca92 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -163,17 +163,14 @@ SECTIONS _end = . ; } + STABS_DEBUG + DWARF_DEBUG + /* * When something in the kernel is NOT compiled as a module, the * module cleanup code and data are put into these segments. Both * can then be thrown away, as cleanup code is never called unless * it's a module. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - DWARF_DEBUG + DISCARDS } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d63cf914667..866390feb68 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,13 +171,8 @@ SECTIONS } _end = . ; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index cb0248616d4..37ecc5577a9 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -123,8 +123,3 @@ __initramfs_end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - } - diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 2916d6eadff..715a188c047 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -157,5 +157,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 1f8a622cabe..2ebd39765db 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -101,5 +101,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e8788204..b600c843710 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -387,15 +387,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b1e24638acd..921b6ff3b64 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -280,16 +280,6 @@ SECTIONS *(.ResetVector.text) } - /* Sections to be discarded */ - /DISCARD/ : - { - *(.exit.literal) - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } @@ -322,4 +312,8 @@ SECTIONS *(.xt.lit) *(.gnu.linkonce.p*) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.exit.literal) } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c5c18ac878a..ab8ea9b7741 100644 --- 
a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -35,13 +35,10 @@ * __bss_stop = .; * _end = .; * - * /DISCARD/ : { - * EXIT_TEXT - * EXIT_DATA - * EXIT_CALL - * } * STABS_DEBUG * DWARF_DEBUG + * + * DISCARDS // must be the last * } * * [__init_begin, __init_end] is the init section that may be freed after init @@ -629,11 +626,20 @@ #define INIT_RAM_FS #endif +/* + * Default discarded sections. + * + * Some archs want to discard exit text/data at runtime rather than + * link time due to cross-section references such as alt instructions, + * bug table, eh_frame, etc. DISCARDS must be the last of output + * section definitions so that such archs put those in earlier section + * definitions. + */ #define DISCARDS \ /DISCARD/ : { \ EXIT_TEXT \ EXIT_DATA \ - *(.exitcall.exit) \ + EXIT_CALL \ *(.discard) \ } -- cgit v1.2.3-70-g09d2 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) has a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system, change the read_persistent_clock function to return a struct timespec. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f7221..68432248515 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685..02f505f23c3 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12..c6fd96ff118 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long
read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb1..3f04d4c406b 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2f..24b355df612 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691f..3c6f190aa61 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275..3498ac9c35a 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a..623ffc933c4 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 
0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24..62df6a598e0 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceea..ad63f30fe3d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) - if (!ppc_md.get_rtc_time) - return 0; + if (!ppc_md.get_rtc_time) + return; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e76c2e7a8b9..a94ec48587b 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime.
*/ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb..3f4706aa975 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e7..bf67dcb4a44 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291..19085ff0484 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. */ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c84455888..53a3216f0d1 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9..f1a21ce491e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ ... wait */ @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock.
+ * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. @@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3-70-g09d2 From af39bb8b07af83b579c90c09ba3943123cdb4132 Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:00 -0700 Subject: core generic GPIO support for Freescale Coldfire processors. This adds the basic infrastructure used by all of the different Coldfire CPUs. 
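With this in place, Coldfire platforms register their ports as gpiolib chips, and drivers use the standard GPIO calls. A minimal consumer sketch (the pin number and label here are arbitrary illustrations, not part of this patch):

	#include <linux/gpio.h>

	static int __init example_gpio_init(void)
	{
		int err = gpio_request(1, "example");	/* claim pin 1 */
		if (err)
			return err;
		gpio_direction_output(1, 1);	/* configure as output, drive high */
		gpio_set_value(1, 0);		/* later, drive it low */
		gpio_free(1);			/* release the pin */
		return 0;
	}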
Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/gpio.h | 238 ++++++++++++++++++++++++++++++ arch/m68k/include/asm/mcfgpio.h | 40 +++++ arch/m68k/include/asm/pinmux.h | 30 ++++ arch/m68knommu/Kconfig | 6 + arch/m68knommu/platform/coldfire/Makefile | 1 + arch/m68knommu/platform/coldfire/gpio.c | 127 ++++++++++++++++ arch/m68knommu/platform/coldfire/pinmux.c | 28 ++++ 7 files changed, 470 insertions(+) create mode 100644 arch/m68k/include/asm/gpio.h create mode 100644 arch/m68k/include/asm/mcfgpio.h create mode 100644 arch/m68k/include/asm/pinmux.h create mode 100644 arch/m68knommu/platform/coldfire/gpio.c create mode 100644 arch/m68knommu/platform/coldfire/pinmux.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/gpio.h b/arch/m68k/include/asm/gpio.h new file mode 100644 index 00000000000..283214dc65a --- /dev/null +++ b/arch/m68k/include/asm/gpio.h @@ -0,0 +1,238 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +#ifndef coldfire_gpio_h +#define coldfire_gpio_h + +#include +#include +#include +#include + +/* + * The Freescale Coldfire family is quite varied in how they implement GPIO. + * Some parts have 8-bit ports, some have 16-bit and some have 32-bit; some have + * only one port, others have multiple ports; some have a single data latch + * for both input and output, others have a separate pin data register to read + * input; some require a read-modify-write access to change an output, others + * have set and clear registers for some of the outputs; some have all the + * GPIOs in a single control area, others have some GPIOs implemented in + * different modules. + * + * This implementation attempts to accommodate the differences while presenting + * a generic interface that will optimize to as few instructions as possible.
+ */ +#if defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \ + defined(CONFIG_M520x) || defined(CONFIG_M523x) || \ + defined(CONFIG_M527x) || defined(CONFIG_M528x) || defined(CONFIG_M532x) + +/* These parts have GPIO organized by 8-bit ports */ + +#define MCFGPIO_PORTTYPE u8 +#define MCFGPIO_PORTSIZE 8 +#define mcfgpio_read(port) __raw_readb(port) +#define mcfgpio_write(data, port) __raw_writeb(data, port) + +#elif defined(CONFIG_M5307) || defined(CONFIG_M5407) || defined(CONFIG_M5272) + +/* These parts have GPIO organized by 16-bit ports */ + +#define MCFGPIO_PORTTYPE u16 +#define MCFGPIO_PORTSIZE 16 +#define mcfgpio_read(port) __raw_readw(port) +#define mcfgpio_write(data, port) __raw_writew(data, port) + +#elif defined(CONFIG_M5249) + +/* These parts have GPIO organized by 32-bit ports */ + +#define MCFGPIO_PORTTYPE u32 +#define MCFGPIO_PORTSIZE 32 +#define mcfgpio_read(port) __raw_readl(port) +#define mcfgpio_write(data, port) __raw_writel(data, port) + +#endif + +#define mcfgpio_bit(gpio) (1 << ((gpio) % MCFGPIO_PORTSIZE)) +#define mcfgpio_port(gpio) ((gpio) / MCFGPIO_PORTSIZE) + +#if defined(CONFIG_M520x) || defined(CONFIG_M523x) || \ + defined(CONFIG_M527x) || defined(CONFIG_M528x) || defined(CONFIG_M532x) +/* + * These parts have an 'Edge' Port module (external interrupt/GPIO) which uses + * read-modify-write to change an output and a GPIO module which has separate + * set/clr registers to directly change outputs with a single write access. + */ +#if defined(CONFIG_M528x) +/* + * The 528x also has GPIOs in other modules (GPT, QADC) which use + * read-modify-write as well as those controlled by the EPORT and GPIO modules. + */ +#define MCFGPIO_SCR_START 40 +#else +#define MCFGPIO_SCR_START 8 +#endif + +#define MCFGPIO_SETR_PORT(gpio) (MCFGPIO_SETR + \ + mcfgpio_port(gpio - MCFGPIO_SCR_START)) + +#define MCFGPIO_CLRR_PORT(gpio) (MCFGPIO_CLRR + \ + mcfgpio_port(gpio - MCFGPIO_SCR_START)) +#else + +#define MCFGPIO_SCR_START MCFGPIO_PIN_MAX +/* with MCFGPIO_SCR_START == MCFGPIO_PIN_MAX, these will be optimized away */ +#define MCFGPIO_SETR_PORT(gpio) 0 +#define MCFGPIO_CLRR_PORT(gpio) 0 + +#endif +/* + * Coldfire specific helper functions + */ + +/* return the port pin data register for a gpio */ +static inline u32 __mcf_gpio_ppdr(unsigned gpio) +{ +#if defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \ + defined(CONFIG_M5307) || defined(CONFIG_M5407) + return MCFSIM_PADAT; +#elif defined(CONFIG_M5272) + if (gpio < 16) + return MCFSIM_PADAT; + else if (gpio < 32) + return MCFSIM_PBDAT; + else + return MCFSIM_PCDAT; +#elif defined(CONFIG_M5249) + if (gpio < 32) + return MCFSIM2_GPIOREAD; + else + return MCFSIM2_GPIO1READ; +#elif defined(CONFIG_M520x) || defined(CONFIG_M523x) || \ + defined(CONFIG_M527x) || defined(CONFIG_M528x) || defined(CONFIG_M532x) + if (gpio < 8) + return MCFEPORT_EPPDR; +#if defined(CONFIG_M528x) + else if (gpio < 16) + return MCFGPTA_GPTPORT; + else if (gpio < 24) + return MCFGPTB_GPTPORT; + else if (gpio < 32) + return MCFQADC_PORTQA; + else if (gpio < 40) + return MCFQADC_PORTQB; +#endif + else + return MCFGPIO_PPDR + mcfgpio_port(gpio - MCFGPIO_SCR_START); +#endif +} + +/* return the port output data register for a gpio */ +static inline u32 __mcf_gpio_podr(unsigned gpio) +{ +#if defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \ + defined(CONFIG_M5307) || defined(CONFIG_M5407) + return MCFSIM_PADAT; +#elif defined(CONFIG_M5272) + if (gpio < 16) + return MCFSIM_PADAT; + else if (gpio < 32) + return MCFSIM_PBDAT; + else + return MCFSIM_PCDAT; +#elif
defined(CONFIG_M5249) + if (gpio < 32) + return MCFSIM2_GPIOWRITE; + else + return MCFSIM2_GPIO1WRITE; +#elif defined(CONFIG_M520x) || defined(CONFIG_M523x) || \ + defined(CONFIG_M527x) || defined(CONFIG_M528x) || defined(CONFIG_M532x) + if (gpio < 8) + return MCFEPORT_EPDR; +#if defined(CONFIG_M528x) + else if (gpio < 16) + return MCFGPTA_GPTPORT; + else if (gpio < 24) + return MCFGPTB_GPTPORT; + else if (gpio < 32) + return MCFQADC_PORTQA; + else if (gpio < 40) + return MCFQADC_PORTQB; +#endif + else + return MCFGPIO_PODR + mcfgpio_port(gpio - MCFGPIO_SCR_START); +#endif +} + +/* + * The Generic GPIO functions + * + * If the gpio is a compile time constant and is one of the Coldfire gpios, + * use the inline version, otherwise dispatch thru gpiolib. + */ + +static inline int gpio_get_value(unsigned gpio) +{ + if (__builtin_constant_p(gpio) && gpio < MCFGPIO_PIN_MAX) + return mcfgpio_read(__mcf_gpio_ppdr(gpio)) & mcfgpio_bit(gpio); + else + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + if (__builtin_constant_p(gpio) && gpio < MCFGPIO_PIN_MAX) { + if (gpio < MCFGPIO_SCR_START) { + unsigned long flags; + MCFGPIO_PORTTYPE data; + + local_irq_save(flags); + data = mcfgpio_read(__mcf_gpio_podr(gpio)); + if (value) + data |= mcfgpio_bit(gpio); + else + data &= ~mcfgpio_bit(gpio); + mcfgpio_write(data, __mcf_gpio_podr(gpio)); + local_irq_restore(flags); + } else { + if (value) + mcfgpio_write(mcfgpio_bit(gpio), + MCFGPIO_SETR_PORT(gpio)); + else + mcfgpio_write(~mcfgpio_bit(gpio), + MCFGPIO_CLRR_PORT(gpio)); + } + } else + __gpio_set_value(gpio, value); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + return (gpio < MCFGPIO_IRQ_MAX) ? gpio + MCFGPIO_IRQ_VECBASE : -EINVAL; +} + +static inline int irq_to_gpio(unsigned irq) +{ + return (irq >= MCFGPIO_IRQ_VECBASE && + irq < (MCFGPIO_IRQ_VECBASE + MCFGPIO_IRQ_MAX)) ? + irq - MCFGPIO_IRQ_VECBASE : -ENXIO; +} + +static inline int gpio_cansleep(unsigned gpio) +{ + return gpio < MCFGPIO_PIN_MAX ? 0 : __gpio_cansleep(gpio); +} + +#endif diff --git a/arch/m68k/include/asm/mcfgpio.h b/arch/m68k/include/asm/mcfgpio.h new file mode 100644 index 00000000000..ee5e4ccce89 --- /dev/null +++ b/arch/m68k/include/asm/mcfgpio.h @@ -0,0 +1,40 @@ +/* + * Coldfire generic GPIO support. + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef mcfgpio_h +#define mcfgpio_h + +#include +#include + +struct mcf_gpio_chip { + struct gpio_chip gpio_chip; + void __iomem *pddr; + void __iomem *podr; + void __iomem *ppdr; + void __iomem *setr; + void __iomem *clrr; + const u8 *gpio_to_pinmux; +}; + +int mcf_gpio_direction_input(struct gpio_chip *, unsigned); +int mcf_gpio_get_value(struct gpio_chip *, unsigned); +int mcf_gpio_direction_output(struct gpio_chip *, unsigned, int); +void mcf_gpio_set_value(struct gpio_chip *, unsigned, int); +void mcf_gpio_set_value_fast(struct gpio_chip *, unsigned, int); +int mcf_gpio_request(struct gpio_chip *, unsigned); +void mcf_gpio_free(struct gpio_chip *, unsigned); + +#endif diff --git a/arch/m68k/include/asm/pinmux.h b/arch/m68k/include/asm/pinmux.h new file mode 100644 index 00000000000..119ee686dbd --- /dev/null +++ b/arch/m68k/include/asm/pinmux.h @@ -0,0 +1,30 @@ +/* + * Coldfire generic GPIO pinmux support. + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef pinmux_h +#define pinmux_h + +#define MCFPINMUX_NONE -1 + +extern int mcf_pinmux_request(unsigned, unsigned); +extern void mcf_pinmux_release(unsigned, unsigned); + +static inline int mcf_pinmux_is_valid(unsigned pinmux) +{ + return pinmux != MCFPINMUX_NONE; +} + +#endif + diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index 534376299a9..e2201b90aa2 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -47,6 +47,10 @@ config GENERIC_FIND_NEXT_BIT bool default y +config GENERIC_GPIO + bool + default n + config GENERIC_HWEIGHT bool default y @@ -182,6 +186,8 @@ config M527x config COLDFIRE bool depends on (M5206 || M5206e || M520x || M523x || M5249 || M527x || M5272 || M528x || M5307 || M532x || M5407) + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB default y config CLOCK_SET diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index 1bcb9372353..2667323c7fe 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -27,4 +27,5 @@ obj-$(CONFIG_M5307) += timers.o obj-$(CONFIG_M532x) += timers.o obj-$(CONFIG_M5407) += timers.o +obj-y += pinmux.o gpio.o extra-y := head.o diff --git a/arch/m68knommu/platform/coldfire/gpio.c b/arch/m68knommu/platform/coldfire/gpio.c new file mode 100644 index 00000000000..ff004579345 --- /dev/null +++ b/arch/m68knommu/platform/coldfire/gpio.c @@ -0,0 +1,127 @@ +/* + * Coldfire generic GPIO support. + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include + +#include +#include +#include + +#define MCF_CHIP(chip) container_of(chip, struct mcf_gpio_chip, gpio_chip) + +int mcf_gpio_direction_input(struct gpio_chip *chip, unsigned offset) +{ + unsigned long flags; + MCFGPIO_PORTTYPE dir; + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + local_irq_save(flags); + dir = mcfgpio_read(mcf_chip->pddr); + dir &= ~mcfgpio_bit(chip->base + offset); + mcfgpio_write(dir, mcf_chip->pddr); + local_irq_restore(flags); + + return 0; +} + +int mcf_gpio_get_value(struct gpio_chip *chip, unsigned offset) +{ + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + return mcfgpio_read(mcf_chip->ppdr) & mcfgpio_bit(chip->base + offset); +} + +int mcf_gpio_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + unsigned long flags; + MCFGPIO_PORTTYPE data; + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + local_irq_save(flags); + /* write the value to the output latch */ + data = mcfgpio_read(mcf_chip->podr); + if (value) + data |= mcfgpio_bit(chip->base + offset); + else + data &= ~mcfgpio_bit(chip->base + offset); + mcfgpio_write(data, mcf_chip->podr); + + /* now set the direction to output */ + data = mcfgpio_read(mcf_chip->pddr); + data |= mcfgpio_bit(chip->base + offset); + mcfgpio_write(data, mcf_chip->pddr); + local_irq_restore(flags); + + return 0; +} + +void mcf_gpio_set_value(struct gpio_chip *chip, unsigned offset, int value) +{ + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + unsigned long flags; + MCFGPIO_PORTTYPE data; + + local_irq_save(flags); + data = mcfgpio_read(mcf_chip->podr); + if (value) + data |= mcfgpio_bit(chip->base + offset); + else + data &= ~mcfgpio_bit(chip->base + offset); + mcfgpio_write(data, mcf_chip->podr); + local_irq_restore(flags); +} + +void mcf_gpio_set_value_fast(struct gpio_chip *chip, unsigned offset, int value) +{ + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + if (value) + mcfgpio_write(mcfgpio_bit(chip->base + offset), mcf_chip->setr); + else + mcfgpio_write(~mcfgpio_bit(chip->base + offset), mcf_chip->clrr); +} + +int mcf_gpio_request(struct gpio_chip *chip, unsigned offset) +{ + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + return mcf_chip->gpio_to_pinmux ? + mcf_pinmux_request(mcf_chip->gpio_to_pinmux[offset], 0) : 0; +} + +void mcf_gpio_free(struct gpio_chip *chip, unsigned offset) +{ + struct mcf_gpio_chip *mcf_chip = MCF_CHIP(chip); + + mcf_gpio_direction_input(chip, offset); + + if (mcf_chip->gpio_to_pinmux) + mcf_pinmux_release(mcf_chip->gpio_to_pinmux[offset], 0); +} + +struct sysdev_class mcf_gpio_sysclass = { + .name = "gpio", +}; + +static int __init mcf_gpio_sysinit(void) +{ + return sysdev_class_register(&mcf_gpio_sysclass); +} + +core_initcall(mcf_gpio_sysinit); diff --git a/arch/m68knommu/platform/coldfire/pinmux.c b/arch/m68knommu/platform/coldfire/pinmux.c new file mode 100644 index 00000000000..8c62b825939 --- /dev/null +++ b/arch/m68knommu/platform/coldfire/pinmux.c @@ -0,0 +1,28 @@ +/* + * Coldfire generic GPIO pinmux support. + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ + +#include + +#include + +int mcf_pinmux_request(unsigned pinmux, unsigned func) +{ + return 0; +} + +void mcf_pinmux_release(unsigned pinmux, unsigned func) +{ +} -- cgit v1.2.3-70-g09d2 From bc25b057fa0a0e4754b7e2524c509874649d11ae Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:01 -0700 Subject: generic GPIO support for the Freescale Coldfire 5206. Add support for the 5206. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m5206sim.h | 10 +++++-- arch/m68knommu/platform/5206/Makefile | 2 +- arch/m68knommu/platform/5206/gpio.c | 49 +++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 arch/m68knommu/platform/5206/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m5206sim.h b/arch/m68k/include/asm/m5206sim.h index 7e3594dea88..7be8a2d3e65 100644 --- a/arch/m68k/include/asm/m5206sim.h +++ b/arch/m68k/include/asm/m5206sim.h @@ -85,9 +85,15 @@ #define MCFSIM_PAR 0xcb /* Pin Assignment reg (r/w) */ #endif -#define MCFSIM_PADDR 0x1c5 /* Parallel Direction (r/w) */ -#define MCFSIM_PADAT 0x1c9 /* Parallel Port Value (r/w) */ +#define MCFSIM_PADDR (MCF_MBAR + 0x1c5) /* Parallel Direction (r/w) */ +#define MCFSIM_PADAT (MCF_MBAR + 0x1c9) /* Parallel Port Value (r/w) */ +/* + * Generic GPIO + */ +#define MCFGPIO_PIN_MAX 8 +#define MCFGPIO_IRQ_VECBASE -1 +#define MCFGPIO_IRQ_MAX -1 /* * Some symbol defines for the Parallel Port Pin Assignment Register */ diff --git a/arch/m68knommu/platform/5206/Makefile b/arch/m68knommu/platform/5206/Makefile index a439d9ab3f2..113c3339006 100644 --- a/arch/m68knommu/platform/5206/Makefile +++ b/arch/m68knommu/platform/5206/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/5206/gpio.c b/arch/m68knommu/platform/5206/gpio.c new file mode 100644 index 00000000000..60f779ce165 --- /dev/null +++ b/arch/m68knommu/platform/5206/gpio.c @@ -0,0 +1,49 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PP", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFSIM_PADDR, + .podr = MCFSIM_PADAT, + .ppdr = MCFSIM_PADAT, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From 24a1836ecd471409aeb5362f63032126ebe7fb10 Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:02 -0700 Subject: generic GPIO support for the Freescale Coldfire 5206e. Add support for the 5206e. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/5206e/Makefile | 2 +- arch/m68knommu/platform/5206e/config.c | 1 + arch/m68knommu/platform/5206e/gpio.c | 49 ++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/5206e/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/5206e/Makefile b/arch/m68knommu/platform/5206e/Makefile index a439d9ab3f2..113c3339006 100644 --- a/arch/m68knommu/platform/5206e/Makefile +++ b/arch/m68knommu/platform/5206e/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index 65887799db8..fcf1400db51 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/m68knommu/platform/5206e/gpio.c b/arch/m68knommu/platform/5206e/gpio.c new file mode 100644 index 00000000000..60f779ce165 --- /dev/null +++ b/arch/m68knommu/platform/5206e/gpio.c @@ -0,0 +1,49 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PP", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFSIM_PADDR, + .podr = MCFSIM_PADAT, + .ppdr = MCFSIM_PADAT, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From afde8560b4217338c17b7dbc9e9d7676c1b3a5ff Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:03 -0700 Subject: generic GPIO support for the Freescale Coldfire 520x. Add support for the 520x. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m520xsim.h | 56 +++++++++ arch/m68knommu/platform/520x/Makefile | 2 +- arch/m68knommu/platform/520x/gpio.c | 211 ++++++++++++++++++++++++++++++++++ 3 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/520x/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h index 83bbcfd6e8f..e80b6a54ea9 100644 --- a/arch/m68k/include/asm/m520xsim.h +++ b/arch/m68k/include/asm/m520xsim.h @@ -41,6 +41,62 @@ #define MCFSIM_SDCS0 0x000a8110 /* SDRAM Chip Select 0 Configuration */ #define MCFSIM_SDCS1 0x000a8114 /* SDRAM Chip Select 1 Configuration */ +#define MCFEPORT_EPDDR 0xFC088002 +#define MCFEPORT_EPDR 0xFC088004 +#define MCFEPORT_EPPDR 0xFC088005 + +#define MCFGPIO_PODR_BUSCTL 0xFC0A4000 +#define MCFGPIO_PODR_BE 0xFC0A4001 +#define MCFGPIO_PODR_CS 0xFC0A4002 +#define MCFGPIO_PODR_FECI2C 0xFC0A4003 +#define MCFGPIO_PODR_QSPI 0xFC0A4004 +#define MCFGPIO_PODR_TIMER 0xFC0A4005 +#define MCFGPIO_PODR_UART 0xFC0A4006 +#define MCFGPIO_PODR_FECH 0xFC0A4007 +#define MCFGPIO_PODR_FECL 0xFC0A4008 + +#define MCFGPIO_PDDR_BUSCTL 0xFC0A400C +#define MCFGPIO_PDDR_BE 0xFC0A400D +#define MCFGPIO_PDDR_CS 0xFC0A400E +#define MCFGPIO_PDDR_FECI2C 0xFC0A400F +#define MCFGPIO_PDDR_QSPI 0xFC0A4010 +#define MCFGPIO_PDDR_TIMER 0xFC0A4011 +#define MCFGPIO_PDDR_UART 0xFC0A4012 +#define MCFGPIO_PDDR_FECH 0xFC0A4013 +#define MCFGPIO_PDDR_FECL 0xFC0A4014 + +#define MCFGPIO_PPDSDR_BUSCTL 0xFC0A401A +#define MCFGPIO_PPDSDR_BE 0xFC0A401B +#define MCFGPIO_PPDSDR_CS 0xFC0A401C +#define MCFGPIO_PPDSDR_FECI2C 0xFC0A401D +#define MCFGPIO_PPDSDR_QSPI 0xFC0A401E +#define MCFGPIO_PPDSDR_TIMER 0xFC0A401F +#define MCFGPIO_PPDSDR_UART 0xFC0A4020 +#define MCFGPIO_PPDSDR_FECH 0xFC0A4021 +#define MCFGPIO_PPDSDR_FECL 0xFC0A4022 + +#define MCFGPIO_PCLRR_BUSCTL 0xFC0A4024 +#define MCFGPIO_PCLRR_BE 0xFC0A4025 +#define MCFGPIO_PCLRR_CS 0xFC0A4026 +#define MCFGPIO_PCLRR_FECI2C 0xFC0A4027 +#define MCFGPIO_PCLRR_QSPI 0xFC0A4028 +#define MCFGPIO_PCLRR_TIMER 0xFC0A4029 +#define MCFGPIO_PCLRR_UART 0xFC0A402A +#define MCFGPIO_PCLRR_FECH 0xFC0A402B +#define MCFGPIO_PCLRR_FECL 0xFC0A402C +/* + * Generic GPIO support + */ +#define MCFGPIO_PODR MCFGPIO_PODR_BUSCTL +#define MCFGPIO_PDDR MCFGPIO_PDDR_BUSCTL +#define MCFGPIO_PPDR MCFGPIO_PPDSDR_BUSCTL +#define MCFGPIO_SETR MCFGPIO_PPDSDR_BUSCTL +#define MCFGPIO_CLRR MCFGPIO_PCLRR_BUSCTL + +#define MCFGPIO_PIN_MAX 80 +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE 
+/****************************************************************************/ #define MCF_GPIO_PAR_UART (0xA4036) #define MCF_GPIO_PAR_FECI2C (0xA4033) diff --git a/arch/m68knommu/platform/520x/Makefile b/arch/m68knommu/platform/520x/Makefile index a50e76acc8f..435ab3483dc 100644 --- a/arch/m68knommu/platform/520x/Makefile +++ b/arch/m68knommu/platform/520x/Makefile @@ -14,4 +14,4 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/520x/gpio.c b/arch/m68knommu/platform/520x/gpio.c new file mode 100644 index 00000000000..15b5bb62a69 --- /dev/null +++ b/arch/m68knommu/platform/520x/gpio.c @@ -0,0 +1,211 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PIRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "BUSCTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 8, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BUSCTL, + .podr = MCFGPIO_PODR_BUSCTL, + .ppdr = MCFGPIO_PPDSDR_BUSCTL, + .setr = MCFGPIO_PPDSDR_BUSCTL, + .clrr = MCFGPIO_PCLRR_BUSCTL, + }, + { + .gpio_chip = { + .label = "BE", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 16, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BE, + .podr = MCFGPIO_PODR_BE, + .ppdr = MCFGPIO_PPDSDR_BE, + .setr = MCFGPIO_PPDSDR_BE, + .clrr = MCFGPIO_PCLRR_BE, + }, + { + .gpio_chip = { + .label = "CS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 25, + .ngpio = 3, + }, + .pddr = MCFGPIO_PDDR_CS, + .podr = MCFGPIO_PODR_CS, + .ppdr = MCFGPIO_PPDSDR_CS, + .setr = MCFGPIO_PPDSDR_CS, + .clrr = MCFGPIO_PCLRR_CS, + }, + { + .gpio_chip = { + .label = "FECI2C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_FECI2C, + .podr = MCFGPIO_PODR_FECI2C, + .ppdr = MCFGPIO_PPDSDR_FECI2C, + .setr = MCFGPIO_PPDSDR_FECI2C, + .clrr = MCFGPIO_PCLRR_FECI2C, + }, + { + .gpio_chip = { + .label = "QSPI", + .request = mcf_gpio_request, + 
.free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_QSPI, + .podr = MCFGPIO_PODR_QSPI, + .ppdr = MCFGPIO_PPDSDR_QSPI, + .setr = MCFGPIO_PPDSDR_QSPI, + .clrr = MCFGPIO_PCLRR_QSPI, + }, + { + .gpio_chip = { + .label = "TIMER", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 48, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_TIMER, + .podr = MCFGPIO_PODR_TIMER, + .ppdr = MCFGPIO_PPDSDR_TIMER, + .setr = MCFGPIO_PPDSDR_TIMER, + .clrr = MCFGPIO_PCLRR_TIMER, + }, + { + .gpio_chip = { + .label = "UART", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 56, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_UART, + .podr = MCFGPIO_PODR_UART, + .ppdr = MCFGPIO_PPDSDR_UART, + .setr = MCFGPIO_PPDSDR_UART, + .clrr = MCFGPIO_PCLRR_UART, + }, + { + .gpio_chip = { + .label = "FECH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FECH, + .podr = MCFGPIO_PODR_FECH, + .ppdr = MCFGPIO_PPDSDR_FECH, + .setr = MCFGPIO_PPDSDR_FECH, + .clrr = MCFGPIO_PCLRR_FECH, + }, + { + .gpio_chip = { + .label = "FECL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FECL, + .podr = MCFGPIO_PODR_FECL, + .ppdr = MCFGPIO_PPDSDR_FECL, + .setr = MCFGPIO_PPDSDR_FECL, + .clrr = MCFGPIO_PCLRR_FECL, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From a03ce7d9ef05e145ef706f99e68d5ffacf0ad325 Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:04 -0700 Subject: generic GPIO support for the Freescale Coldfire 523x. Add support for the 523x. 
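The tables in these patches only register chips with gpiolib; nothing in them actually drives a pin. As an illustration of what they enable, a built-in consumer written purely against the generic GPIO calls of this kernel generation could look like the sketch below. The pin number is an example only (56 is the first pin of the 520x "UART" bank, per .base = 56 above); everything else is stock gpiolib.

/*
 * Illustrative consumer sketch (not part of any patch here): claim
 * the first pin of the 520x "UART" bank and drive it high.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/gpio.h>

#define EXAMPLE_PIN	56	/* 520x "UART" bank, bit 0 -- example only */

static int __init example_pin_init(void)
{
	int ret;

	ret = gpio_request(EXAMPLE_PIN, "example");
	if (ret)
		return ret;		/* pin busy or out of range */

	/* Routed by gpiolib to the chip's .direction_output hook. */
	ret = gpio_direction_output(EXAMPLE_PIN, 0);
	if (ret) {
		gpio_free(EXAMPLE_PIN);
		return ret;
	}

	gpio_set_value(EXAMPLE_PIN, 1);	/* drive the pin high */
	return 0;
}
device_initcall(example_pin_init);

gpiolib resolves a flat GPIO number to whichever registered chip covers it in its [base, base + ngpio) window; that is all the .base fields in these tables are doing.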
Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m523xsim.h | 77 +++++++++ arch/m68knommu/platform/523x/Makefile | 2 +- arch/m68knommu/platform/523x/gpio.c | 283 ++++++++++++++++++++++++++++++++++ 3 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/523x/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m523xsim.h b/arch/m68k/include/asm/m523xsim.h index 55183b5df1b..a34894cf8e6 100644 --- a/arch/m68k/include/asm/m523xsim.h +++ b/arch/m68k/include/asm/m523xsim.h @@ -50,5 +50,82 @@ #define MCF_RCR_SWRESET 0x80 /* Software reset bit */ #define MCF_RCR_FRCSTOUT 0x40 /* Force external reset */ +#define MCFGPIO_PODR_ADDR (MCF_IPSBAR + 0x100000) +#define MCFGPIO_PODR_DATAH (MCF_IPSBAR + 0x100001) +#define MCFGPIO_PODR_DATAL (MCF_IPSBAR + 0x100002) +#define MCFGPIO_PODR_BUSCTL (MCF_IPSBAR + 0x100003) +#define MCFGPIO_PODR_BS (MCF_IPSBAR + 0x100004) +#define MCFGPIO_PODR_CS (MCF_IPSBAR + 0x100005) +#define MCFGPIO_PODR_SDRAM (MCF_IPSBAR + 0x100006) +#define MCFGPIO_PODR_FECI2C (MCF_IPSBAR + 0x100007) +#define MCFGPIO_PODR_UARTH (MCF_IPSBAR + 0x100008) +#define MCFGPIO_PODR_UARTL (MCF_IPSBAR + 0x100009) +#define MCFGPIO_PODR_QSPI (MCF_IPSBAR + 0x10000A) +#define MCFGPIO_PODR_TIMER (MCF_IPSBAR + 0x10000B) +#define MCFGPIO_PODR_ETPU (MCF_IPSBAR + 0x10000C) + +#define MCFGPIO_PDDR_ADDR (MCF_IPSBAR + 0x100010) +#define MCFGPIO_PDDR_DATAH (MCF_IPSBAR + 0x100011) +#define MCFGPIO_PDDR_DATAL (MCF_IPSBAR + 0x100012) +#define MCFGPIO_PDDR_BUSCTL (MCF_IPSBAR + 0x100013) +#define MCFGPIO_PDDR_BS (MCF_IPSBAR + 0x100014) +#define MCFGPIO_PDDR_CS (MCF_IPSBAR + 0x100015) +#define MCFGPIO_PDDR_SDRAM (MCF_IPSBAR + 0x100016) +#define MCFGPIO_PDDR_FECI2C (MCF_IPSBAR + 0x100017) +#define MCFGPIO_PDDR_UARTH (MCF_IPSBAR + 0x100018) +#define MCFGPIO_PDDR_UARTL (MCF_IPSBAR + 0x100019) +#define MCFGPIO_PDDR_QSPI (MCF_IPSBAR + 0x10001A) +#define MCFGPIO_PDDR_TIMER (MCF_IPSBAR + 0x10001B) +#define MCFGPIO_PDDR_ETPU (MCF_IPSBAR + 0x10001C) + +#define MCFGPIO_PPDSDR_ADDR (MCF_IPSBAR + 0x100020) +#define MCFGPIO_PPDSDR_DATAH (MCF_IPSBAR + 0x100021) +#define MCFGPIO_PPDSDR_DATAL (MCF_IPSBAR + 0x100022) +#define MCFGPIO_PPDSDR_BUSCTL (MCF_IPSBAR + 0x100023) +#define MCFGPIO_PPDSDR_BS (MCF_IPSBAR + 0x100024) +#define MCFGPIO_PPDSDR_CS (MCF_IPSBAR + 0x100025) +#define MCFGPIO_PPDSDR_SDRAM (MCF_IPSBAR + 0x100026) +#define MCFGPIO_PPDSDR_FECI2C (MCF_IPSBAR + 0x100027) +#define MCFGPIO_PPDSDR_UARTH (MCF_IPSBAR + 0x100028) +#define MCFGPIO_PPDSDR_UARTL (MCF_IPSBAR + 0x100029) +#define MCFGPIO_PPDSDR_QSPI (MCF_IPSBAR + 0x10002A) +#define MCFGPIO_PPDSDR_TIMER (MCF_IPSBAR + 0x10002B) +#define MCFGPIO_PPDSDR_ETPU (MCF_IPSBAR + 0x10002C) + +#define MCFGPIO_PCLRR_ADDR (MCF_IPSBAR + 0x100030) +#define MCFGPIO_PCLRR_DATAH (MCF_IPSBAR + 0x100031) +#define MCFGPIO_PCLRR_DATAL (MCF_IPSBAR + 0x100032) +#define MCFGPIO_PCLRR_BUSCTL (MCF_IPSBAR + 0x100033) +#define MCFGPIO_PCLRR_BS (MCF_IPSBAR + 0x100034) +#define MCFGPIO_PCLRR_CS (MCF_IPSBAR + 0x100035) +#define MCFGPIO_PCLRR_SDRAM (MCF_IPSBAR + 0x100036) +#define MCFGPIO_PCLRR_FECI2C (MCF_IPSBAR + 0x100037) +#define MCFGPIO_PCLRR_UARTH (MCF_IPSBAR + 0x100038) +#define MCFGPIO_PCLRR_UARTL (MCF_IPSBAR + 0x100039) +#define MCFGPIO_PCLRR_QSPI (MCF_IPSBAR + 0x10003A) +#define MCFGPIO_PCLRR_TIMER (MCF_IPSBAR + 0x10003B) +#define MCFGPIO_PCLRR_ETPU (MCF_IPSBAR + 0x10003C) + +/* + * EPort + */ + +#define MCFEPORT_EPDDR (MCF_IPSBAR + 0x130002) +#define MCFEPORT_EPDR (MCF_IPSBAR + 0x130004) +#define MCFEPORT_EPPDR 
(MCF_IPSBAR + 0x130005) + +/* + * Generic GPIO support + */ +#define MCFGPIO_PODR MCFGPIO_PODR_ADDR +#define MCFGPIO_PDDR MCFGPIO_PDDR_ADDR +#define MCFGPIO_PPDR MCFGPIO_PPDSDR_ADDR +#define MCFGPIO_SETR MCFGPIO_PPDSDR_ADDR +#define MCFGPIO_CLRR MCFGPIO_PCLRR_ADDR + +#define MCFGPIO_PIN_MAX 107 +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE + /****************************************************************************/ #endif /* m523xsim_h */ diff --git a/arch/m68knommu/platform/523x/Makefile b/arch/m68knommu/platform/523x/Makefile index 5694d593f02..b8f9b45440c 100644 --- a/arch/m68knommu/platform/523x/Makefile +++ b/arch/m68knommu/platform/523x/Makefile @@ -14,4 +14,4 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/523x/gpio.c b/arch/m68knommu/platform/523x/gpio.c new file mode 100644 index 00000000000..f02840d54d3 --- /dev/null +++ b/arch/m68knommu/platform/523x/gpio.c @@ -0,0 +1,283 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PIRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "ADDR", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 13, + .ngpio = 3, + }, + .pddr = MCFGPIO_PDDR_ADDR, + .podr = MCFGPIO_PODR_ADDR, + .ppdr = MCFGPIO_PPDSDR_ADDR, + .setr = MCFGPIO_PPDSDR_ADDR, + .clrr = MCFGPIO_PCLRR_ADDR, + }, + { + .gpio_chip = { + .label = "DATAH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 16, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_DATAH, + .podr = MCFGPIO_PODR_DATAH, + .ppdr = MCFGPIO_PPDSDR_DATAH, + .setr = MCFGPIO_PPDSDR_DATAH, + .clrr = MCFGPIO_PCLRR_DATAH, + }, + { + .gpio_chip = { + .label = "DATAL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 24, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_DATAL, + .podr = MCFGPIO_PODR_DATAL, + .ppdr = MCFGPIO_PPDSDR_DATAL, + .setr = MCFGPIO_PPDSDR_DATAL, + .clrr = MCFGPIO_PCLRR_DATAL, + }, + { + .gpio_chip = { + .label = "BUSCTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = 
mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_BUSCTL, + .podr = MCFGPIO_PODR_BUSCTL, + .ppdr = MCFGPIO_PPDSDR_BUSCTL, + .setr = MCFGPIO_PPDSDR_BUSCTL, + .clrr = MCFGPIO_PCLRR_BUSCTL, + }, + { + .gpio_chip = { + .label = "BS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BS, + .podr = MCFGPIO_PODR_BS, + .ppdr = MCFGPIO_PPDSDR_BS, + .setr = MCFGPIO_PPDSDR_BS, + .clrr = MCFGPIO_PCLRR_BS, + }, + { + .gpio_chip = { + .label = "CS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 49, + .ngpio = 7, + }, + .pddr = MCFGPIO_PDDR_CS, + .podr = MCFGPIO_PODR_CS, + .ppdr = MCFGPIO_PPDSDR_CS, + .setr = MCFGPIO_PPDSDR_CS, + .clrr = MCFGPIO_PCLRR_CS, + }, + { + .gpio_chip = { + .label = "SDRAM", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 56, + .ngpio = 6, + }, + .pddr = MCFGPIO_PDDR_SDRAM, + .podr = MCFGPIO_PODR_SDRAM, + .ppdr = MCFGPIO_PPDSDR_SDRAM, + .setr = MCFGPIO_PPDSDR_SDRAM, + .clrr = MCFGPIO_PCLRR_SDRAM, + }, + { + .gpio_chip = { + .label = "FECI2C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_FECI2C, + .podr = MCFGPIO_PODR_FECI2C, + .ppdr = MCFGPIO_PPDSDR_FECI2C, + .setr = MCFGPIO_PPDSDR_FECI2C, + .clrr = MCFGPIO_PCLRR_FECI2C, + }, + { + .gpio_chip = { + .label = "UARTH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 2, + }, + .pddr = MCFGPIO_PDDR_UARTH, + .podr = MCFGPIO_PODR_UARTH, + .ppdr = MCFGPIO_PPDSDR_UARTH, + .setr = MCFGPIO_PPDSDR_UARTH, + .clrr = MCFGPIO_PCLRR_UARTH, + }, + { + .gpio_chip = { + .label = "UARTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 80, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_UARTL, + .podr = MCFGPIO_PODR_UARTL, + .ppdr = MCFGPIO_PPDSDR_UARTL, + .setr = MCFGPIO_PPDSDR_UARTL, + .clrr = MCFGPIO_PCLRR_UARTL, + }, + { + .gpio_chip = { + .label = "QSPI", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 88, + .ngpio = 5, + }, + .pddr = MCFGPIO_PDDR_QSPI, + .podr = MCFGPIO_PODR_QSPI, + .ppdr = MCFGPIO_PPDSDR_QSPI, + .setr = MCFGPIO_PPDSDR_QSPI, + .clrr = MCFGPIO_PCLRR_QSPI, + }, + { + .gpio_chip = { + .label = "TIMER", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + 
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value_fast,
+			.base = 96,
+			.ngpio = 4,
+		},
+		.pddr = MCFGPIO_PDDR_TIMER,
+		.podr = MCFGPIO_PODR_TIMER,
+		.ppdr = MCFGPIO_PPDSDR_TIMER,
+		.setr = MCFGPIO_PPDSDR_TIMER,
+		.clrr = MCFGPIO_PCLRR_TIMER,
+	},
+	{
+		.gpio_chip = {
+			.label = "ETPU",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value_fast,
+			.base = 104,
+			.ngpio = 3,
+		},
+		.pddr = MCFGPIO_PDDR_ETPU,
+		.podr = MCFGPIO_PODR_ETPU,
+		.ppdr = MCFGPIO_PPDSDR_ETPU,
+		.setr = MCFGPIO_PPDSDR_ETPU,
+		.clrr = MCFGPIO_PCLRR_ETPU,
+	},
+};
+
+static int __init mcf_gpio_init(void)
+{
+	unsigned i = 0;
+	while (i < ARRAY_SIZE(mcf_gpio_chips))
+		(void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]);
+	return 0;
+}
+
+core_initcall(mcf_gpio_init);
-- cgit v1.2.3-70-g09d2

From 9e8ded166dcc7831ee6f31f8a0937cd9b58e83b0 Mon Sep 17 00:00:00 2001
From: "sfking@fdwdc.com"
Date: Fri, 19 Jun 2009 18:11:05 -0700
Subject: generic GPIO support for the Freescale Coldfire 5249.

Add support for the 5249.

Signed-off-by: Steven King
Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/m5249sim.h      | 23 ++++++++-----
 arch/m68knommu/platform/5249/Makefile |  2 +-
 arch/m68knommu/platform/5249/gpio.c   | 65 +++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 10 deletions(-)
 create mode 100644 arch/m68knommu/platform/5249/gpio.c

(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/m5249sim.h b/arch/m68k/include/asm/m5249sim.h
index 366eb8602d2..2c23a83512a 100644
--- a/arch/m68k/include/asm/m5249sim.h
+++ b/arch/m68k/include/asm/m5249sim.h
@@ -73,14 +73,14 @@
 /*
  * General purpose IO registers (in MBAR2).
  */
-#define MCFSIM2_GPIOREAD	0x0	/* GPIO read values */
-#define MCFSIM2_GPIOWRITE	0x4	/* GPIO write values */
-#define MCFSIM2_GPIOENABLE	0x8	/* GPIO enabled */
-#define MCFSIM2_GPIOFUNC	0xc	/* GPIO function */
-#define MCFSIM2_GPIO1READ	0xb0	/* GPIO1 read values */
-#define MCFSIM2_GPIO1WRITE	0xb4	/* GPIO1 write values */
-#define MCFSIM2_GPIO1ENABLE	0xb8	/* GPIO1 enabled */
-#define MCFSIM2_GPIO1FUNC	0xbc	/* GPIO1 function */
+#define MCFSIM2_GPIOREAD	(MCF_MBAR2 + 0x000)	/* GPIO read values */
+#define MCFSIM2_GPIOWRITE	(MCF_MBAR2 + 0x004)	/* GPIO write values */
+#define MCFSIM2_GPIOENABLE	(MCF_MBAR2 + 0x008)	/* GPIO enabled */
+#define MCFSIM2_GPIOFUNC	(MCF_MBAR2 + 0x00C)	/* GPIO function */
+#define MCFSIM2_GPIO1READ	(MCF_MBAR2 + 0x0B0)	/* GPIO1 read values */
+#define MCFSIM2_GPIO1WRITE	(MCF_MBAR2 + 0x0B4)	/* GPIO1 write values */
+#define MCFSIM2_GPIO1ENABLE	(MCF_MBAR2 + 0x0B8)	/* GPIO1 enabled */
+#define MCFSIM2_GPIO1FUNC	(MCF_MBAR2 + 0x0BC)	/* GPIO1 function */
 
 #define MCFSIM2_GPIOINTSTAT	0xc0	/* GPIO interrupt status */
 #define MCFSIM2_GPIOINTCLEAR	0xc0	/* GPIO interrupt clear */
@@ -100,7 +100,12 @@
 #define MCFSIM2_IDECONFIG1	0x18c	/* IDEconfig1 */
 #define MCFSIM2_IDECONFIG2	0x190	/* IDEconfig2 */
 
-
+/*
+ * Generic GPIO support
+ */
+#define MCFGPIO_PIN_MAX		64
+#define MCFGPIO_IRQ_MAX		-1
+#define MCFGPIO_IRQ_VECBASE	-1
 
 /*
  * Macro to set IMR register. It is 32 bits on the 5249.
 */
diff --git a/arch/m68knommu/platform/5249/Makefile b/arch/m68knommu/platform/5249/Makefile
index a439d9ab3f2..113c3339006 100644
--- a/arch/m68knommu/platform/5249/Makefile
+++ b/arch/m68knommu/platform/5249/Makefile
@@ -14,5 +14,5 @@
 asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
 
-obj-y := config.o
+obj-y := config.o gpio.o
 
diff --git a/arch/m68knommu/platform/5249/gpio.c b/arch/m68knommu/platform/5249/gpio.c
new file mode 100644
index 00000000000..c611eab8b3b
--- /dev/null
+++ b/arch/m68knommu/platform/5249/gpio.c
@@ -0,0 +1,65 @@
+/*
+ * Coldfire generic GPIO support
+ *
+ * (C) Copyright 2009, Steven King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/coldfire.h>
+#include <asm/mcfsim.h>
+#include <asm/mcfgpio.h>
+
+static struct mcf_gpio_chip mcf_gpio_chips[] = {
+	{
+		.gpio_chip = {
+			.label = "GPIO0",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value,
+			.ngpio = 32,
+		},
+		.pddr = MCFSIM2_GPIOENABLE,
+		.podr = MCFSIM2_GPIOWRITE,
+		.ppdr = MCFSIM2_GPIOREAD,
+	},
+	{
+		.gpio_chip = {
+			.label = "GPIO1",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value,
+			.base = 32,
+			.ngpio = 32,
+		},
+		.pddr = MCFSIM2_GPIO1ENABLE,
+		.podr = MCFSIM2_GPIO1WRITE,
+		.ppdr = MCFSIM2_GPIO1READ,
+	},
+};
+
+static int __init mcf_gpio_init(void)
+{
+	unsigned i = 0;
+	while (i < ARRAY_SIZE(mcf_gpio_chips))
+		(void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]);
+	return 0;
+}
+
+core_initcall(mcf_gpio_init);
-- cgit v1.2.3-70-g09d2

From f1554da34f11518bde33776c292c1b58fc20f073 Mon Sep 17 00:00:00 2001
From: "sfking@fdwdc.com"
Date: Fri, 19 Jun 2009 18:11:06 -0700
Subject: generic GPIO support for the Freescale Coldfire 527x.

Add support for the 5271 & 5275.
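One detail worth noting across these tables: banks whose ports have PPDSDR/PCLRR set/clear registers get mcf_gpio_set_value_fast for .set, while banks without them (the 5249 ports above, the EPORT data register) get plain mcf_gpio_set_value. The sketch below shows the idiom the fast variant relies on. It is an illustration only, under the usual ColdFire port semantics (a byte written to PPDSDR sets the output bits that are 1; a byte written to PCLRR clears the output bits that are 0), with a made-up struct standing in for the relevant mcf_gpio_chip fields rather than the kernel's exact code.

/*
 * Illustration: why a separate "fast" setter exists.  With paired
 * set/clear registers, one byte write updates a single pin, so there
 * is no read-modify-write of PODR and nothing to serialize.
 */
#include <linux/types.h>
#include <linux/io.h>

struct example_bank {			/* stand-in for .setr/.clrr above */
	void __iomem *setr;		/* PPDSDR: 1 bits set the output */
	void __iomem *clrr;		/* PCLRR: 0 bits clear the output */
};

static void example_set_fast(struct example_bank *b, unsigned offset, int value)
{
	u8 bit = 1 << (offset & 7);	/* byte-wide ports, 8 pins each */

	if (value)
		__raw_writeb(bit, b->setr);	/* only this pin's bit is 1 */
	else
		__raw_writeb(~bit, b->clrr);	/* only this pin's bit is 0 */
}

A bank with only a plain data register cannot do this: its setter has to read the whole port, mask in the one bit, and write it back atomically, which is why those banks are given the slower callback.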
Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m527xsim.h | 169 ++++++++++ arch/m68knommu/platform/527x/Makefile | 2 +- arch/m68knommu/platform/527x/gpio.c | 607 ++++++++++++++++++++++++++++++++++ 3 files changed, 777 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/527x/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m527xsim.h b/arch/m68k/include/asm/m527xsim.h index 95f4f8ee8f7..453356d72d8 100644 --- a/arch/m68k/include/asm/m527xsim.h +++ b/arch/m68k/include/asm/m527xsim.h @@ -54,6 +54,175 @@ #define MCFSIM_DMR1 0x5c /* SDRAM address mask 1 */ #endif + +#ifdef CONFIG_M5271 +#define MCFGPIO_PODR_ADDR (MCF_IPSBAR + 0x100000) +#define MCFGPIO_PODR_DATAH (MCF_IPSBAR + 0x100001) +#define MCFGPIO_PODR_DATAL (MCF_IPSBAR + 0x100002) +#define MCFGPIO_PODR_BUSCTL (MCF_IPSBAR + 0x100003) +#define MCFGPIO_PODR_BS (MCF_IPSBAR + 0x100004) +#define MCFGPIO_PODR_CS (MCF_IPSBAR + 0x100005) +#define MCFGPIO_PODR_SDRAM (MCF_IPSBAR + 0x100006) +#define MCFGPIO_PODR_FECI2C (MCF_IPSBAR + 0x100007) +#define MCFGPIO_PODR_UARTH (MCF_IPSBAR + 0x100008) +#define MCFGPIO_PODR_UARTL (MCF_IPSBAR + 0x100009) +#define MCFGPIO_PODR_QSPI (MCF_IPSBAR + 0x10000A) +#define MCFGPIO_PODR_TIMER (MCF_IPSBAR + 0x10000B) + +#define MCFGPIO_PDDR_ADDR (MCF_IPSBAR + 0x100010) +#define MCFGPIO_PDDR_DATAH (MCF_IPSBAR + 0x100011) +#define MCFGPIO_PDDR_DATAL (MCF_IPSBAR + 0x100012) +#define MCFGPIO_PDDR_BUSCTL (MCF_IPSBAR + 0x100013) +#define MCFGPIO_PDDR_BS (MCF_IPSBAR + 0x100014) +#define MCFGPIO_PDDR_CS (MCF_IPSBAR + 0x100015) +#define MCFGPIO_PDDR_SDRAM (MCF_IPSBAR + 0x100016) +#define MCFGPIO_PDDR_FECI2C (MCF_IPSBAR + 0x100017) +#define MCFGPIO_PDDR_UARTH (MCF_IPSBAR + 0x100018) +#define MCFGPIO_PDDR_UARTL (MCF_IPSBAR + 0x100019) +#define MCFGPIO_PDDR_QSPI (MCF_IPSBAR + 0x10001A) +#define MCFGPIO_PDDR_TIMER (MCF_IPSBAR + 0x10001B) + +#define MCFGPIO_PPDSDR_ADDR (MCF_IPSBAR + 0x100020) +#define MCFGPIO_PPDSDR_DATAH (MCF_IPSBAR + 0x100021) +#define MCFGPIO_PPDSDR_DATAL (MCF_IPSBAR + 0x100022) +#define MCFGPIO_PPDSDR_BUSCTL (MCF_IPSBAR + 0x100023) +#define MCFGPIO_PPDSDR_BS (MCF_IPSBAR + 0x100024) +#define MCFGPIO_PPDSDR_CS (MCF_IPSBAR + 0x100025) +#define MCFGPIO_PPDSDR_SDRAM (MCF_IPSBAR + 0x100026) +#define MCFGPIO_PPDSDR_FECI2C (MCF_IPSBAR + 0x100027) +#define MCFGPIO_PPDSDR_UARTH (MCF_IPSBAR + 0x100028) +#define MCFGPIO_PPDSDR_UARTL (MCF_IPSBAR + 0x100029) +#define MCFGPIO_PPDSDR_QSPI (MCF_IPSBAR + 0x10002A) +#define MCFGPIO_PPDSDR_TIMER (MCF_IPSBAR + 0x10002B) + +#define MCFGPIO_PCLRR_ADDR (MCF_IPSBAR + 0x100030) +#define MCFGPIO_PCLRR_DATAH (MCF_IPSBAR + 0x100031) +#define MCFGPIO_PCLRR_DATAL (MCF_IPSBAR + 0x100032) +#define MCFGPIO_PCLRR_BUSCTL (MCF_IPSBAR + 0x100033) +#define MCFGPIO_PCLRR_BS (MCF_IPSBAR + 0x100034) +#define MCFGPIO_PCLRR_CS (MCF_IPSBAR + 0x100035) +#define MCFGPIO_PCLRR_SDRAM (MCF_IPSBAR + 0x100036) +#define MCFGPIO_PCLRR_FECI2C (MCF_IPSBAR + 0x100037) +#define MCFGPIO_PCLRR_UARTH (MCF_IPSBAR + 0x100038) +#define MCFGPIO_PCLRR_UARTL (MCF_IPSBAR + 0x100039) +#define MCFGPIO_PCLRR_QSPI (MCF_IPSBAR + 0x10003A) +#define MCFGPIO_PCLRR_TIMER (MCF_IPSBAR + 0x10003B) + +/* + * Generic GPIO support + */ +#define MCFGPIO_PODR MCFGPIO_PODR_ADDR +#define MCFGPIO_PDDR MCFGPIO_PDDR_ADDR +#define MCFGPIO_PPDR MCFGPIO_PPDSDR_ADDR +#define MCFGPIO_SETR MCFGPIO_PPDSDR_ADDR +#define MCFGPIO_CLRR MCFGPIO_PCLRR_ADDR + +#define MCFGPIO_PIN_MAX 100 +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE +#endif + +#ifdef CONFIG_M5275 
+#define MCFGPIO_PODR_BUSCTL (MCF_IPSBAR + 0x100004) +#define MCFGPIO_PODR_ADDR (MCF_IPSBAR + 0x100005) +#define MCFGPIO_PODR_CS (MCF_IPSBAR + 0x100008) +#define MCFGPIO_PODR_FEC0H (MCF_IPSBAR + 0x10000A) +#define MCFGPIO_PODR_FEC0L (MCF_IPSBAR + 0x10000B) +#define MCFGPIO_PODR_FECI2C (MCF_IPSBAR + 0x10000C) +#define MCFGPIO_PODR_QSPI (MCF_IPSBAR + 0x10000D) +#define MCFGPIO_PODR_SDRAM (MCF_IPSBAR + 0x10000E) +#define MCFGPIO_PODR_TIMERH (MCF_IPSBAR + 0x10000F) +#define MCFGPIO_PODR_TIMERL (MCF_IPSBAR + 0x100010) +#define MCFGPIO_PODR_UARTL (MCF_IPSBAR + 0x100011) +#define MCFGPIO_PODR_FEC1H (MCF_IPSBAR + 0x100012) +#define MCFGPIO_PODR_FEC1L (MCF_IPSBAR + 0x100013) +#define MCFGPIO_PODR_BS (MCF_IPSBAR + 0x100014) +#define MCFGPIO_PODR_IRQ (MCF_IPSBAR + 0x100015) +#define MCFGPIO_PODR_USBH (MCF_IPSBAR + 0x100016) +#define MCFGPIO_PODR_USBL (MCF_IPSBAR + 0x100017) +#define MCFGPIO_PODR_UARTH (MCF_IPSBAR + 0x100018) + +#define MCFGPIO_PDDR_BUSCTL (MCF_IPSBAR + 0x100020) +#define MCFGPIO_PDDR_ADDR (MCF_IPSBAR + 0x100021) +#define MCFGPIO_PDDR_CS (MCF_IPSBAR + 0x100024) +#define MCFGPIO_PDDR_FEC0H (MCF_IPSBAR + 0x100026) +#define MCFGPIO_PDDR_FEC0L (MCF_IPSBAR + 0x100027) +#define MCFGPIO_PDDR_FECI2C (MCF_IPSBAR + 0x100028) +#define MCFGPIO_PDDR_QSPI (MCF_IPSBAR + 0x100029) +#define MCFGPIO_PDDR_SDRAM (MCF_IPSBAR + 0x10002A) +#define MCFGPIO_PDDR_TIMERH (MCF_IPSBAR + 0x10002B) +#define MCFGPIO_PDDR_TIMERL (MCF_IPSBAR + 0x10002C) +#define MCFGPIO_PDDR_UARTL (MCF_IPSBAR + 0x10002D) +#define MCFGPIO_PDDR_FEC1H (MCF_IPSBAR + 0x10002E) +#define MCFGPIO_PDDR_FEC1L (MCF_IPSBAR + 0x10002F) +#define MCFGPIO_PDDR_BS (MCF_IPSBAR + 0x100030) +#define MCFGPIO_PDDR_IRQ (MCF_IPSBAR + 0x100031) +#define MCFGPIO_PDDR_USBH (MCF_IPSBAR + 0x100032) +#define MCFGPIO_PDDR_USBL (MCF_IPSBAR + 0x100033) +#define MCFGPIO_PDDR_UARTH (MCF_IPSBAR + 0x100034) + +#define MCFGPIO_PPDSDR_BUSCTL (MCF_IPSBAR + 0x10003C) +#define MCFGPIO_PPDSDR_ADDR (MCF_IPSBAR + 0x10003D) +#define MCFGPIO_PPDSDR_CS (MCF_IPSBAR + 0x100040) +#define MCFGPIO_PPDSDR_FEC0H (MCF_IPSBAR + 0x100042) +#define MCFGPIO_PPDSDR_FEC0L (MCF_IPSBAR + 0x100043) +#define MCFGPIO_PPDSDR_FECI2C (MCF_IPSBAR + 0x100044) +#define MCFGPIO_PPDSDR_QSPI (MCF_IPSBAR + 0x100045) +#define MCFGPIO_PPDSDR_SDRAM (MCF_IPSBAR + 0x100046) +#define MCFGPIO_PPDSDR_TIMERH (MCF_IPSBAR + 0x100047) +#define MCFGPIO_PPDSDR_TIMERL (MCF_IPSBAR + 0x100048) +#define MCFGPIO_PPDSDR_UARTL (MCF_IPSBAR + 0x100049) +#define MCFGPIO_PPDSDR_FEC1H (MCF_IPSBAR + 0x10004A) +#define MCFGPIO_PPDSDR_FEC1L (MCF_IPSBAR + 0x10004B) +#define MCFGPIO_PPDSDR_BS (MCF_IPSBAR + 0x10004C) +#define MCFGPIO_PPDSDR_IRQ (MCF_IPSBAR + 0x10004D) +#define MCFGPIO_PPDSDR_USBH (MCF_IPSBAR + 0x10004E) +#define MCFGPIO_PPDSDR_USBL (MCF_IPSBAR + 0x10004F) +#define MCFGPIO_PPDSDR_UARTH (MCF_IPSBAR + 0x100050) + +#define MCFGPIO_PCLRR_BUSCTL (MCF_IPSBAR + 0x100058) +#define MCFGPIO_PCLRR_ADDR (MCF_IPSBAR + 0x100059) +#define MCFGPIO_PCLRR_CS (MCF_IPSBAR + 0x10005C) +#define MCFGPIO_PCLRR_FEC0H (MCF_IPSBAR + 0x10005E) +#define MCFGPIO_PCLRR_FEC0L (MCF_IPSBAR + 0x10005F) +#define MCFGPIO_PCLRR_FECI2C (MCF_IPSBAR + 0x100060) +#define MCFGPIO_PCLRR_QSPI (MCF_IPSBAR + 0x100061) +#define MCFGPIO_PCLRR_SDRAM (MCF_IPSBAR + 0x100062) +#define MCFGPIO_PCLRR_TIMERH (MCF_IPSBAR + 0x100063) +#define MCFGPIO_PCLRR_TIMERL (MCF_IPSBAR + 0x100064) +#define MCFGPIO_PCLRR_UARTL (MCF_IPSBAR + 0x100065) +#define MCFGPIO_PCLRR_FEC1H (MCF_IPSBAR + 0x100066) +#define MCFGPIO_PCLRR_FEC1L (MCF_IPSBAR + 0x100067) +#define MCFGPIO_PCLRR_BS (MCF_IPSBAR + 
0x100068) +#define MCFGPIO_PCLRR_IRQ (MCF_IPSBAR + 0x100069) +#define MCFGPIO_PCLRR_USBH (MCF_IPSBAR + 0x10006A) +#define MCFGPIO_PCLRR_USBL (MCF_IPSBAR + 0x10006B) +#define MCFGPIO_PCLRR_UARTH (MCF_IPSBAR + 0x10006C) + + +/* + * Generic GPIO support + */ +#define MCFGPIO_PODR MCFGPIO_PODR_BUSCTL +#define MCFGPIO_PDDR MCFGPIO_PDDR_BUSCTL +#define MCFGPIO_PPDR MCFGPIO_PPDSDR_BUSCTL +#define MCFGPIO_SETR MCFGPIO_PPDSDR_BUSCTL +#define MCFGPIO_CLRR MCFGPIO_PCLRR_BUSCTL + +#define MCFGPIO_PIN_MAX 148 +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE +#endif + +/* + * EPort + */ + +#define MCFEPORT_EPDDR (MCF_IPSBAR + 0x130002) +#define MCFEPORT_EPDR (MCF_IPSBAR + 0x130004) +#define MCFEPORT_EPPDR (MCF_IPSBAR + 0x130005) + + /* * GPIO pins setups to enable the UARTs. */ diff --git a/arch/m68knommu/platform/527x/Makefile b/arch/m68knommu/platform/527x/Makefile index 26135d92b34..3d90e6d9245 100644 --- a/arch/m68knommu/platform/527x/Makefile +++ b/arch/m68knommu/platform/527x/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/527x/gpio.c b/arch/m68knommu/platform/527x/gpio.c new file mode 100644 index 00000000000..1028142851a --- /dev/null +++ b/arch/m68knommu/platform/527x/gpio.c @@ -0,0 +1,607 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { +#if defined(CONFIG_M5271) + { + .gpio_chip = { + .label = "PIRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "ADDR", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 13, + .ngpio = 3, + }, + .pddr = MCFGPIO_PDDR_ADDR, + .podr = MCFGPIO_PODR_ADDR, + .ppdr = MCFGPIO_PPDSDR_ADDR, + .setr = MCFGPIO_PPDSDR_ADDR, + .clrr = MCFGPIO_PCLRR_ADDR, + }, + { + .gpio_chip = { + .label = "DATAH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 16, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_DATAH, + .podr = MCFGPIO_PODR_DATAH, + .ppdr = MCFGPIO_PPDSDR_DATAH, + .setr = MCFGPIO_PPDSDR_DATAH, + .clrr = MCFGPIO_PCLRR_DATAH, + }, + { + .gpio_chip = { + .label = "DATAL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 24, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_DATAL, + .podr = MCFGPIO_PODR_DATAL, + .ppdr = MCFGPIO_PPDSDR_DATAL, + .setr = MCFGPIO_PPDSDR_DATAL, + .clrr = MCFGPIO_PCLRR_DATAL, + }, + { + .gpio_chip = { + .label = "BUSCTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_BUSCTL, + .podr = MCFGPIO_PODR_BUSCTL, + .ppdr = MCFGPIO_PPDSDR_BUSCTL, + .setr = MCFGPIO_PPDSDR_BUSCTL, + .clrr = MCFGPIO_PCLRR_BUSCTL, + }, + { + .gpio_chip = { + .label = "BS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BS, + .podr = MCFGPIO_PODR_BS, + .ppdr = MCFGPIO_PPDSDR_BS, + .setr = MCFGPIO_PPDSDR_BS, + .clrr = MCFGPIO_PCLRR_BS, + }, + { + .gpio_chip = { + .label = "CS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 49, + .ngpio = 7, + }, + .pddr = MCFGPIO_PDDR_CS, + .podr = MCFGPIO_PODR_CS, + .ppdr = MCFGPIO_PPDSDR_CS, + .setr = MCFGPIO_PPDSDR_CS, + .clrr = MCFGPIO_PCLRR_CS, + }, + { + .gpio_chip = { + .label = "SDRAM", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 56, + .ngpio = 6, + }, + .pddr = MCFGPIO_PDDR_SDRAM, + .podr = MCFGPIO_PODR_SDRAM, + .ppdr = MCFGPIO_PPDSDR_SDRAM, + .setr = 
MCFGPIO_PPDSDR_SDRAM, + .clrr = MCFGPIO_PCLRR_SDRAM, + }, + { + .gpio_chip = { + .label = "FECI2C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_FECI2C, + .podr = MCFGPIO_PODR_FECI2C, + .ppdr = MCFGPIO_PPDSDR_FECI2C, + .setr = MCFGPIO_PPDSDR_FECI2C, + .clrr = MCFGPIO_PCLRR_FECI2C, + }, + { + .gpio_chip = { + .label = "UARTH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 2, + }, + .pddr = MCFGPIO_PDDR_UARTH, + .podr = MCFGPIO_PODR_UARTH, + .ppdr = MCFGPIO_PPDSDR_UARTH, + .setr = MCFGPIO_PPDSDR_UARTH, + .clrr = MCFGPIO_PCLRR_UARTH, + }, + { + .gpio_chip = { + .label = "UARTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 80, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_UARTL, + .podr = MCFGPIO_PODR_UARTL, + .ppdr = MCFGPIO_PPDSDR_UARTL, + .setr = MCFGPIO_PPDSDR_UARTL, + .clrr = MCFGPIO_PCLRR_UARTL, + }, + { + .gpio_chip = { + .label = "QSPI", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 88, + .ngpio = 5, + }, + .pddr = MCFGPIO_PDDR_QSPI, + .podr = MCFGPIO_PODR_QSPI, + .ppdr = MCFGPIO_PPDSDR_QSPI, + .setr = MCFGPIO_PPDSDR_QSPI, + .clrr = MCFGPIO_PCLRR_QSPI, + }, + { + .gpio_chip = { + .label = "TIMER", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 96, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_TIMER, + .podr = MCFGPIO_PODR_TIMER, + .ppdr = MCFGPIO_PPDSDR_TIMER, + .setr = MCFGPIO_PPDSDR_TIMER, + .clrr = MCFGPIO_PCLRR_TIMER, + }, +#elif defined(CONFIG_M5275) + { + .gpio_chip = { + .label = "PIRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "BUSCTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 8, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_BUSCTL, + .podr = MCFGPIO_PODR_BUSCTL, + .ppdr = MCFGPIO_PPDSDR_BUSCTL, + .setr = MCFGPIO_PPDSDR_BUSCTL, + .clrr = MCFGPIO_PCLRR_BUSCTL, + }, + { + .gpio_chip = { + .label = "ADDR", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 21, + .ngpio = 3, + }, + .pddr = MCFGPIO_PDDR_ADDR, + .podr = MCFGPIO_PODR_ADDR, + .ppdr = MCFGPIO_PPDSDR_ADDR, + .setr = MCFGPIO_PPDSDR_ADDR, + .clrr = 
MCFGPIO_PCLRR_ADDR, + }, + { + .gpio_chip = { + .label = "CS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 25, + .ngpio = 7, + }, + .pddr = MCFGPIO_PDDR_CS, + .podr = MCFGPIO_PODR_CS, + .ppdr = MCFGPIO_PPDSDR_CS, + .setr = MCFGPIO_PPDSDR_CS, + .clrr = MCFGPIO_PCLRR_CS, + }, + { + .gpio_chip = { + .label = "FEC0H", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FEC0H, + .podr = MCFGPIO_PODR_FEC0H, + .ppdr = MCFGPIO_PPDSDR_FEC0H, + .setr = MCFGPIO_PPDSDR_FEC0H, + .clrr = MCFGPIO_PCLRR_FEC0H, + }, + { + .gpio_chip = { + .label = "FEC0L", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FEC0L, + .podr = MCFGPIO_PODR_FEC0L, + .ppdr = MCFGPIO_PPDSDR_FEC0L, + .setr = MCFGPIO_PPDSDR_FEC0L, + .clrr = MCFGPIO_PCLRR_FEC0L, + }, + { + .gpio_chip = { + .label = "FECI2C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 48, + .ngpio = 6, + }, + .pddr = MCFGPIO_PDDR_FECI2C, + .podr = MCFGPIO_PODR_FECI2C, + .ppdr = MCFGPIO_PPDSDR_FECI2C, + .setr = MCFGPIO_PPDSDR_FECI2C, + .clrr = MCFGPIO_PCLRR_FECI2C, + }, + { + .gpio_chip = { + .label = "QSPI", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 56, + .ngpio = 7, + }, + .pddr = MCFGPIO_PDDR_QSPI, + .podr = MCFGPIO_PODR_QSPI, + .ppdr = MCFGPIO_PPDSDR_QSPI, + .setr = MCFGPIO_PPDSDR_QSPI, + .clrr = MCFGPIO_PCLRR_QSPI, + }, + { + .gpio_chip = { + .label = "SDRAM", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_SDRAM, + .podr = MCFGPIO_PODR_SDRAM, + .ppdr = MCFGPIO_PPDSDR_SDRAM, + .setr = MCFGPIO_PPDSDR_SDRAM, + .clrr = MCFGPIO_PCLRR_SDRAM, + }, + { + .gpio_chip = { + .label = "TIMERH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_TIMERH, + .podr = MCFGPIO_PODR_TIMERH, + .ppdr = MCFGPIO_PPDSDR_TIMERH, + .setr = MCFGPIO_PPDSDR_TIMERH, + .clrr = MCFGPIO_PCLRR_TIMERH, + }, + { + .gpio_chip = { + .label = "TIMERL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 80, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_TIMERL, + .podr = MCFGPIO_PODR_TIMERL, + .ppdr = MCFGPIO_PPDSDR_TIMERL, + .setr = 
MCFGPIO_PPDSDR_TIMERL, + .clrr = MCFGPIO_PCLRR_TIMERL, + }, + { + .gpio_chip = { + .label = "UARTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 88, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_UARTL, + .podr = MCFGPIO_PODR_UARTL, + .ppdr = MCFGPIO_PPDSDR_UARTL, + .setr = MCFGPIO_PPDSDR_UARTL, + .clrr = MCFGPIO_PCLRR_UARTL, + }, + { + .gpio_chip = { + .label = "FEC1H", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 96, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FEC1H, + .podr = MCFGPIO_PODR_FEC1H, + .ppdr = MCFGPIO_PPDSDR_FEC1H, + .setr = MCFGPIO_PPDSDR_FEC1H, + .clrr = MCFGPIO_PCLRR_FEC1H, + }, + { + .gpio_chip = { + .label = "FEC1L", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 104, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FEC1L, + .podr = MCFGPIO_PODR_FEC1L, + .ppdr = MCFGPIO_PPDSDR_FEC1L, + .setr = MCFGPIO_PPDSDR_FEC1L, + .clrr = MCFGPIO_PCLRR_FEC1L, + }, + { + .gpio_chip = { + .label = "BS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 114, + .ngpio = 2, + }, + .pddr = MCFGPIO_PDDR_BS, + .podr = MCFGPIO_PODR_BS, + .ppdr = MCFGPIO_PPDSDR_BS, + .setr = MCFGPIO_PPDSDR_BS, + .clrr = MCFGPIO_PCLRR_BS, + }, + { + .gpio_chip = { + .label = "IRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 121, + .ngpio = 7, + }, + .pddr = MCFGPIO_PDDR_IRQ, + .podr = MCFGPIO_PODR_IRQ, + .ppdr = MCFGPIO_PPDSDR_IRQ, + .setr = MCFGPIO_PPDSDR_IRQ, + .clrr = MCFGPIO_PCLRR_IRQ, + }, + { + .gpio_chip = { + .label = "USBH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 128, + .ngpio = 1, + }, + .pddr = MCFGPIO_PDDR_USBH, + .podr = MCFGPIO_PODR_USBH, + .ppdr = MCFGPIO_PPDSDR_USBH, + .setr = MCFGPIO_PPDSDR_USBH, + .clrr = MCFGPIO_PCLRR_USBH, + }, + { + .gpio_chip = { + .label = "USBL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 136, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_USBL, + .podr = MCFGPIO_PODR_USBL, + .ppdr = MCFGPIO_PPDSDR_USBL, + .setr = MCFGPIO_PPDSDR_USBL, + .clrr = MCFGPIO_PCLRR_USBL, + }, + { + .gpio_chip = { + .label = "UARTH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 144, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_UARTH, + .podr = MCFGPIO_PODR_UARTH, + .ppdr = MCFGPIO_PPDSDR_UARTH, + 
+		.setr = MCFGPIO_PPDSDR_UARTH,
+		.clrr = MCFGPIO_PCLRR_UARTH,
+	},
+#endif
+};
+
+static int __init mcf_gpio_init(void)
+{
+	unsigned i = 0;
+	while (i < ARRAY_SIZE(mcf_gpio_chips))
+		(void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]);
+	return 0;
+}
+
+core_initcall(mcf_gpio_init);
-- cgit v1.2.3-70-g09d2

From 316f2c483c32e9385329303258d12e6e33892af5 Mon Sep 17 00:00:00 2001
From: "sfking@fdwdc.com"
Date: Fri, 19 Jun 2009 18:11:07 -0700
Subject: generic GPIO support for the Freescale Coldfire 5272.

Add support for the 5272.

Signed-off-by: Steven King
Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/m5272sim.h      | 24 +++++++----
 arch/m68knommu/platform/5272/Makefile |  2 +-
 arch/m68knommu/platform/5272/gpio.c   | 81 +++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 10 deletions(-)
 create mode 100644 arch/m68knommu/platform/5272/gpio.c

(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/m5272sim.h b/arch/m68k/include/asm/m5272sim.h
index 6217edc2113..0665ba1a5d3 100644
--- a/arch/m68k/include/asm/m5272sim.h
+++ b/arch/m68k/include/asm/m5272sim.h
@@ -63,16 +63,22 @@
 #define MCFSIM_DCMR1		0x5c	/* DRAM 1 Mask reg (r/w) */
 #define MCFSIM_DCCR1		0x63	/* DRAM 1 Control reg (r/w) */
 
-#define MCFSIM_PACNT		0x80	/* Port A Control (r/w) */
-#define MCFSIM_PADDR		0x84	/* Port A Direction (r/w) */
-#define MCFSIM_PADAT		0x86	/* Port A Data (r/w) */
-#define MCFSIM_PBCNT		0x88	/* Port B Control (r/w) */
-#define MCFSIM_PBDDR		0x8c	/* Port B Direction (r/w) */
-#define MCFSIM_PBDAT		0x8e	/* Port B Data (r/w) */
-#define MCFSIM_PCDDR		0x94	/* Port C Direction (r/w) */
-#define MCFSIM_PCDAT		0x96	/* Port C Data (r/w) */
-#define MCFSIM_PDCNT		0x98	/* Port D Control (r/w) */
+#define MCFSIM_PACNT		(MCF_MBAR + 0x80)	/* Port A Control (r/w) */
+#define MCFSIM_PADDR		(MCF_MBAR + 0x84)	/* Port A Direction (r/w) */
+#define MCFSIM_PADAT		(MCF_MBAR + 0x86)	/* Port A Data (r/w) */
+#define MCFSIM_PBCNT		(MCF_MBAR + 0x88)	/* Port B Control (r/w) */
+#define MCFSIM_PBDDR		(MCF_MBAR + 0x8c)	/* Port B Direction (r/w) */
+#define MCFSIM_PBDAT		(MCF_MBAR + 0x8e)	/* Port B Data (r/w) */
+#define MCFSIM_PCDDR		(MCF_MBAR + 0x94)	/* Port C Direction (r/w) */
+#define MCFSIM_PCDAT		(MCF_MBAR + 0x96)	/* Port C Data (r/w) */
+#define MCFSIM_PDCNT		(MCF_MBAR + 0x98)	/* Port D Control (r/w) */
 
+/*
+ * Generic GPIO support
+ */
+#define MCFGPIO_PIN_MAX		48
+#define MCFGPIO_IRQ_MAX		-1
+#define MCFGPIO_IRQ_VECBASE	-1
 
 /****************************************************************************/
 #endif	/* m5272sim_h */
diff --git a/arch/m68knommu/platform/5272/Makefile b/arch/m68knommu/platform/5272/Makefile
index 26135d92b34..3d90e6d9245 100644
--- a/arch/m68knommu/platform/5272/Makefile
+++ b/arch/m68knommu/platform/5272/Makefile
@@ -14,5 +14,5 @@
 asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
 
-obj-y := config.o
+obj-y := config.o gpio.o
 
diff --git a/arch/m68knommu/platform/5272/gpio.c b/arch/m68knommu/platform/5272/gpio.c
new file mode 100644
index 00000000000..459db89a89c
--- /dev/null
+++ b/arch/m68knommu/platform/5272/gpio.c
@@ -0,0 +1,81 @@
+/*
+ * Coldfire generic GPIO support
+ *
+ * (C) Copyright 2009, Steven King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/coldfire.h>
+#include <asm/mcfsim.h>
+#include <asm/mcfgpio.h>
+
+static struct mcf_gpio_chip mcf_gpio_chips[] = {
+	{
+		.gpio_chip = {
+			.label = "PA",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value,
+			.ngpio = 16,
+		},
+		.pddr = MCFSIM_PADDR,
+		.podr = MCFSIM_PADAT,
+		.ppdr = MCFSIM_PADAT,
+	},
+	{
+		.gpio_chip = {
+			.label = "PB",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value,
+			.base = 16,
+			.ngpio = 16,
+		},
+		.pddr = MCFSIM_PBDDR,
+		.podr = MCFSIM_PBDAT,
+		.ppdr = MCFSIM_PBDAT,
+	},
+	{
+		.gpio_chip = {
+			.label = "PC",
+			.request = mcf_gpio_request,
+			.free = mcf_gpio_free,
+			.direction_input = mcf_gpio_direction_input,
+			.direction_output = mcf_gpio_direction_output,
+			.get = mcf_gpio_get_value,
+			.set = mcf_gpio_set_value,
+			.base = 32,
+			.ngpio = 16,
+		},
+		.pddr = MCFSIM_PCDDR,
+		.podr = MCFSIM_PCDAT,
+		.ppdr = MCFSIM_PCDAT,
+	},
+};
+
+static int __init mcf_gpio_init(void)
+{
+	unsigned i = 0;
+	while (i < ARRAY_SIZE(mcf_gpio_chips))
+		(void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]);
+	return 0;
+}
+
+core_initcall(mcf_gpio_init);
-- cgit v1.2.3-70-g09d2

From 6da6e63c96f5fc8a92b6d1d6f12c2bf998588ffe Mon Sep 17 00:00:00 2001
From: "sfking@fdwdc.com"
Date: Fri, 19 Jun 2009 18:11:08 -0700
Subject: generic GPIO support for the Freescale Coldfire 528x.

Add support for the 528x.
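Each gpio.c in this series ends with the same initcall. Two things about it are easy to miss: the cast to struct gpio_chip * is valid only because gpio_chip is the first member of struct mcf_gpio_chip, and the return value of gpiochip_add() is deliberately discarded. A slightly more defensive shape, offered purely as a sketch of an alternative and not as what these patches do, takes the member's address directly and logs failures:

/* Drop-in variant of the initcall in the gpio.c files above. */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/gpio.h>

static int __init mcf_gpio_init_checked(void)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(mcf_gpio_chips); i++) {
		/* &chips[i].gpio_chip avoids relying on member order */
		int ret = gpiochip_add(&mcf_gpio_chips[i].gpio_chip);

		if (ret)
			pr_err("mcf_gpio: can't add chip %s (%d)\n",
			       mcf_gpio_chips[i].gpio_chip.label, ret);
	}
	return 0;
}
core_initcall(mcf_gpio_init_checked);

Functionally the two loops register the same chips; the only difference is that a failed registration would leave a trace in the log instead of vanishing silently.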
Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m528xsim.h | 151 ++++++++++++ arch/m68knommu/platform/528x/Makefile | 2 +- arch/m68knommu/platform/528x/gpio.c | 438 ++++++++++++++++++++++++++++++++++ 3 files changed, 590 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/528x/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m528xsim.h b/arch/m68k/include/asm/m528xsim.h index d79c49f8134..e2ad1f42b65 100644 --- a/arch/m68k/include/asm/m528xsim.h +++ b/arch/m68k/include/asm/m528xsim.h @@ -40,6 +40,157 @@ #define MCFSIM_DACR1 0x50 /* SDRAM base address 1 */ #define MCFSIM_DMR1 0x54 /* SDRAM address mask 1 */ +/* + * GPIO registers + */ +#define MCFGPIO_PORTA (MCF_IPSBAR + 0x00100000) +#define MCFGPIO_PORTB (MCF_IPSBAR + 0x00100001) +#define MCFGPIO_PORTC (MCF_IPSBAR + 0x00100002) +#define MCFGPIO_PORTD (MCF_IPSBAR + 0x00100003) +#define MCFGPIO_PORTE (MCF_IPSBAR + 0x00100004) +#define MCFGPIO_PORTF (MCF_IPSBAR + 0x00100005) +#define MCFGPIO_PORTG (MCF_IPSBAR + 0x00100006) +#define MCFGPIO_PORTH (MCF_IPSBAR + 0x00100007) +#define MCFGPIO_PORTJ (MCF_IPSBAR + 0x00100008) +#define MCFGPIO_PORTDD (MCF_IPSBAR + 0x00100009) +#define MCFGPIO_PORTEH (MCF_IPSBAR + 0x0010000A) +#define MCFGPIO_PORTEL (MCF_IPSBAR + 0x0010000B) +#define MCFGPIO_PORTAS (MCF_IPSBAR + 0x0010000C) +#define MCFGPIO_PORTQS (MCF_IPSBAR + 0x0010000D) +#define MCFGPIO_PORTSD (MCF_IPSBAR + 0x0010000E) +#define MCFGPIO_PORTTC (MCF_IPSBAR + 0x0010000F) +#define MCFGPIO_PORTTD (MCF_IPSBAR + 0x00100010) +#define MCFGPIO_PORTUA (MCF_IPSBAR + 0x00100011) + +#define MCFGPIO_DDRA (MCF_IPSBAR + 0x00100014) +#define MCFGPIO_DDRB (MCF_IPSBAR + 0x00100015) +#define MCFGPIO_DDRC (MCF_IPSBAR + 0x00100016) +#define MCFGPIO_DDRD (MCF_IPSBAR + 0x00100017) +#define MCFGPIO_DDRE (MCF_IPSBAR + 0x00100018) +#define MCFGPIO_DDRF (MCF_IPSBAR + 0x00100019) +#define MCFGPIO_DDRG (MCF_IPSBAR + 0x0010001A) +#define MCFGPIO_DDRH (MCF_IPSBAR + 0x0010001B) +#define MCFGPIO_DDRJ (MCF_IPSBAR + 0x0010001C) +#define MCFGPIO_DDRDD (MCF_IPSBAR + 0x0010001D) +#define MCFGPIO_DDREH (MCF_IPSBAR + 0x0010001E) +#define MCFGPIO_DDREL (MCF_IPSBAR + 0x0010001F) +#define MCFGPIO_DDRAS (MCF_IPSBAR + 0x00100020) +#define MCFGPIO_DDRQS (MCF_IPSBAR + 0x00100021) +#define MCFGPIO_DDRSD (MCF_IPSBAR + 0x00100022) +#define MCFGPIO_DDRTC (MCF_IPSBAR + 0x00100023) +#define MCFGPIO_DDRTD (MCF_IPSBAR + 0x00100024) +#define MCFGPIO_DDRUA (MCF_IPSBAR + 0x00100025) + +#define MCFGPIO_PORTAP (MCF_IPSBAR + 0x00100028) +#define MCFGPIO_PORTBP (MCF_IPSBAR + 0x00100029) +#define MCFGPIO_PORTCP (MCF_IPSBAR + 0x0010002A) +#define MCFGPIO_PORTDP (MCF_IPSBAR + 0x0010002B) +#define MCFGPIO_PORTEP (MCF_IPSBAR + 0x0010002C) +#define MCFGPIO_PORTFP (MCF_IPSBAR + 0x0010002D) +#define MCFGPIO_PORTGP (MCF_IPSBAR + 0x0010002E) +#define MCFGPIO_PORTHP (MCF_IPSBAR + 0x0010002F) +#define MCFGPIO_PORTJP (MCF_IPSBAR + 0x00100030) +#define MCFGPIO_PORTDDP (MCF_IPSBAR + 0x00100031) +#define MCFGPIO_PORTEHP (MCF_IPSBAR + 0x00100032) +#define MCFGPIO_PORTELP (MCF_IPSBAR + 0x00100033) +#define MCFGPIO_PORTASP (MCF_IPSBAR + 0x00100034) +#define MCFGPIO_PORTQSP (MCF_IPSBAR + 0x00100035) +#define MCFGPIO_PORTSDP (MCF_IPSBAR + 0x00100036) +#define MCFGPIO_PORTTCP (MCF_IPSBAR + 0x00100037) +#define MCFGPIO_PORTTDP (MCF_IPSBAR + 0x00100038) +#define MCFGPIO_PORTUAP (MCF_IPSBAR + 0x00100039) + +#define MCFGPIO_SETA (MCF_IPSBAR + 0x00100028) +#define MCFGPIO_SETB (MCF_IPSBAR + 0x00100029) +#define MCFGPIO_SETC (MCF_IPSBAR + 0x0010002A) +#define 
MCFGPIO_SETD (MCF_IPSBAR + 0x0010002B) +#define MCFGPIO_SETE (MCF_IPSBAR + 0x0010002C) +#define MCFGPIO_SETF (MCF_IPSBAR + 0x0010002D) +#define MCFGPIO_SETG (MCF_IPSBAR + 0x0010002E) +#define MCFGPIO_SETH (MCF_IPSBAR + 0x0010002F) +#define MCFGPIO_SETJ (MCF_IPSBAR + 0x00100030) +#define MCFGPIO_SETDD (MCF_IPSBAR + 0x00100031) +#define MCFGPIO_SETEH (MCF_IPSBAR + 0x00100032) +#define MCFGPIO_SETEL (MCF_IPSBAR + 0x00100033) +#define MCFGPIO_SETAS (MCF_IPSBAR + 0x00100034) +#define MCFGPIO_SETQS (MCF_IPSBAR + 0x00100035) +#define MCFGPIO_SETSD (MCF_IPSBAR + 0x00100036) +#define MCFGPIO_SETTC (MCF_IPSBAR + 0x00100037) +#define MCFGPIO_SETTD (MCF_IPSBAR + 0x00100038) +#define MCFGPIO_SETUA (MCF_IPSBAR + 0x00100039) + +#define MCFGPIO_CLRA (MCF_IPSBAR + 0x0010003C) +#define MCFGPIO_CLRB (MCF_IPSBAR + 0x0010003D) +#define MCFGPIO_CLRC (MCF_IPSBAR + 0x0010003E) +#define MCFGPIO_CLRD (MCF_IPSBAR + 0x0010003F) +#define MCFGPIO_CLRE (MCF_IPSBAR + 0x00100040) +#define MCFGPIO_CLRF (MCF_IPSBAR + 0x00100041) +#define MCFGPIO_CLRG (MCF_IPSBAR + 0x00100042) +#define MCFGPIO_CLRH (MCF_IPSBAR + 0x00100043) +#define MCFGPIO_CLRJ (MCF_IPSBAR + 0x00100044) +#define MCFGPIO_CLRDD (MCF_IPSBAR + 0x00100045) +#define MCFGPIO_CLREH (MCF_IPSBAR + 0x00100046) +#define MCFGPIO_CLREL (MCF_IPSBAR + 0x00100047) +#define MCFGPIO_CLRAS (MCF_IPSBAR + 0x00100048) +#define MCFGPIO_CLRQS (MCF_IPSBAR + 0x00100049) +#define MCFGPIO_CLRSD (MCF_IPSBAR + 0x0010004A) +#define MCFGPIO_CLRTC (MCF_IPSBAR + 0x0010004B) +#define MCFGPIO_CLRTD (MCF_IPSBAR + 0x0010004C) +#define MCFGPIO_CLRUA (MCF_IPSBAR + 0x0010004D) + +#define MCFGPIO_PBCDPAR (MCF_IPSBAR + 0x00100050) +#define MCFGPIO_PFPAR (MCF_IPSBAR + 0x00100051) +#define MCFGPIO_PEPAR (MCF_IPSBAR + 0x00100052) +#define MCFGPIO_PJPAR (MCF_IPSBAR + 0x00100054) +#define MCFGPIO_PSDPAR (MCF_IPSBAR + 0x00100055) +#define MCFGPIO_PASPAR (MCF_IPSBAR + 0x00100056) +#define MCFGPIO_PEHLPAR (MCF_IPSBAR + 0x00100058) +#define MCFGPIO_PQSPAR (MCF_IPSBAR + 0x00100059) +#define MCFGPIO_PTCPAR (MCF_IPSBAR + 0x0010005A) +#define MCFGPIO_PTDPAR (MCF_IPSBAR + 0x0010005B) +#define MCFGPIO_PUAPAR (MCF_IPSBAR + 0x0010005C) + +/* + * Edge Port registers + */ +#define MCFEPORT_EPPAR (MCF_IPSBAR + 0x00130000) +#define MCFEPORT_EPDDR (MCF_IPSBAR + 0x00130002) +#define MCFEPORT_EPIER (MCF_IPSBAR + 0x00130003) +#define MCFEPORT_EPDR (MCF_IPSBAR + 0x00130004) +#define MCFEPORT_EPPDR (MCF_IPSBAR + 0x00130005) +#define MCFEPORT_EPFR (MCF_IPSBAR + 0x00130006) + +/* + * Queued ADC registers + */ +#define MCFQADC_PORTQA (MCF_IPSBAR + 0x00190006) +#define MCFQADC_PORTQB (MCF_IPSBAR + 0x00190007) +#define MCFQADC_DDRQA (MCF_IPSBAR + 0x00190008) +#define MCFQADC_DDRQB (MCF_IPSBAR + 0x00190009) + +/* + * General Purpose Timers registers + */ +#define MCFGPTA_GPTPORT (MCF_IPSBAR + 0x001A001D) +#define MCFGPTA_GPTDDR (MCF_IPSBAR + 0x001A001E) +#define MCFGPTB_GPTPORT (MCF_IPSBAR + 0x001B001D) +#define MCFGPTB_GPTDDR (MCF_IPSBAR + 0x001B001E) +/* + * + * definitions for generic gpio support + * + */ +#define MCFGPIO_PODR MCFGPIO_PORTA /* port output data */ +#define MCFGPIO_PDDR MCFGPIO_DDRA /* port data direction */ +#define MCFGPIO_PPDR MCFGPIO_PORTAP /* port pin data */ +#define MCFGPIO_SETR MCFGPIO_SETA /* set output */ +#define MCFGPIO_CLRR MCFGPIO_CLRA /* clr output */ + +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE +#define MCFGPIO_PIN_MAX 180 + + /* * Derek Cheung - 6 Feb 2005 * add I2C and QSPI register definition using Freescale's MCF5282 diff --git a/arch/m68knommu/platform/528x/Makefile 
b/arch/m68knommu/platform/528x/Makefile index 26135d92b34..3d90e6d9245 100644 --- a/arch/m68knommu/platform/528x/Makefile +++ b/arch/m68knommu/platform/528x/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/528x/gpio.c b/arch/m68knommu/platform/528x/gpio.c new file mode 100644 index 00000000000..ec593950696 --- /dev/null +++ b/arch/m68knommu/platform/528x/gpio.c @@ -0,0 +1,438 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "NQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .base = 1, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "TA", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 8, + .ngpio = 4, + }, + .pddr = MCFGPTA_GPTDDR, + .podr = MCFGPTA_GPTPORT, + .ppdr = MCFGPTB_GPTPORT, + }, + { + .gpio_chip = { + .label = "TB", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 16, + .ngpio = 4, + }, + .pddr = MCFGPTB_GPTDDR, + .podr = MCFGPTB_GPTPORT, + .ppdr = MCFGPTB_GPTPORT, + }, + { + .gpio_chip = { + .label = "QA", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 24, + .ngpio = 4, + }, + .pddr = MCFQADC_DDRQA, + .podr = MCFQADC_PORTQA, + .ppdr = MCFQADC_PORTQA, + }, + { + .gpio_chip = { + .label = "QB", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 4, + }, + .pddr = MCFQADC_DDRQB, + .podr = MCFQADC_PORTQB, + .ppdr = MCFQADC_PORTQB, + }, + { + .gpio_chip = { + .label = "A", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRA, + .podr = MCFGPIO_PORTA, + .ppdr = MCFGPIO_PORTAP, + .setr = MCFGPIO_SETA, + .clrr = MCFGPIO_CLRA, + }, + { + .gpio_chip = { + .label = "B", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + 
.direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 48, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRB, + .podr = MCFGPIO_PORTB, + .ppdr = MCFGPIO_PORTBP, + .setr = MCFGPIO_SETB, + .clrr = MCFGPIO_CLRB, + }, + { + .gpio_chip = { + .label = "C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 56, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRC, + .podr = MCFGPIO_PORTC, + .ppdr = MCFGPIO_PORTCP, + .setr = MCFGPIO_SETC, + .clrr = MCFGPIO_CLRC, + }, + { + .gpio_chip = { + .label = "D", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRD, + .podr = MCFGPIO_PORTD, + .ppdr = MCFGPIO_PORTDP, + .setr = MCFGPIO_SETD, + .clrr = MCFGPIO_CLRD, + }, + { + .gpio_chip = { + .label = "E", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRE, + .podr = MCFGPIO_PORTE, + .ppdr = MCFGPIO_PORTEP, + .setr = MCFGPIO_SETE, + .clrr = MCFGPIO_CLRE, + }, + { + .gpio_chip = { + .label = "F", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 80, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRF, + .podr = MCFGPIO_PORTF, + .ppdr = MCFGPIO_PORTFP, + .setr = MCFGPIO_SETF, + .clrr = MCFGPIO_CLRF, + }, + { + .gpio_chip = { + .label = "G", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 88, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRG, + .podr = MCFGPIO_PORTG, + .ppdr = MCFGPIO_PORTGP, + .setr = MCFGPIO_SETG, + .clrr = MCFGPIO_CLRG, + }, + { + .gpio_chip = { + .label = "H", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 96, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRH, + .podr = MCFGPIO_PORTH, + .ppdr = MCFGPIO_PORTHP, + .setr = MCFGPIO_SETH, + .clrr = MCFGPIO_CLRH, + }, + { + .gpio_chip = { + .label = "J", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 104, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRJ, + .podr = MCFGPIO_PORTJ, + .ppdr = MCFGPIO_PORTJP, + .setr = MCFGPIO_SETJ, + .clrr = MCFGPIO_CLRJ, + }, + { + .gpio_chip = { + .label = "DD", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 112, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDRDD, + .podr = MCFGPIO_PORTDD, + .ppdr = MCFGPIO_PORTDDP, + .setr = MCFGPIO_SETDD, + 
.clrr = MCFGPIO_CLRDD, + }, + { + .gpio_chip = { + .label = "EH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 120, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDREH, + .podr = MCFGPIO_PORTEH, + .ppdr = MCFGPIO_PORTEHP, + .setr = MCFGPIO_SETEH, + .clrr = MCFGPIO_CLREH, + }, + { + .gpio_chip = { + .label = "EL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 128, + .ngpio = 8, + }, + .pddr = MCFGPIO_DDREL, + .podr = MCFGPIO_PORTEL, + .ppdr = MCFGPIO_PORTELP, + .setr = MCFGPIO_SETEL, + .clrr = MCFGPIO_CLREL, + }, + { + .gpio_chip = { + .label = "AS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 136, + .ngpio = 6, + }, + .pddr = MCFGPIO_DDRAS, + .podr = MCFGPIO_PORTAS, + .ppdr = MCFGPIO_PORTASP, + .setr = MCFGPIO_SETAS, + .clrr = MCFGPIO_CLRAS, + }, + { + .gpio_chip = { + .label = "QS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 144, + .ngpio = 7, + }, + .pddr = MCFGPIO_DDRQS, + .podr = MCFGPIO_PORTQS, + .ppdr = MCFGPIO_PORTQSP, + .setr = MCFGPIO_SETQS, + .clrr = MCFGPIO_CLRQS, + }, + { + .gpio_chip = { + .label = "SD", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 152, + .ngpio = 6, + }, + .pddr = MCFGPIO_DDRSD, + .podr = MCFGPIO_PORTSD, + .ppdr = MCFGPIO_PORTSDP, + .setr = MCFGPIO_SETSD, + .clrr = MCFGPIO_CLRSD, + }, + { + .gpio_chip = { + .label = "TC", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 160, + .ngpio = 4, + }, + .pddr = MCFGPIO_DDRTC, + .podr = MCFGPIO_PORTTC, + .ppdr = MCFGPIO_PORTTCP, + .setr = MCFGPIO_SETTC, + .clrr = MCFGPIO_CLRTC, + }, + { + .gpio_chip = { + .label = "TD", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 168, + .ngpio = 4, + }, + .pddr = MCFGPIO_DDRTD, + .podr = MCFGPIO_PORTTD, + .ppdr = MCFGPIO_PORTTDP, + .setr = MCFGPIO_SETTD, + .clrr = MCFGPIO_CLRTD, + }, + { + .gpio_chip = { + .label = "UA", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 176, + .ngpio = 4, + }, + .pddr = MCFGPIO_DDRUA, + .podr = MCFGPIO_PORTUA, + .ppdr = MCFGPIO_PORTUAP, + .setr = MCFGPIO_SETUA, + .clrr = MCFGPIO_CLRUA, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip 
*)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From f7a20ba06435d067247bd50a15a1d550b9b3fc7d Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:09 -0700 Subject: generic GPIO support for the Freescale Coldfire 5307. Add support for the 5307. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m5307sim.h | 11 ++++++-- arch/m68knommu/platform/5307/Makefile | 2 +- arch/m68knommu/platform/5307/gpio.c | 49 +++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 arch/m68knommu/platform/5307/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m5307sim.h b/arch/m68k/include/asm/m5307sim.h index 5886728409c..6a1870c925a 100644 --- a/arch/m68k/include/asm/m5307sim.h +++ b/arch/m68k/include/asm/m5307sim.h @@ -90,8 +90,15 @@ #define MCFSIM_DACR1 0x110 /* DRAM 1 Addr and Ctrl (r/w) */ #define MCFSIM_DMR1 0x114 /* DRAM 1 Mask reg (r/w) */ -#define MCFSIM_PADDR 0x244 /* Parallel Direction (r/w) */ -#define MCFSIM_PADAT 0x248 /* Parallel Data (r/w) */ +#define MCFSIM_PADDR (MCF_MBAR + 0x244) +#define MCFSIM_PADAT (MCF_MBAR + 0x248) + +/* + * Generic GPIO support + */ +#define MCFGPIO_PIN_MAX 16 +#define MCFGPIO_IRQ_MAX -1 +#define MCFGPIO_IRQ_VECBASE -1 /* Definition offset address for CS2-7 -- old mask 5307 */ diff --git a/arch/m68knommu/platform/5307/Makefile b/arch/m68knommu/platform/5307/Makefile index cfd586860fd..667db659845 100644 --- a/arch/m68knommu/platform/5307/Makefile +++ b/arch/m68knommu/platform/5307/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y += config.o +obj-y += config.o gpio.o diff --git a/arch/m68knommu/platform/5307/gpio.c b/arch/m68knommu/platform/5307/gpio.c new file mode 100644 index 00000000000..8da5880e406 --- /dev/null +++ b/arch/m68knommu/platform/5307/gpio.c @@ -0,0 +1,49 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PP", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 16, + }, + .pddr = MCFSIM_PADDR, + .podr = MCFSIM_PADAT, + .ppdr = MCFSIM_PADAT, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From 7846fe800a81adae1e28d9d035b5226f75358280 Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:10 -0700 Subject: generic GPIO support for the Freescale Coldfire 532x. Add support for the 532x. 
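For illustration only (not part of these patches): each mcf_gpio_chip above embeds a standard struct gpio_chip, so once gpiochip_add() has run, the pins are driven through the generic gpiolib calls. A minimal hypothetical consumer sketch follows; the GPIO number 41 is an assumption (pin 1 of port A, whose chip base is 40 on the 528x), and the label string is made up.

#include <linux/gpio.h>

static int example_blink(void)
{
        int ret;

        /* claim the pin; fails if another driver already owns it */
        ret = gpio_request(41, "example-led");
        if (ret)
                return ret;
        gpio_direction_output(41, 1);   /* configure as output, drive high */
        gpio_set_value(41, 0);          /* then drive it low */
        gpio_free(41);                  /* release the pin */
        return 0;
}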
Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m532xsim.h | 154 +++++++++------- arch/m68knommu/platform/532x/Makefile | 2 +- arch/m68knommu/platform/532x/config.c | 4 +- arch/m68knommu/platform/532x/gpio.c | 337 ++++++++++++++++++++++++++++++++++ 4 files changed, 424 insertions(+), 73 deletions(-) create mode 100644 arch/m68knommu/platform/532x/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h index eb7fd444894..3e80810b378 100644 --- a/arch/m68k/include/asm/m532xsim.h +++ b/arch/m68k/include/asm/m532xsim.h @@ -422,70 +422,70 @@ *********************************************************************/ /* Register read/write macros */ -#define MCF_GPIO_PODR_FECH MCF_REG08(0xFC0A4000) -#define MCF_GPIO_PODR_FECL MCF_REG08(0xFC0A4001) -#define MCF_GPIO_PODR_SSI MCF_REG08(0xFC0A4002) -#define MCF_GPIO_PODR_BUSCTL MCF_REG08(0xFC0A4003) -#define MCF_GPIO_PODR_BE MCF_REG08(0xFC0A4004) -#define MCF_GPIO_PODR_CS MCF_REG08(0xFC0A4005) -#define MCF_GPIO_PODR_PWM MCF_REG08(0xFC0A4006) -#define MCF_GPIO_PODR_FECI2C MCF_REG08(0xFC0A4007) -#define MCF_GPIO_PODR_UART MCF_REG08(0xFC0A4009) -#define MCF_GPIO_PODR_QSPI MCF_REG08(0xFC0A400A) -#define MCF_GPIO_PODR_TIMER MCF_REG08(0xFC0A400B) -#define MCF_GPIO_PODR_LCDDATAH MCF_REG08(0xFC0A400D) -#define MCF_GPIO_PODR_LCDDATAM MCF_REG08(0xFC0A400E) -#define MCF_GPIO_PODR_LCDDATAL MCF_REG08(0xFC0A400F) -#define MCF_GPIO_PODR_LCDCTLH MCF_REG08(0xFC0A4010) -#define MCF_GPIO_PODR_LCDCTLL MCF_REG08(0xFC0A4011) -#define MCF_GPIO_PDDR_FECH MCF_REG08(0xFC0A4014) -#define MCF_GPIO_PDDR_FECL MCF_REG08(0xFC0A4015) -#define MCF_GPIO_PDDR_SSI MCF_REG08(0xFC0A4016) -#define MCF_GPIO_PDDR_BUSCTL MCF_REG08(0xFC0A4017) -#define MCF_GPIO_PDDR_BE MCF_REG08(0xFC0A4018) -#define MCF_GPIO_PDDR_CS MCF_REG08(0xFC0A4019) -#define MCF_GPIO_PDDR_PWM MCF_REG08(0xFC0A401A) -#define MCF_GPIO_PDDR_FECI2C MCF_REG08(0xFC0A401B) -#define MCF_GPIO_PDDR_UART MCF_REG08(0xFC0A401C) -#define MCF_GPIO_PDDR_QSPI MCF_REG08(0xFC0A401E) -#define MCF_GPIO_PDDR_TIMER MCF_REG08(0xFC0A401F) -#define MCF_GPIO_PDDR_LCDDATAH MCF_REG08(0xFC0A4021) -#define MCF_GPIO_PDDR_LCDDATAM MCF_REG08(0xFC0A4022) -#define MCF_GPIO_PDDR_LCDDATAL MCF_REG08(0xFC0A4023) -#define MCF_GPIO_PDDR_LCDCTLH MCF_REG08(0xFC0A4024) -#define MCF_GPIO_PDDR_LCDCTLL MCF_REG08(0xFC0A4025) -#define MCF_GPIO_PPDSDR_FECH MCF_REG08(0xFC0A4028) -#define MCF_GPIO_PPDSDR_FECL MCF_REG08(0xFC0A4029) -#define MCF_GPIO_PPDSDR_SSI MCF_REG08(0xFC0A402A) -#define MCF_GPIO_PPDSDR_BUSCTL MCF_REG08(0xFC0A402B) -#define MCF_GPIO_PPDSDR_BE MCF_REG08(0xFC0A402C) -#define MCF_GPIO_PPDSDR_CS MCF_REG08(0xFC0A402D) -#define MCF_GPIO_PPDSDR_PWM MCF_REG08(0xFC0A402E) -#define MCF_GPIO_PPDSDR_FECI2C MCF_REG08(0xFC0A402F) -#define MCF_GPIO_PPDSDR_UART MCF_REG08(0xFC0A4031) -#define MCF_GPIO_PPDSDR_QSPI MCF_REG08(0xFC0A4032) -#define MCF_GPIO_PPDSDR_TIMER MCF_REG08(0xFC0A4033) -#define MCF_GPIO_PPDSDR_LCDDATAH MCF_REG08(0xFC0A4035) -#define MCF_GPIO_PPDSDR_LCDDATAM MCF_REG08(0xFC0A4036) -#define MCF_GPIO_PPDSDR_LCDDATAL MCF_REG08(0xFC0A4037) -#define MCF_GPIO_PPDSDR_LCDCTLH MCF_REG08(0xFC0A4038) -#define MCF_GPIO_PPDSDR_LCDCTLL MCF_REG08(0xFC0A4039) -#define MCF_GPIO_PCLRR_FECH MCF_REG08(0xFC0A403C) -#define MCF_GPIO_PCLRR_FECL MCF_REG08(0xFC0A403D) -#define MCF_GPIO_PCLRR_SSI MCF_REG08(0xFC0A403E) -#define MCF_GPIO_PCLRR_BUSCTL MCF_REG08(0xFC0A403F) -#define MCF_GPIO_PCLRR_BE MCF_REG08(0xFC0A4040) -#define MCF_GPIO_PCLRR_CS MCF_REG08(0xFC0A4041) -#define 
MCF_GPIO_PCLRR_PWM MCF_REG08(0xFC0A4042) -#define MCF_GPIO_PCLRR_FECI2C MCF_REG08(0xFC0A4043) -#define MCF_GPIO_PCLRR_UART MCF_REG08(0xFC0A4045) -#define MCF_GPIO_PCLRR_QSPI MCF_REG08(0xFC0A4046) -#define MCF_GPIO_PCLRR_TIMER MCF_REG08(0xFC0A4047) -#define MCF_GPIO_PCLRR_LCDDATAH MCF_REG08(0xFC0A4049) -#define MCF_GPIO_PCLRR_LCDDATAM MCF_REG08(0xFC0A404A) -#define MCF_GPIO_PCLRR_LCDDATAL MCF_REG08(0xFC0A404B) -#define MCF_GPIO_PCLRR_LCDCTLH MCF_REG08(0xFC0A404C) -#define MCF_GPIO_PCLRR_LCDCTLL MCF_REG08(0xFC0A404D) +#define MCFGPIO_PODR_FECH (0xFC0A4000) +#define MCFGPIO_PODR_FECL (0xFC0A4001) +#define MCFGPIO_PODR_SSI (0xFC0A4002) +#define MCFGPIO_PODR_BUSCTL (0xFC0A4003) +#define MCFGPIO_PODR_BE (0xFC0A4004) +#define MCFGPIO_PODR_CS (0xFC0A4005) +#define MCFGPIO_PODR_PWM (0xFC0A4006) +#define MCFGPIO_PODR_FECI2C (0xFC0A4007) +#define MCFGPIO_PODR_UART (0xFC0A4009) +#define MCFGPIO_PODR_QSPI (0xFC0A400A) +#define MCFGPIO_PODR_TIMER (0xFC0A400B) +#define MCFGPIO_PODR_LCDDATAH (0xFC0A400D) +#define MCFGPIO_PODR_LCDDATAM (0xFC0A400E) +#define MCFGPIO_PODR_LCDDATAL (0xFC0A400F) +#define MCFGPIO_PODR_LCDCTLH (0xFC0A4010) +#define MCFGPIO_PODR_LCDCTLL (0xFC0A4011) +#define MCFGPIO_PDDR_FECH (0xFC0A4014) +#define MCFGPIO_PDDR_FECL (0xFC0A4015) +#define MCFGPIO_PDDR_SSI (0xFC0A4016) +#define MCFGPIO_PDDR_BUSCTL (0xFC0A4017) +#define MCFGPIO_PDDR_BE (0xFC0A4018) +#define MCFGPIO_PDDR_CS (0xFC0A4019) +#define MCFGPIO_PDDR_PWM (0xFC0A401A) +#define MCFGPIO_PDDR_FECI2C (0xFC0A401B) +#define MCFGPIO_PDDR_UART (0xFC0A401C) +#define MCFGPIO_PDDR_QSPI (0xFC0A401E) +#define MCFGPIO_PDDR_TIMER (0xFC0A401F) +#define MCFGPIO_PDDR_LCDDATAH (0xFC0A4021) +#define MCFGPIO_PDDR_LCDDATAM (0xFC0A4022) +#define MCFGPIO_PDDR_LCDDATAL (0xFC0A4023) +#define MCFGPIO_PDDR_LCDCTLH (0xFC0A4024) +#define MCFGPIO_PDDR_LCDCTLL (0xFC0A4025) +#define MCFGPIO_PPDSDR_FECH (0xFC0A4028) +#define MCFGPIO_PPDSDR_FECL (0xFC0A4029) +#define MCFGPIO_PPDSDR_SSI (0xFC0A402A) +#define MCFGPIO_PPDSDR_BUSCTL (0xFC0A402B) +#define MCFGPIO_PPDSDR_BE (0xFC0A402C) +#define MCFGPIO_PPDSDR_CS (0xFC0A402D) +#define MCFGPIO_PPDSDR_PWM (0xFC0A402E) +#define MCFGPIO_PPDSDR_FECI2C (0xFC0A402F) +#define MCFGPIO_PPDSDR_UART (0xFC0A4031) +#define MCFGPIO_PPDSDR_QSPI (0xFC0A4032) +#define MCFGPIO_PPDSDR_TIMER (0xFC0A4033) +#define MCFGPIO_PPDSDR_LCDDATAH (0xFC0A4035) +#define MCFGPIO_PPDSDR_LCDDATAM (0xFC0A4036) +#define MCFGPIO_PPDSDR_LCDDATAL (0xFC0A4037) +#define MCFGPIO_PPDSDR_LCDCTLH (0xFC0A4038) +#define MCFGPIO_PPDSDR_LCDCTLL (0xFC0A4039) +#define MCFGPIO_PCLRR_FECH (0xFC0A403C) +#define MCFGPIO_PCLRR_FECL (0xFC0A403D) +#define MCFGPIO_PCLRR_SSI (0xFC0A403E) +#define MCFGPIO_PCLRR_BUSCTL (0xFC0A403F) +#define MCFGPIO_PCLRR_BE (0xFC0A4040) +#define MCFGPIO_PCLRR_CS (0xFC0A4041) +#define MCFGPIO_PCLRR_PWM (0xFC0A4042) +#define MCFGPIO_PCLRR_FECI2C (0xFC0A4043) +#define MCFGPIO_PCLRR_UART (0xFC0A4045) +#define MCFGPIO_PCLRR_QSPI (0xFC0A4046) +#define MCFGPIO_PCLRR_TIMER (0xFC0A4047) +#define MCFGPIO_PCLRR_LCDDATAH (0xFC0A4049) +#define MCFGPIO_PCLRR_LCDDATAM (0xFC0A404A) +#define MCFGPIO_PCLRR_LCDDATAL (0xFC0A404B) +#define MCFGPIO_PCLRR_LCDCTLH (0xFC0A404C) +#define MCFGPIO_PCLRR_LCDCTLL (0xFC0A404D) #define MCF_GPIO_PAR_FEC MCF_REG08(0xFC0A4050) #define MCF_GPIO_PAR_PWM MCF_REG08(0xFC0A4051) #define MCF_GPIO_PAR_BUSCTL MCF_REG08(0xFC0A4052) @@ -1187,6 +1187,20 @@ /* Bit definitions and macros for MCF_GPIO_DSCR_IRQ */ #define MCF_GPIO_DSCR_IRQ_IRQ_DSE(x) (((x)&0x03)<<0) +/* + * Generic GPIO support + */ +#define MCFGPIO_PODR MCFGPIO_PODR_FECH +#define 
MCFGPIO_PDDR MCFGPIO_PDDR_FECH +#define MCFGPIO_PPDR MCFGPIO_PPDSDR_FECH +#define MCFGPIO_SETR MCFGPIO_PPDSDR_FECH +#define MCFGPIO_CLRR MCFGPIO_PCLRR_FECH + +#define MCFGPIO_PIN_MAX 136 +#define MCFGPIO_IRQ_MAX 8 +#define MCFGPIO_IRQ_VECBASE MCFINT_VECBASE + + /********************************************************************* * * Interrupt Controller (INTC) @@ -2154,12 +2168,12 @@ *********************************************************************/ /* Register read/write macros */ -#define MCF_EPORT_EPPAR MCF_REG16(0xFC094000) -#define MCF_EPORT_EPDDR MCF_REG08(0xFC094002) -#define MCF_EPORT_EPIER MCF_REG08(0xFC094003) -#define MCF_EPORT_EPDR MCF_REG08(0xFC094004) -#define MCF_EPORT_EPPDR MCF_REG08(0xFC094005) -#define MCF_EPORT_EPFR MCF_REG08(0xFC094006) +#define MCFEPORT_EPPAR (0xFC094000) +#define MCFEPORT_EPDDR (0xFC094002) +#define MCFEPORT_EPIER (0xFC094003) +#define MCFEPORT_EPDR (0xFC094004) +#define MCFEPORT_EPPDR (0xFC094005) +#define MCFEPORT_EPFR (0xFC094006) /* Bit definitions and macros for MCF_EPORT_EPPAR */ #define MCF_EPORT_EPPAR_EPPA1(x) (((x)&0x0003)<<2) diff --git a/arch/m68knommu/platform/532x/Makefile b/arch/m68knommu/platform/532x/Makefile index e431912f562..4cc23245bcd 100644 --- a/arch/m68knommu/platform/532x/Makefile +++ b/arch/m68knommu/platform/532x/Makefile @@ -15,4 +15,4 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 #obj-y := config.o usb-mcf532x.o spi-mcf532x.o -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c index cdb761971f7..28eb9ea55f4 100644 --- a/arch/m68knommu/platform/532x/config.c +++ b/arch/m68knommu/platform/532x/config.c @@ -438,8 +438,8 @@ void gpio_init(void) /* Initialize TIN3 as a GPIO output to enable the write half of the latch */ MCF_GPIO_PAR_TIMER = 0x00; - MCF_GPIO_PDDR_TIMER = 0x08; - MCF_GPIO_PCLRR_TIMER = 0x0; + __raw_writeb(0x08, MCFGPIO_PDDR_TIMER); + __raw_writeb(0x00, MCFGPIO_PCLRR_TIMER); } diff --git a/arch/m68knommu/platform/532x/gpio.c b/arch/m68knommu/platform/532x/gpio.c new file mode 100644 index 00000000000..184b77382c3 --- /dev/null +++ b/arch/m68knommu/platform/532x/gpio.c @@ -0,0 +1,337 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PIRQ", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 8, + }, + .pddr = MCFEPORT_EPDDR, + .podr = MCFEPORT_EPDR, + .ppdr = MCFEPORT_EPPDR, + }, + { + .gpio_chip = { + .label = "FECH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 8, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FECH, + .podr = MCFGPIO_PODR_FECH, + .ppdr = MCFGPIO_PPDSDR_FECH, + .setr = MCFGPIO_PPDSDR_FECH, + .clrr = MCFGPIO_PCLRR_FECH, + }, + { + .gpio_chip = { + .label = "FECL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 16, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_FECL, + .podr = MCFGPIO_PODR_FECL, + .ppdr = MCFGPIO_PPDSDR_FECL, + .setr = MCFGPIO_PPDSDR_FECL, + .clrr = MCFGPIO_PCLRR_FECL, + }, + { + .gpio_chip = { + .label = "SSI", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 24, + .ngpio = 5, + }, + .pddr = MCFGPIO_PDDR_SSI, + .podr = MCFGPIO_PODR_SSI, + .ppdr = MCFGPIO_PPDSDR_SSI, + .setr = MCFGPIO_PPDSDR_SSI, + .clrr = MCFGPIO_PCLRR_SSI, + }, + { + .gpio_chip = { + .label = "BUSCTL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 32, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BUSCTL, + .podr = MCFGPIO_PODR_BUSCTL, + .ppdr = MCFGPIO_PPDSDR_BUSCTL, + .setr = MCFGPIO_PPDSDR_BUSCTL, + .clrr = MCFGPIO_PCLRR_BUSCTL, + }, + { + .gpio_chip = { + .label = "BE", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 40, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_BE, + .podr = MCFGPIO_PODR_BE, + .ppdr = MCFGPIO_PPDSDR_BE, + .setr = MCFGPIO_PPDSDR_BE, + .clrr = MCFGPIO_PCLRR_BE, + }, + { + .gpio_chip = { + .label = "CS", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 49, + .ngpio = 5, + }, + .pddr = MCFGPIO_PDDR_CS, + .podr = MCFGPIO_PODR_CS, + .ppdr = MCFGPIO_PPDSDR_CS, + .setr = MCFGPIO_PPDSDR_CS, + .clrr = MCFGPIO_PCLRR_CS, + }, + { + .gpio_chip = { + .label = "PWM", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 58, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_PWM, + .podr = MCFGPIO_PODR_PWM, + .ppdr = MCFGPIO_PPDSDR_PWM, + .setr = MCFGPIO_PPDSDR_PWM, + .clrr = MCFGPIO_PCLRR_PWM, + }, + { + .gpio_chip 
= { + .label = "FECI2C", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 64, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_FECI2C, + .podr = MCFGPIO_PODR_FECI2C, + .ppdr = MCFGPIO_PPDSDR_FECI2C, + .setr = MCFGPIO_PPDSDR_FECI2C, + .clrr = MCFGPIO_PCLRR_FECI2C, + }, + { + .gpio_chip = { + .label = "UART", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 72, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_UART, + .podr = MCFGPIO_PODR_UART, + .ppdr = MCFGPIO_PPDSDR_UART, + .setr = MCFGPIO_PPDSDR_UART, + .clrr = MCFGPIO_PCLRR_UART, + }, + { + .gpio_chip = { + .label = "QSPI", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 80, + .ngpio = 6, + }, + .pddr = MCFGPIO_PDDR_QSPI, + .podr = MCFGPIO_PODR_QSPI, + .ppdr = MCFGPIO_PPDSDR_QSPI, + .setr = MCFGPIO_PPDSDR_QSPI, + .clrr = MCFGPIO_PCLRR_QSPI, + }, + { + .gpio_chip = { + .label = "TIMER", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 88, + .ngpio = 4, + }, + .pddr = MCFGPIO_PDDR_TIMER, + .podr = MCFGPIO_PODR_TIMER, + .ppdr = MCFGPIO_PPDSDR_TIMER, + .setr = MCFGPIO_PPDSDR_TIMER, + .clrr = MCFGPIO_PCLRR_TIMER, + }, + { + .gpio_chip = { + .label = "LCDDATAH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 96, + .ngpio = 2, + }, + .pddr = MCFGPIO_PDDR_LCDDATAH, + .podr = MCFGPIO_PODR_LCDDATAH, + .ppdr = MCFGPIO_PPDSDR_LCDDATAH, + .setr = MCFGPIO_PPDSDR_LCDDATAH, + .clrr = MCFGPIO_PCLRR_LCDDATAH, + }, + { + .gpio_chip = { + .label = "LCDDATAM", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 104, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_LCDDATAM, + .podr = MCFGPIO_PODR_LCDDATAM, + .ppdr = MCFGPIO_PPDSDR_LCDDATAM, + .setr = MCFGPIO_PPDSDR_LCDDATAM, + .clrr = MCFGPIO_PCLRR_LCDDATAM, + }, + { + .gpio_chip = { + .label = "LCDDATAL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 112, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_LCDDATAL, + .podr = MCFGPIO_PODR_LCDDATAL, + .ppdr = MCFGPIO_PPDSDR_LCDDATAL, + .setr = MCFGPIO_PPDSDR_LCDDATAL, + .clrr = MCFGPIO_PCLRR_LCDDATAL, + }, + { + .gpio_chip = { + .label = "LCDCTLH", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 120, + .ngpio = 1, + }, + .pddr = MCFGPIO_PDDR_LCDCTLH, + .podr = MCFGPIO_PODR_LCDCTLH, + .ppdr = 
MCFGPIO_PPDSDR_LCDCTLH, + .setr = MCFGPIO_PPDSDR_LCDCTLH, + .clrr = MCFGPIO_PCLRR_LCDCTLH, + }, + { + .gpio_chip = { + .label = "LCDCTLL", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value_fast, + .base = 128, + .ngpio = 8, + }, + .pddr = MCFGPIO_PDDR_LCDCTLL, + .podr = MCFGPIO_PODR_LCDCTLL, + .ppdr = MCFGPIO_PPDSDR_LCDCTLL, + .setr = MCFGPIO_PPDSDR_LCDCTLL, + .clrr = MCFGPIO_PCLRR_LCDCTLL, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From dca7cf33bb5c98187ffcc849652b1b2c4c8a0c25 Mon Sep 17 00:00:00 2001 From: "sfking@fdwdc.com" Date: Fri, 19 Jun 2009 18:11:11 -0700 Subject: generic GPIO support for the Freescale Coldfire 5407. Add support for the 5407. Signed-off-by: Steven King Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m5407sim.h | 10 +++++-- arch/m68knommu/platform/5407/Makefile | 2 +- arch/m68knommu/platform/5407/gpio.c | 49 +++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 arch/m68knommu/platform/5407/gpio.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m5407sim.h b/arch/m68k/include/asm/m5407sim.h index cc22c4a5300..25194756b43 100644 --- a/arch/m68k/include/asm/m5407sim.h +++ b/arch/m68k/include/asm/m5407sim.h @@ -73,9 +73,15 @@ #define MCFSIM_DACR1 0x110 /* DRAM 1 Addr and Ctrl (r/w) */ #define MCFSIM_DMR1 0x114 /* DRAM 1 Mask reg (r/w) */ -#define MCFSIM_PADDR 0x244 /* Parallel Direction (r/w) */ -#define MCFSIM_PADAT 0x248 /* Parallel Data (r/w) */ +#define MCFSIM_PADDR (MCF_MBAR + 0x244) +#define MCFSIM_PADAT (MCF_MBAR + 0x248) +/* + * Generic GPIO support + */ +#define MCFGPIO_PIN_MAX 16 +#define MCFGPIO_IRQ_MAX -1 +#define MCFGPIO_IRQ_VECBASE -1 /* * Some symbol defines for the above... diff --git a/arch/m68knommu/platform/5407/Makefile b/arch/m68knommu/platform/5407/Makefile index e6035e7a2d3..dee62c5dbaa 100644 --- a/arch/m68knommu/platform/5407/Makefile +++ b/arch/m68knommu/platform/5407/Makefile @@ -14,5 +14,5 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 -obj-y := config.o +obj-y := config.o gpio.o diff --git a/arch/m68knommu/platform/5407/gpio.c b/arch/m68knommu/platform/5407/gpio.c new file mode 100644 index 00000000000..8da5880e406 --- /dev/null +++ b/arch/m68knommu/platform/5407/gpio.c @@ -0,0 +1,49 @@ +/* + * Coldfire generic GPIO support + * + * (C) Copyright 2009, Steven King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#include +#include + +#include +#include +#include + +static struct mcf_gpio_chip mcf_gpio_chips[] = { + { + .gpio_chip = { + .label = "PP", + .request = mcf_gpio_request, + .free = mcf_gpio_free, + .direction_input = mcf_gpio_direction_input, + .direction_output = mcf_gpio_direction_output, + .get = mcf_gpio_get_value, + .set = mcf_gpio_set_value, + .ngpio = 16, + }, + .pddr = MCFSIM_PADDR, + .podr = MCFSIM_PADAT, + .ppdr = MCFSIM_PADAT, + }, +}; + +static int __init mcf_gpio_init(void) +{ + unsigned i = 0; + while (i < ARRAY_SIZE(mcf_gpio_chips)) + (void)gpiochip_add((struct gpio_chip *)&mcf_gpio_chips[i++]); + return 0; +} + +core_initcall(mcf_gpio_init); -- cgit v1.2.3-70-g09d2 From c573b29baf3e1b71a60ee6de76dd28c9c49fb87f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 2 Jul 2009 14:08:03 +1000 Subject: arch/m68knommu/kernel/time.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Greg Ungerer --- arch/m68knommu/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f7221..c2aa717de08 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -69,7 +69,7 @@ static unsigned long read_rtc_mmss(void) if ((year += 1900) < 1970) year += 100; - return mktime(year, mon, day, hour, min, sec);; + return mktime(year, mon, day, hour, min, sec); } unsigned long read_persistent_clock(void) -- cgit v1.2.3-70-g09d2 From d668bf0a0d73daea4eaaf748435752f52cc077aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Aug 2009 16:52:44 +0200 Subject: m68knommu: convert to asm-generic/hardirq.h Signed-off-by: Christoph Hellwig Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/hardirq_no.h | 10 +--------- arch/m68knommu/kernel/irq.c | 5 ----- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/hardirq_no.h b/arch/m68k/include/asm/hardirq_no.h index bfad28149a4..b44b14be87d 100644 --- a/arch/m68k/include/asm/hardirq_no.h +++ b/arch/m68k/include/asm/hardirq_no.h @@ -1,16 +1,8 @@ #ifndef __M68K_HARDIRQ_H #define __M68K_HARDIRQ_H -#include -#include #include -typedef struct { - unsigned int __softirq_pending; -} ____cacheline_aligned irq_cpustat_t; - -#include /* Standard mappings for irq_cpustat_t above */ - #define HARDIRQ_BITS 8 /* @@ -22,6 +14,6 @@ typedef struct { # error HARDIRQ_BITS is too low! #endif -void ack_bad_irq(unsigned int irq); +#include #endif /* __M68K_HARDIRQ_H */ diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index 56e0f4c55a6..9e0c100447c 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -29,11 +29,6 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) set_irq_regs(oldregs); } -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "IRQ: unexpected irq=%d\n", irq); -} - static struct irq_chip m_irq_chip = { .name = "M68K-INTC", .enable = enable_vector, -- cgit v1.2.3-70-g09d2 From 6192c1ea0ac5806592c5c9cc2b2b94b0298df02b Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Thu, 2 Jul 2009 16:11:58 +1000 Subject: m68k: merge the mmu and non-mmu versions of checksum.h The mmu and non-mmu versions of checksum.h are mostly the same, merge them. 
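For reference, the ColdFire branch of the merged header folds the 32-bit partial sum in plain C, since those cores lack the 68k swap/addxw instructions the asm version uses. A standalone userspace sketch of that same folding arithmetic (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit one's-complement partial sum to 16 bits, then invert. */
static uint16_t fold(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);     /* add the carries back in */
        sum = (sum & 0xffff) + (sum >> 16);     /* absorb the final carry-out */
        return (uint16_t)~sum;
}

int main(void)
{
        printf("0x%04x\n", fold(0x1ffff));      /* prints 0xfffe */
        return 0;
}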
Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/checksum.h | 173 +++++++++++++++++++++++++++++++++++- arch/m68k/include/asm/checksum_mm.h | 148 ------------------------------ arch/m68k/include/asm/checksum_no.h | 132 --------------------------- arch/m68knommu/lib/checksum.c | 11 +-- 4 files changed, 171 insertions(+), 293 deletions(-) delete mode 100644 arch/m68k/include/asm/checksum_mm.h delete mode 100644 arch/m68k/include/asm/checksum_no.h (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h index 1cf54476745..ec514485c8b 100644 --- a/arch/m68k/include/asm/checksum.h +++ b/arch/m68k/include/asm/checksum.h @@ -1,5 +1,170 @@ -#ifdef __uClinux__ -#include "checksum_no.h" +#ifndef _M68K_CHECKSUM_H +#define _M68K_CHECKSUM_H + +#include + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +extern __wsum csum_partial_copy_from_user(const void __user *src, + void *dst, + int len, __wsum sum, + int *csum_err); + +extern __wsum csum_partial_copy_nocheck(const void *src, + void *dst, int len, + __wsum sum); + + +#ifdef CONFIG_COLDFIRE + +/* + * The ColdFire cores don't support all the 68k instructions used + * in the optimized checksum code below. So it reverts back to using + * more standard C coded checksums. The fast checksum code is + * significantly larger than the optimized version, so it is not + * inlined here. + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +static inline __sum16 csum_fold(__wsum sum) +{ + unsigned int tmp = (__force u32)sum; + + tmp = (tmp & 0xffff) + (tmp >> 16); + tmp = (tmp & 0xffff) + (tmp >> 16); + + return (__force __sum16)~tmp; +} + #else -#include "checksum_mm.h" -#endif + +/* + * This is a version of ip_fast_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum = 0; + unsigned long tmp; + + __asm__ ("subqw #1,%2\n" + "1:\t" + "movel %1@+,%3\n\t" + "addxl %3,%0\n\t" + "dbra %2,1b\n\t" + "movel %0,%3\n\t" + "swap %3\n\t" + "addxw %3,%0\n\t" + "clrw %3\n\t" + "addxw %3,%0\n\t" + : "=d" (sum), "=&a" (iph), "=&d" (ihl), "=&d" (tmp) + : "0" (sum), "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)~sum; +} + +static inline __sum16 csum_fold(__wsum sum) +{ + unsigned int tmp = (__force u32)sum; + + __asm__("swap %1\n\t" + "addw %1, %0\n\t" + "clrw %1\n\t" + "addxw %1, %0" + : "=&d" (sum), "=&d" (tmp) + : "0" (sum), "1" (tmp)); + + return (__force __sum16)~sum; +} + +#endif /* CONFIG_COLDFIRE */ + +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + __asm__ ("addl %2,%0\n\t" + "addxl %3,%0\n\t" + "addxl %4,%0\n\t" + "clrl %1\n\t" + "addxl %1,%0" + : "=&d" (sum), "=d" (saddr) + : "g" (daddr), "1" (saddr), "d" (len + proto), + "0" (sum)); + return sum; +} + + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold (csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 +csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __u32 len, unsigned short proto, __wsum sum) +{ + register unsigned long tmp; + __asm__("addl %2@,%0\n\t" + "movel %2@(4),%1\n\t" + "addxl %1,%0\n\t" + "movel %2@(8),%1\n\t" + "addxl %1,%0\n\t" + "movel %2@(12),%1\n\t" + "addxl %1,%0\n\t" + "movel %3@,%1\n\t" + "addxl %1,%0\n\t" + "movel %3@(4),%1\n\t" + "addxl %1,%0\n\t" + "movel %3@(8),%1\n\t" + "addxl %1,%0\n\t" + "movel %3@(12),%1\n\t" + "addxl %1,%0\n\t" + "addxl %4,%0\n\t" + "clrl %1\n\t" + "addxl %1,%0" + : "=&d" (sum), "=&d" (tmp) + : "a" (saddr), "a" (daddr), "d" (len + proto), + "0" (sum)); + + return csum_fold(sum); +} + +#endif /* _M68K_CHECKSUM_H */ diff --git a/arch/m68k/include/asm/checksum_mm.h b/arch/m68k/include/asm/checksum_mm.h deleted file mode 100644 index 494f9aec37e..00000000000 --- a/arch/m68k/include/asm/checksum_mm.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef _M68K_CHECKSUM_H -#define _M68K_CHECKSUM_H - -#include - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * the same as csum_partial, but copies from src while it - * checksums - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -extern __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, - int len, __wsum sum, - int *csum_err); - -extern __wsum csum_partial_copy_nocheck(const void *src, - void *dst, int len, - __wsum sum); - -/* - * This is a version of ip_compute_csum() optimized 
for IP headers, - * which always checksum on 4 octet boundaries. - * - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum = 0; - unsigned long tmp; - - __asm__ ("subqw #1,%2\n" - "1:\t" - "movel %1@+,%3\n\t" - "addxl %3,%0\n\t" - "dbra %2,1b\n\t" - "movel %0,%3\n\t" - "swap %3\n\t" - "addxw %3,%0\n\t" - "clrw %3\n\t" - "addxw %3,%0\n\t" - : "=d" (sum), "=&a" (iph), "=&d" (ihl), "=&d" (tmp) - : "0" (sum), "1" (iph), "2" (ihl) - : "memory"); - return (__force __sum16)~sum; -} - -/* - * Fold a partial checksum - */ - -static inline __sum16 csum_fold(__wsum sum) -{ - unsigned int tmp = (__force u32)sum; - __asm__("swap %1\n\t" - "addw %1, %0\n\t" - "clrw %1\n\t" - "addxw %1, %0" - : "=&d" (sum), "=&d" (tmp) - : "0" (sum), "1" (tmp)); - return (__force __sum16)~sum; -} - - -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - __asm__ ("addl %2,%0\n\t" - "addxl %3,%0\n\t" - "addxl %4,%0\n\t" - "clrl %1\n\t" - "addxl %1,%0" - : "=&d" (sum), "=d" (saddr) - : "g" (daddr), "1" (saddr), "d" (len + proto), - "0" (sum)); - return sum; -} - - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ - -static inline __sum16 ip_compute_csum(const void *buff, int len) -{ - return csum_fold (csum_partial(buff, len, 0)); -} - -#define _HAVE_ARCH_IPV6_CSUM -static __inline__ __sum16 -csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, unsigned short proto, __wsum sum) -{ - register unsigned long tmp; - __asm__("addl %2@,%0\n\t" - "movel %2@(4),%1\n\t" - "addxl %1,%0\n\t" - "movel %2@(8),%1\n\t" - "addxl %1,%0\n\t" - "movel %2@(12),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@,%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(4),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(8),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(12),%1\n\t" - "addxl %1,%0\n\t" - "addxl %4,%0\n\t" - "clrl %1\n\t" - "addxl %1,%0" - : "=&d" (sum), "=&d" (tmp) - : "a" (saddr), "a" (daddr), "d" (len + proto), - "0" (sum)); - - return csum_fold(sum); -} - -#endif /* _M68K_CHECKSUM_H */ diff --git a/arch/m68k/include/asm/checksum_no.h b/arch/m68k/include/asm/checksum_no.h deleted file mode 100644 index 81883482ffb..00000000000 --- a/arch/m68k/include/asm/checksum_no.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef _M68K_CHECKSUM_H -#define _M68K_CHECKSUM_H - -#include - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * the same as csum_partial, but copies from src while it - * checksums - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -__wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum); - - -/* - * the same as csum_partial_copy, but copies from user space. 
- * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -extern __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, int len, __wsum sum, int *csum_err); - -__sum16 ip_fast_csum(const void *iph, unsigned int ihl); - -/* - * Fold a partial checksum - */ - -static inline __sum16 csum_fold(__wsum sum) -{ - unsigned int tmp = (__force u32)sum; -#ifdef CONFIG_COLDFIRE - tmp = (tmp & 0xffff) + (tmp >> 16); - tmp = (tmp & 0xffff) + (tmp >> 16); - return (__force __sum16)~tmp; -#else - __asm__("swap %1\n\t" - "addw %1, %0\n\t" - "clrw %1\n\t" - "addxw %1, %0" - : "=&d" (sum), "=&d" (tmp) - : "0" (sum), "1" (sum)); - return (__force __sum16)~sum; -#endif -} - - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ - -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - __asm__ ("addl %1,%0\n\t" - "addxl %4,%0\n\t" - "addxl %5,%0\n\t" - "clrl %1\n\t" - "addxl %1,%0" - : "=&d" (sum), "=&d" (saddr) - : "0" (daddr), "1" (saddr), "d" (len + proto), - "d"(sum)); - return sum; -} - -static inline __sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ - -extern __sum16 ip_compute_csum(const void *buff, int len); - -#define _HAVE_ARCH_IPV6_CSUM -static __inline__ __sum16 -csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, unsigned short proto, __wsum sum) -{ - register unsigned long tmp; - __asm__("addl %2@,%0\n\t" - "movel %2@(4),%1\n\t" - "addxl %1,%0\n\t" - "movel %2@(8),%1\n\t" - "addxl %1,%0\n\t" - "movel %2@(12),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@,%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(4),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(8),%1\n\t" - "addxl %1,%0\n\t" - "movel %3@(12),%1\n\t" - "addxl %1,%0\n\t" - "addxl %4,%0\n\t" - "clrl %1\n\t" - "addxl %1,%0" - : "=&d" (sum), "=&d" (tmp) - : "a" (saddr), "a" (daddr), "d" (len + proto), - "0" (sum)); - - return csum_fold(sum); -} - -#endif /* _M68K_CHECKSUM_H */ diff --git a/arch/m68knommu/lib/checksum.c b/arch/m68knommu/lib/checksum.c index 269d83bfbbe..eccf25d3d73 100644 --- a/arch/m68knommu/lib/checksum.c +++ b/arch/m68knommu/lib/checksum.c @@ -92,6 +92,7 @@ out: return result; } +#ifdef CONFIG_COLDFIRE /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. 
@@ -100,6 +101,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph,ihl*4); } +#endif /* * computes the checksum of a memory block at buff, length len, @@ -126,15 +128,6 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) EXPORT_SYMBOL(csum_partial); -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -__sum16 ip_compute_csum(const void *buff, int len) -{ - return (__force __sum16)~do_csum(buff,len); -} - /* * copy from fs while checksumming, otherwise like csum_partial */ -- cgit v1.2.3-70-g09d2 From cd3dd4068db5e1258a14b63e0feaf0332640d896 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 27 Apr 2009 15:09:29 +1000 Subject: m68knommu: use general interrupt controller for ColdFire 520x family Create general interrupt controller code for the ColdFire 520x family that does proper masking and unmasking of interrupts. With this in place, some of the driver hacks used to support ColdFire interrupts can finally go away. Within the ColdFire family there is a variety of different interrupt controllers in use. Some are used on multiple parts, some on only one. There are considerable differences between some variants, so much so that common code for all ColdFire parts would be impossible. This commit introduces code to support one of the newer interrupt controllers in the ColdFire 5208 and 5207 parts. It has very simple mask and unmask operations, so it is one of the easiest to support. Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m520xsim.h | 2 + arch/m68knommu/kernel/irq.c | 4 ++ arch/m68knommu/platform/coldfire/Makefile | 2 +- arch/m68knommu/platform/coldfire/intc-simr.c | 61 ++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 arch/m68knommu/platform/coldfire/intc-simr.c (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h index e80b6a54ea9..e79b9bc76a1 100644 --- a/arch/m68k/include/asm/m520xsim.h +++ b/arch/m68k/include/asm/m520xsim.h @@ -22,6 +22,8 @@ #define MCFINTC_IMRL 0x0c /* Interrupt mask 1-31 */ #define MCFINTC_INTFRCH 0x10 /* Interrupt force 32-63 */ #define MCFINTC_INTFRCL 0x14 /* Interrupt force 1-31 */ +#define MCFINTC_SIMR 0x1c /* Set interrupt mask 0-63 */ +#define MCFINTC_CIMR 0x1d /* Clear interrupt mask 0-63 */ #define MCFINTC_ICR0 0x40 /* Base ICR register */ #define MCFINT_VECBASE 64 diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index 9e0c100447c..47f6af57e18 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -29,6 +29,8 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) set_irq_regs(oldregs); } +#if !defined(CONFIG_M520x) + static struct irq_chip m_irq_chip = { .name = "M68K-INTC", .enable = enable_vector, @@ -50,6 +52,8 @@ void __init init_IRQ(void) } } +#endif + int show_interrupts(struct seq_file *p, void *v) { struct irqaction *ap; diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index 2667323c7fe..dd242db7daa 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -17,7 +17,7 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 obj-$(CONFIG_COLDFIRE) += clk.o dma.o entry.o vectors.o obj-$(CONFIG_M5206) += timers.o obj-$(CONFIG_M5206e) += timers.o -obj-$(CONFIG_M520x) += pit.o +obj-$(CONFIG_M520x) += pit.o intc-simr.o obj-$(CONFIG_M523x) += pit.o dma_timer.o obj-$(CONFIG_M5249) += timers.o
obj-$(CONFIG_M527x) += pit.o diff --git a/arch/m68knommu/platform/coldfire/intc-simr.c b/arch/m68knommu/platform/coldfire/intc-simr.c new file mode 100644 index 00000000000..3b614a3508f --- /dev/null +++ b/arch/m68knommu/platform/coldfire/intc-simr.c @@ -0,0 +1,61 @@ +/* + * intc-simr.c + * + * (C) Copyright 2009, Greg Ungerer + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void intc_irq_mask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) + __raw_writeb(irq - MCFINT_VECBASE, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_SIMR); +} + +static void intc_irq_unmask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) + __raw_writeb(irq - MCFINT_VECBASE, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_CIMR); +} + +static int intc_irq_set_type(unsigned int irq, unsigned int type) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) + __raw_writeb(5, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + irq - MCFINT_VECBASE); + return 0; +} + +static struct irq_chip intc_irq_chip = { + .name = "CF-INTC", + .mask = intc_irq_mask, + .unmask = intc_irq_unmask, + .set_type = intc_irq_set_type, +}; + +void __init init_IRQ(void) +{ + int irq; + + init_vectors(); + + for (irq = 0; (irq < NR_IRQS); irq++) { + irq_desc[irq].status = IRQ_DISABLED; + irq_desc[irq].action = NULL; + irq_desc[irq].depth = 1; + irq_desc[irq].chip = &intc_irq_chip; + intc_irq_set_type(irq, 0); + } +} + -- cgit v1.2.3-70-g09d2 From d0d77c26cb6195e881325befc950cc54732ba261 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 27 Apr 2009 15:15:01 +1000 Subject: m68knommu: remove per device interrupt mask setting for ColdFire 520x With general interrupt controller code in place we don't need specific unmasking code for the internal ColdFire 520x UARTs or ethernet (FEC). 
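This works because the generic IRQ core now calls the intc-simr chip's unmask hook when a driver installs a handler, so no driver has to touch the mask registers itself. A hypothetical driver fragment, for illustration only (the vector offset 26 and the names are assumptions, not taken from this patch):

#include <linux/interrupt.h>

static irqreturn_t example_isr(int irq, void *dev_id)
{
        /* service the device here */
        return IRQ_HANDLED;
}

static int example_attach(void)
{
        /* one source above MCFINT_VECBASE (64); the core unmasks it */
        return request_irq(64 + 26, example_isr, 0, "example", NULL);
}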
Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/520x/config.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c index 1c43a8aec69..6a1fd743817 100644 --- a/arch/m68knommu/platform/520x/config.c +++ b/arch/m68knommu/platform/520x/config.c @@ -85,16 +85,11 @@ static struct platform_device *m520x_devices[] __initdata = { static void __init m520x_uart_init_line(int line, int irq) { - u32 imr; u16 par; u8 par2; writeb(0x03, INTC0 + MCFINTC_ICR0 + MCFINT_UART0 + line); - imr = readl(INTC0 + MCFINTC_IMRL); - imr &= ~((1 << (irq - MCFINT_VECBASE)) | 1); - writel(imr, INTC0 + MCFINTC_IMRL); - switch (line) { case 0: par = readw(MCF_IPSBAR + MCF_GPIO_PAR_UART); @@ -131,7 +126,6 @@ static void __init m520x_uarts_init(void) static void __init m520x_fec_init(void) { - u32 imr; u8 v; /* Unmask FEC interrupts at ColdFire interrupt controller */ @@ -139,10 +133,6 @@ writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 40); writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 42); - imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - imr &= ~0x0001FFF0; - writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - /* Set multi-function pins to ethernet mode */ v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FEC); writeb(v | 0xf0, MCF_IPSBAR + MCF_GPIO_PAR_FEC); @@ -153,17 +143,6 @@ /***************************************************************************/ -/* - * Program the vector to be an auto-vectored. - */ - -void mcf_autovector(unsigned int vec) -{ - /* Everything is auto-vectored on the 520x devices */ -} - -/***************************************************************************/ - static void m520x_cpu_reset(void) { local_irq_disable(); -- cgit v1.2.3-70-g09d2 From 2fba4f0b035f7723073068cf8b3a01ec38f72ed5 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 27 Apr 2009 15:38:03 +1000 Subject: m68knommu: general interrupt controller for many ColdFire 52xx parts Create general interrupt controller code for the many ColdFire version 2 cores that use the two-region INTC interrupt controller. This includes the 523x family, 5270, 5271, 5274, 5275, and the 528x families. This code does proper masking and unmasking of interrupts. With this in place, some of the driver hacks used to support ColdFire interrupts can finally go away.
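To make the mask-register selection in intc-2.c below easier to follow: with the vector rebased to zero, bit 6 selects INTC0 or INTC1, bit 5 selects IMRL or IMRH within that module, and the low five bits give the bit position. A standalone C sketch of just that mapping (illustrative only; the 0xC00/0xD00 module offsets and the IMR offsets are assumptions modelled on the 5282-style register map):

#include <stdio.h>

#define VECBASE 64              /* MCFINT_VECBASE */
#define INTC0   0x0c00          /* assumed module offsets from IPSBAR */
#define INTC1   0x0d00
#define IMRH    0x08            /* masks sources 32-63 */
#define IMRL    0x0c            /* masks sources 0-31 */

static void describe(unsigned int irq)
{
        unsigned int v = irq - VECBASE;
        unsigned long off = ((v & 0x40) ? INTC1 : INTC0)
                          + ((v & 0x20) ? IMRH : IMRL);

        printf("vector %u -> IMR at IPSBAR+0x%lx, bit %u\n", irq, off, v & 0x1f);
}

int main(void)
{
        describe(64 + 23);      /* low INTC0 source -> IMRL bit 23 */
        describe(64 + 64 + 1);  /* INTC1 source -> that module's IMRL bit 1 */
        return 0;
}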
Signed-off-by: Greg Ungerer --- arch/m68knommu/kernel/irq.c | 3 +- arch/m68knommu/platform/coldfire/Makefile | 6 +- arch/m68knommu/platform/coldfire/intc-2.c | 93 +++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 arch/m68knommu/platform/coldfire/intc-2.c (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index 47f6af57e18..f9965d7ee7c 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -29,7 +29,8 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) set_irq_regs(oldregs); } -#if !defined(CONFIG_M520x) +#if !defined(CONFIG_M520x) && !defined(CONFIG_M523x) && \ + !defined(CONFIG_M527x) && !defined(CONFIG_M528x) static struct irq_chip m_irq_chip = { .name = "M68K-INTC", diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index dd242db7daa..bce9a62d3a1 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -18,11 +18,11 @@ obj-$(CONFIG_COLDFIRE) += clk.o dma.o entry.o vectors.o obj-$(CONFIG_M5206) += timers.o obj-$(CONFIG_M5206e) += timers.o obj-$(CONFIG_M520x) += pit.o intc-simr.o -obj-$(CONFIG_M523x) += pit.o dma_timer.o +obj-$(CONFIG_M523x) += pit.o dma_timer.o intc-2.o obj-$(CONFIG_M5249) += timers.o -obj-$(CONFIG_M527x) += pit.o +obj-$(CONFIG_M527x) += pit.o intc-2.o obj-$(CONFIG_M5272) += timers.o -obj-$(CONFIG_M528x) += pit.o +obj-$(CONFIG_M528x) += pit.o intc-2.o obj-$(CONFIG_M5307) += timers.o obj-$(CONFIG_M532x) += timers.o obj-$(CONFIG_M5407) += timers.o diff --git a/arch/m68knommu/platform/coldfire/intc-2.c b/arch/m68knommu/platform/coldfire/intc-2.c new file mode 100644 index 00000000000..5598c8b8661 --- /dev/null +++ b/arch/m68knommu/platform/coldfire/intc-2.c @@ -0,0 +1,93 @@ +/* + * intc-2.c + * + * (C) Copyright 2009, Greg Ungerer + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Each vector needs a unique priority and level associated with it. + * We don't really care so much what they are, since we don't rely on the + * traditional priority interrupt scheme of the m68k/ColdFire. + */ +static u8 intc_intpri = 0x36; + +static void intc_irq_mask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 128)) { + unsigned long imraddr; + u32 val, imrbit; + + irq -= MCFINT_VECBASE; + imraddr = MCF_IPSBAR; + imraddr += (irq & 0x40) ? MCFICM_INTC1 : MCFICM_INTC0; + imraddr += (irq & 0x20) ? MCFINTC_IMRH : MCFINTC_IMRL; + imrbit = 0x1 << (irq & 0x1f); + + val = __raw_readl(imraddr); + __raw_writel(val | imrbit, imraddr); + } +} + +static void intc_irq_unmask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 128)) { + unsigned long intaddr, imraddr, icraddr; + u32 val, imrbit; + + irq -= MCFINT_VECBASE; + intaddr = MCF_IPSBAR; + intaddr += (irq & 0x40) ? MCFICM_INTC1 : MCFICM_INTC0; + imraddr = intaddr + ((irq & 0x20) ? MCFINTC_IMRH : MCFINTC_IMRL); + icraddr = intaddr + MCFINTC_ICR0 + (irq & 0x3f); + imrbit = 0x1 << (irq & 0x1f); + + /* Don't set the "maskall" bit!
*/ + if ((irq & 0x20) == 0) + imrbit |= 0x1; + + if (__raw_readb(icraddr) == 0) + __raw_writeb(intc_intpri--, icraddr); + + val = __raw_readl(imraddr); + __raw_writel(val & ~imrbit, imraddr); + } +} + +static struct irq_chip intc_irq_chip = { + .name = "CF-INTC", + .mask = intc_irq_mask, + .unmask = intc_irq_unmask, +}; + +void __init init_IRQ(void) +{ + int irq; + + init_vectors(); + + /* Mask all interrupt sources */ + __raw_writel(0x1, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); + __raw_writel(0x1, MCF_IPSBAR + MCFICM_INTC1 + MCFINTC_IMRL); + + for (irq = 0; (irq < NR_IRQS); irq++) { + irq_desc[irq].status = IRQ_DISABLED; + irq_desc[irq].action = NULL; + irq_desc[irq].depth = 1; + irq_desc[irq].chip = &intc_irq_chip; + } +} + -- cgit v1.2.3-70-g09d2 From 1b718c71b7830c77a9b70826be83b899e314b585 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 28 Apr 2009 10:57:36 +1000 Subject: m68knommu: clean up ColdFire 527x interrupt setup With the common intc-2 interrupt controller code in place the ColdFire 527x family startup code can be greatly simplified. Remove all the interrupt masking code, and the per-device interrupt config here. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/527x/config.c | 49 ----------------------------------- 1 file changed, 49 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c index f746439cfd3..fa51be17283 100644 --- a/arch/m68knommu/platform/527x/config.c +++ b/arch/m68knommu/platform/527x/config.c @@ -116,23 +116,13 @@ static struct platform_device *m527x_devices[] __initdata = { /***************************************************************************/ -#define INTC0 (MCF_MBAR + MCFICM_INTC0) - static void __init m527x_uart_init_line(int line, int irq) { u16 sepmask; - u32 imr; if ((line < 0) || (line > 2)) return; - /* level 6, line based priority */ - writeb(0x30+line, INTC0 + MCFINTC_ICR0 + MCFINT_UART0 + line); - - imr = readl(INTC0 + MCFINTC_IMRL); - imr &= ~((1 << (irq - MCFINT_VECBASE)) | 1); - writel(imr, INTC0 + MCFINTC_IMRL); - /* * External Pin Mask Setting & Enable External Pin for Interface */ @@ -157,32 +147,11 @@ static void __init m527x_uarts_init(void) /***************************************************************************/ -static void __init m527x_fec_irq_init(int nr) -{ - unsigned long base; - u32 imr; - - base = MCF_IPSBAR + (nr ? 
MCFICM_INTC1 : MCFICM_INTC0); - - writeb(0x28, base + MCFINTC_ICR0 + 23); - writeb(0x27, base + MCFINTC_ICR0 + 27); - writeb(0x26, base + MCFINTC_ICR0 + 29); - - imr = readl(base + MCFINTC_IMRH); - imr &= ~0xf; - writel(imr, base + MCFINTC_IMRH); - imr = readl(base + MCFINTC_IMRL); - imr &= ~0xff800001; - writel(imr, base + MCFINTC_IMRL); -} - static void __init m527x_fec_init(void) { u16 par; u8 v; - m527x_fec_irq_init(0); - /* Set multi-function pins to ethernet mode for fec0 */ #if defined(CONFIG_M5271) v = readb(MCF_IPSBAR + 0x100047); @@ -195,8 +164,6 @@ static void __init m527x_fec_init(void) #endif #ifdef CONFIG_FEC2 - m527x_fec_irq_init(1); - /* Set multi-function pins to ethernet mode for fec1 */ par = readw(MCF_IPSBAR + 0x100082); writew(par | 0xa0, MCF_IPSBAR + 0x100082); @@ -207,21 +174,6 @@ static void __init m527x_fec_init(void) /***************************************************************************/ -void mcf_disableall(void) -{ - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL)) = 0xffffffff; -} - -/***************************************************************************/ - -void mcf_autovector(unsigned int vec) -{ - /* Everything is auto-vectored on the 5272 */ -} - -/***************************************************************************/ - static void m527x_cpu_reset(void) { local_irq_disable(); @@ -232,7 +184,6 @@ static void m527x_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_disableall(); mach_reset = m527x_cpu_reset; m527x_uarts_init(); m527x_fec_init(); -- cgit v1.2.3-70-g09d2 From 980f9235eba5091d9fbbeb7fb72348c19d1ba710 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 28 Apr 2009 14:24:25 +1000 Subject: m68knommu: clean up ColdFire 528x interrupt setup With the common intc-2 interrupt controller code in place the ColdFire 528x family startup code can be greatly simplified. Remove all the interrupt masking code, and the per-device interrupt config here. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/528x/config.c | 51 ++--------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c index a1d1a61c4fe..6e608d1836f 100644 --- a/arch/m68knommu/platform/528x/config.c +++ b/arch/m68knommu/platform/528x/config.c @@ -3,8 +3,8 @@ /* * linux/arch/m68knommu/platform/528x/config.c * - * Sub-architcture dependant initialization code for the Motorola - * 5280 and 5282 CPUs. + * Sub-architcture dependant initialization code for the Freescale + * 5280, 5281 and 5282 CPUs. * * Copyright (C) 1999-2003, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 2001-2003, SnapGear Inc. 
(www.snapgear.com) @@ -15,20 +15,13 @@ #include #include #include -#include #include -#include -#include #include #include #include #include #include -#ifdef CONFIG_MTD_PARTITIONS -#include -#endif - /***************************************************************************/ static struct mcf_platform_uart m528x_uart_platform[] = { @@ -91,23 +84,13 @@ static struct platform_device *m528x_devices[] __initdata = { /***************************************************************************/ -#define INTC0 (MCF_MBAR + MCFICM_INTC0) - static void __init m528x_uart_init_line(int line, int irq) { u8 port; - u32 imr; if ((line < 0) || (line > 2)) return; - /* level 6, line based priority */ - writeb(0x30+line, INTC0 + MCFINTC_ICR0 + MCFINT_UART0 + line); - - imr = readl(INTC0 + MCFINTC_IMRL); - imr &= ~((1 << (irq - MCFINT_VECBASE)) | 1); - writel(imr, INTC0 + MCFINTC_IMRL); - /* make sure PUAPAR is set for UART0 and UART1 */ if (line < 2) { port = readb(MCF_MBAR + MCF5282_GPIO_PUAPAR); @@ -129,21 +112,8 @@ static void __init m528x_uarts_init(void) static void __init m528x_fec_init(void) { - u32 imr; u16 v16; - /* Unmask FEC interrupts at ColdFire interrupt controller */ - writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23); - writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27); - writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29); - - imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - imr &= ~0xf; - writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); - imr &= ~0xff800001; - writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); - /* Set multi-function pins to ethernet mode for fec0 */ v16 = readw(MCF_IPSBAR + 0x100056); writew(v16 | 0xf00, MCF_IPSBAR + 0x100056); @@ -152,21 +122,6 @@ static void __init m528x_fec_init(void) /***************************************************************************/ -void mcf_disableall(void) -{ - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL)) = 0xffffffff; -} - -/***************************************************************************/ - -void mcf_autovector(unsigned int vec) -{ - /* Everything is auto-vectored on the 5272 */ -} - -/***************************************************************************/ - static void m528x_cpu_reset(void) { local_irq_disable(); @@ -204,8 +159,6 @@ void wildfiremod_halt(void) void __init config_BSP(char *commandp, int size) { - mcf_disableall(); - #ifdef CONFIG_WILDFIRE mach_halt = wildfire_halt; #endif -- cgit v1.2.3-70-g09d2 From 1f946533bb562f5144752ea583cac45e9410fdaa Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 28 Apr 2009 15:59:10 +1000 Subject: m68knommu: clean up ColdFire 523x interrupt setup With the common intc-2 interrupt controller code in place the ColdFire 523x family startup code can be greatly simplified. Remove all the interrupt masking code, and the per-device interrupt config here. 
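[ Hypothetical sketch, not part of the commit above: with the intc-2 irq_chip registered, a platform driver no longer pokes ICR/IMR itself -- installing a handler is enough, because the genirq core calls the chip's unmask hook. The device name and vector below are made up for illustration. ]

#include <linux/init.h>
#include <linux/interrupt.h>
#include <asm/mcfsim.h>

static irqreturn_t example_isr(int irq, void *dev_id)
{
	/* device-specific handling would go here */
	return IRQ_HANDLED;
}

static int __init example_init(void)
{
	/* unmasking now happens in intc_irq_unmask(), not here */
	return request_irq(MCFINT_VECBASE + 23, example_isr, 0,
			   "example-dev", NULL);
}
device_initcall(example_init);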
Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/523x/config.c | 63 ----------------------------------- 1 file changed, 63 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c index 961fefebca1..00f08d2e43b 100644 --- a/arch/m68knommu/platform/523x/config.c +++ b/arch/m68knommu/platform/523x/config.c @@ -82,66 +82,6 @@ static struct platform_device *m523x_devices[] __initdata = { /***************************************************************************/ -#define INTC0 (MCF_MBAR + MCFICM_INTC0) - -static void __init m523x_uart_init_line(int line, int irq) -{ - u32 imr; - - if ((line < 0) || (line > 2)) - return; - - writeb(0x30+line, (INTC0 + MCFINTC_ICR0 + MCFINT_UART0 + line)); - - imr = readl(INTC0 + MCFINTC_IMRL); - imr &= ~((1 << (irq - MCFINT_VECBASE)) | 1); - writel(imr, INTC0 + MCFINTC_IMRL); -} - -static void __init m523x_uarts_init(void) -{ - const int nrlines = ARRAY_SIZE(m523x_uart_platform); - int line; - - for (line = 0; (line < nrlines); line++) - m523x_uart_init_line(line, m523x_uart_platform[line].irq); -} - -/***************************************************************************/ - -static void __init m523x_fec_init(void) -{ - u32 imr; - - /* Unmask FEC interrupts at ColdFire interrupt controller */ - writeb(0x28, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 23); - writeb(0x27, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 27); - writeb(0x26, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 29); - - imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - imr &= ~0xf; - writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH); - imr = readl(MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); - imr &= ~0xff800001; - writel(imr, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL); -} - -/***************************************************************************/ - -void mcf_disableall(void) -{ - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH)) = 0xffffffff; - *((volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRL)) = 0xffffffff; -} - -/***************************************************************************/ - -void mcf_autovector(unsigned int vec) -{ - /* Everything is auto-vectored on the 523x */ -} -/***************************************************************************/ - static void m523x_cpu_reset(void) { local_irq_disable(); @@ -152,10 +92,7 @@ static void m523x_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_disableall(); mach_reset = m523x_cpu_reset; - m523x_uarts_init(); - m523x_fec_init(); } /***************************************************************************/ -- cgit v1.2.3-70-g09d2 From 277c5e3e26cac45010f57a581c56476639b2cfa0 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 29 Apr 2009 12:07:13 +1000 Subject: m68knommu: general interrupt controller for ColdFire 532x parts The ColdFire 532x family of parts uses two of the same INTC interrupt controllers used in the ColdFire 520x family. So modify the code to support both parts. The extra code for the second INTC controller in the case of the 520x is easily optimized away to nothing.
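[ Illustration, not part of the commit above: why the second-controller support is free on the 520x. MCFINTC1_SIMR is defined as (0) there (see the m520xsim.h change that follows), so the second branch below is compile-time dead and the optimizer discards it. Host-buildable sketch with __raw_writeb() stubbed out. ]

#include <stdint.h>

#define MCFINT_VECBASE	64
#define MCFINTC0_SIMR	0xFC04801CUL	/* 532x address, per m532xsim.h below */
#define MCFINTC1_SIMR	0UL		/* what a 520x build sees */

static void raw_writeb(uint8_t v, unsigned long addr)
{
	(void)v; (void)addr;		/* stand-in for __raw_writeb() */
}

static void intc_irq_mask(unsigned int irq)
{
	if (irq >= MCFINT_VECBASE) {
		if (irq < MCFINT_VECBASE + 64)
			raw_writeb(irq - MCFINT_VECBASE, MCFINTC0_SIMR);
		else if ((irq < MCFINT_VECBASE + 128) && MCFINTC1_SIMR)
			/* dead code when MCFINTC1_SIMR == 0 */
			raw_writeb(irq - MCFINT_VECBASE - 64, MCFINTC1_SIMR);
	}
}

int main(void)
{
	intc_irq_mask(MCFINT_VECBASE + 1);
	return 0;
}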
Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m520xsim.h | 15 +++++++++++++-- arch/m68k/include/asm/m532xsim.h | 18 ++++++++++-------- arch/m68knommu/kernel/irq.c | 3 ++- arch/m68knommu/platform/coldfire/Makefile | 2 +- arch/m68knommu/platform/coldfire/intc-simr.c | 24 ++++++++++++++++++------ 5 files changed, 44 insertions(+), 18 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h index e79b9bc76a1..91de39c8d86 100644 --- a/arch/m68k/include/asm/m520xsim.h +++ b/arch/m68k/include/asm/m520xsim.h @@ -11,9 +11,8 @@ #define m520xsim_h /****************************************************************************/ - /* - * Define the 5282 SIM register set addresses. + * Define the 520x SIM register set addresses. */ #define MCFICM_INTC0 0x48000 /* Base for Interrupt Ctrl 0 */ #define MCFINTC_IPRH 0x00 /* Interrupt pending 32-63 */ @@ -26,6 +25,18 @@ #define MCFINTC_CIMR 0x1d /* Clear interrupt mask 0-63 */ #define MCFINTC_ICR0 0x40 /* Base ICR register */ +/* + * The common interrupt controller code just wants to know the absolute + * address to the SIMR and CIMR registers (not offsets into IPSBAR). + * The 520x family only has a single INTC unit. + */ +#define MCFINTC0_SIMR (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_SIMR) +#define MCFINTC0_CIMR (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_CIMR) +#define MCFINTC0_ICR0 (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0) +#define MCFINTC1_SIMR (0) +#define MCFINTC1_CIMR (0) +#define MCFINTC1_ICR0 (0) + #define MCFINT_VECBASE 64 #define MCFINT_UART0 26 /* Interrupt number for UART0 */ #define MCFINT_UART1 27 /* Interrupt number for UART1 */ diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h index 3e80810b378..41c57e0f445 100644 --- a/arch/m68k/include/asm/m532xsim.h +++ b/arch/m68k/include/asm/m532xsim.h @@ -58,10 +58,12 @@ #define MCFSIM_IMR_MASKALL 0xFFFFFFFF /* All SIM intr sources */ -#define MCFSIM_IMR_SIMR0 0xFC04801C -#define MCFSIM_IMR_SIMR1 0xFC04C01C -#define MCFSIM_IMR_CIMR0 0xFC04801D -#define MCFSIM_IMR_CIMR1 0xFC04C01D +#define MCFINTC0_SIMR 0xFC04801C +#define MCFINTC0_CIMR 0xFC04801D +#define MCFINTC0_ICR0 0xFC048040 +#define MCFINTC1_SIMR 0xFC04C01C +#define MCFINTC1_CIMR 0xFC04C01D +#define MCFINTC1_ICR0 0xFC04C040 #define MCFSIM_ICR_TIMER1 (0xFC048040+32) #define MCFSIM_ICR_TIMER2 (0xFC048040+33) @@ -87,16 +89,16 @@ #define mcf_enable_irq0(irq) \ - *((volatile unsigned char*) (MCFSIM_IMR_CIMR0)) = (irq); + *((volatile unsigned char *) (MCFINTC0_CIMR)) = (irq); #define mcf_enable_irq1(irq) \ - *((volatile unsigned char*) (MCFSIM_IMR_CIMR1)) = (irq); + *((volatile unsigned char *) (MCFINTC1_CIMR)) = (irq); #define mcf_disable_irq0(irq) \ - *((volatile unsigned char*) (MCFSIM_IMR_SIMR0)) = (irq); + *((volatile unsigned char *) (MCFINTC0_SIMR)) = (irq); #define mcf_disable_irq1(irq) \ - *((volatile unsigned char*) (MCFSIM_IMR_SIMR1)) = (irq); + *((volatile unsigned char *) (MCFINTC1_SIMR)) = (irq); /* * Define the Cache register flags. 
diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index f9965d7ee7c..93d567bbf33 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -30,7 +30,8 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) } #if !defined(CONFIG_M520x) && !defined(CONFIG_M523x) && \ - !defined(CONFIG_M527x) && !defined(CONFIG_M528x) + !defined(CONFIG_M527x) && !defined(CONFIG_M528x) && \ + !defined(CONFIG_M532x) static struct irq_chip m_irq_chip = { .name = "M68K-INTC", diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index bce9a62d3a1..6c5f699cf14 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -24,7 +24,7 @@ obj-$(CONFIG_M527x) += pit.o intc-2.o obj-$(CONFIG_M5272) += timers.o obj-$(CONFIG_M528x) += pit.o intc-2.o obj-$(CONFIG_M5307) += timers.o -obj-$(CONFIG_M532x) += timers.o +obj-$(CONFIG_M532x) += timers.o intc-simr.o obj-$(CONFIG_M5407) += timers.o obj-y += pinmux.o gpio.o diff --git a/arch/m68knommu/platform/coldfire/intc-simr.c b/arch/m68knommu/platform/coldfire/intc-simr.c index 3b614a3508f..86fc2047d7a 100644 --- a/arch/m68knommu/platform/coldfire/intc-simr.c +++ b/arch/m68knommu/platform/coldfire/intc-simr.c @@ -20,20 +20,32 @@ static void intc_irq_mask(unsigned int irq) { - if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) - __raw_writeb(irq - MCFINT_VECBASE, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_SIMR); + if (irq >= MCFINT_VECBASE) { + if (irq < MCFINT_VECBASE + 64) + __raw_writeb(irq - MCFINT_VECBASE, MCFINTC0_SIMR); + else if ((irq < MCFINT_VECBASE + 128) && MCFINTC1_SIMR) + __raw_writeb(irq - MCFINT_VECBASE - 64, MCFINTC1_SIMR); + } } static void intc_irq_unmask(unsigned int irq) { - if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) - __raw_writeb(irq - MCFINT_VECBASE, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_CIMR); + if (irq >= MCFINT_VECBASE) { + if (irq < MCFINT_VECBASE + 64) + __raw_writeb(irq - MCFINT_VECBASE, MCFINTC0_CIMR); + else if ((irq < MCFINT_VECBASE + 128) && MCFINTC1_CIMR) + __raw_writeb(irq - MCFINT_VECBASE - 64, MCFINTC1_CIMR); + } } static int intc_irq_set_type(unsigned int irq, unsigned int type) { - if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECBASE + 63)) - __raw_writeb(5, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + irq - MCFINT_VECBASE); + if (irq >= MCFINT_VECBASE) { + if (irq < MCFINT_VECBASE + 64) + __raw_writeb(5, MCFINTC0_ICR0 + irq - MCFINT_VECBASE); + else if ((irq < MCFINT_VECBASE) && MCFINTC1_ICR0) + __raw_writeb(5, MCFINTC1_ICR0 + irq - MCFINT_VECBASE - 64); + } return 0; } -- cgit v1.2.3-70-g09d2 From 6589c1d71581618dfc344628fb425ee4f09ce904 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 29 Apr 2009 13:31:27 +1000 Subject: m68knommu: clean up ColdFire 532x interrupt setup With the common intc-simr interrupt controller code in place the ColdFire 532x family startup code can be greatly simplified. Remove all the interrupt masking code, and the per-device interrupt config here. 
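[ Illustration, not part of the commit above: what makes the simr/cimr style controller simpler to drive. It is indexed, not bitmapped -- one byte write of the source number to SIMR masks that source, one byte write to CIMR unmasks it -- so the interrupt-unsafe read-modify-write of a shared IMR word disappears. A rough host-side model of the two styles: ]

#include <stdio.h>
#include <stdint.h>

static uint32_t imr = 0xffffffff;	/* old style: one shared mask word */
static uint8_t masked[64];		/* simr/cimr style: per-source state */

static void old_style_unmask(unsigned int src)
{
	imr &= ~(1u << src);		/* RMW: must be protected from IRQs */
}

static void cimr_style_unmask(unsigned int src)
{
	masked[src] = 0;		/* models a single byte write to CIMR */
}

int main(void)
{
	int i;

	for (i = 0; i < 64; i++)
		masked[i] = 1;		/* everything starts masked */

	old_style_unmask(26);
	cimr_style_unmask(26);
	printf("imr=0x%08x, source 26 masked=%d\n",
	       (unsigned int)imr, masked[26]);
	return 0;
}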
Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/532x/config.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c index 28eb9ea55f4..3d585599588 100644 --- a/arch/m68knommu/platform/532x/config.c +++ b/arch/m68knommu/platform/532x/config.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -98,18 +97,11 @@ static struct platform_device *m532x_devices[] __initdata = { static void __init m532x_uart_init_line(int line, int irq) { if (line == 0) { - MCF_INTC0_ICR26 = 0x3; - MCF_INTC0_CIMR = 26; /* GPIO initialization */ MCF_GPIO_PAR_UART |= 0x000F; } else if (line == 1) { - MCF_INTC0_ICR27 = 0x3; - MCF_INTC0_CIMR = 27; /* GPIO initialization */ MCF_GPIO_PAR_UART |= 0x0FF0; - } else if (line == 2) { - MCF_INTC0_ICR28 = 0x3; - MCF_INTC0_CIMR = 28; } } @@ -125,14 +117,6 @@ static void __init m532x_uarts_init(void) static void __init m532x_fec_init(void) { - /* Unmask FEC interrupts at ColdFire interrupt controller */ - MCF_INTC0_ICR36 = 0x2; - MCF_INTC0_ICR40 = 0x2; - MCF_INTC0_ICR42 = 0x2; - - MCF_INTC0_IMRH &= ~(MCF_INTC_IMRH_INT_MASK36 | - MCF_INTC_IMRH_INT_MASK40 | MCF_INTC_IMRH_INT_MASK42); - /* Set multi-function pins to ethernet mode for fec0 */ MCF_GPIO_PAR_FECI2C |= (MCF_GPIO_PAR_FECI2C_PAR_MDC_EMDC | MCF_GPIO_PAR_FECI2C_PAR_MDIO_EMDIO); @@ -172,8 +156,6 @@ static void m532x_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_setimr(MCFSIM_IMR_MASKALL); - #if !defined(CONFIG_BOOTPARAM) /* Copy command line from FLASH to local buffer... */ memcpy(commandp, (char *) 0x4000, 4); -- cgit v1.2.3-70-g09d2 From 33a21263bf74177209c11f08246fc308916d9ffa Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Thu, 30 Apr 2009 16:22:24 +1000 Subject: m68knommu: use common interrupt controller code for older ColdFire CPU's The old ColdFire CPU's (5206, 5307, 5407, 5249 etc) use a simple interrupt controller. Use common setup code for them. This addition means that all ColdFire CPU's now have some specific type of interrupt controller code. 
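[ Illustration, not part of the commit above: on these older parts the seven "external" interrupts are simply the m68k autovectors, so level n arrives on vector 24 + n -- which is why the intc.c code later in this series defines EIRQ1 as 25 and EIRQ7 as 31, and why its mask/unmask helpers use IMR bits 1..7. A quick host-side check of that mapping: ]

#include <stdio.h>

int main(void)
{
	int level;

	for (level = 1; level <= 7; level++)
		printf("level %d -> vector %d (EIRQ%d), IMR bit %d\n",
		       level, 24 + level, level, level);
	return 0;
}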
Signed-off-by: Greg Ungerer --- arch/m68knommu/kernel/irq.c | 4 +-- arch/m68knommu/platform/coldfire/Makefile | 12 +++---- arch/m68knommu/platform/coldfire/intc.c | 55 ++++++++++++++++++++++++++++++ arch/m68knommu/platform/coldfire/vectors.c | 20 +---------- 4 files changed, 63 insertions(+), 28 deletions(-) create mode 100644 arch/m68knommu/platform/coldfire/intc.c (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index 93d567bbf33..73daaf0a455 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -29,9 +29,7 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) set_irq_regs(oldregs); } -#if !defined(CONFIG_M520x) && !defined(CONFIG_M523x) && \ - !defined(CONFIG_M527x) && !defined(CONFIG_M528x) && \ - !defined(CONFIG_M532x) +#if !defined(CONFIG_COLDFIRE) static struct irq_chip m_irq_chip = { .name = "M68K-INTC", diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index 6c5f699cf14..24ea95a2312 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -15,17 +15,17 @@ asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1 obj-$(CONFIG_COLDFIRE) += clk.o dma.o entry.o vectors.o -obj-$(CONFIG_M5206) += timers.o -obj-$(CONFIG_M5206e) += timers.o +obj-$(CONFIG_M5206) += timers.o intc.o +obj-$(CONFIG_M5206e) += timers.o intc.o obj-$(CONFIG_M520x) += pit.o intc-simr.o obj-$(CONFIG_M523x) += pit.o dma_timer.o intc-2.o -obj-$(CONFIG_M5249) += timers.o +obj-$(CONFIG_M5249) += timers.o intc.o obj-$(CONFIG_M527x) += pit.o intc-2.o -obj-$(CONFIG_M5272) += timers.o +obj-$(CONFIG_M5272) += timers.o intc.o obj-$(CONFIG_M528x) += pit.o intc-2.o -obj-$(CONFIG_M5307) += timers.o +obj-$(CONFIG_M5307) += timers.o intc.o obj-$(CONFIG_M532x) += timers.o intc-simr.o -obj-$(CONFIG_M5407) += timers.o +obj-$(CONFIG_M5407) += timers.o intc.o obj-y += pinmux.o gpio.o extra-y := head.o diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68knommu/platform/coldfire/intc.c new file mode 100644 index 00000000000..c81ab6e5cf2 --- /dev/null +++ b/arch/m68knommu/platform/coldfire/intc.c @@ -0,0 +1,55 @@ +/* + * intc.c + * + * (C) Copyright 2009, Greg Ungerer + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void intc_irq_mask(unsigned int irq) +{ +} + +static void intc_irq_unmask(unsigned int irq) +{ +} + +static int intc_irq_set_type(unsigned int irq, unsigned int type) +{ + return 0; +} + +static struct irq_chip intc_irq_chip = { + .name = "CF-INTC", + .mask = intc_irq_mask, + .unmask = intc_irq_unmask, + .set_type = intc_irq_set_type, +}; + +void __init init_IRQ(void) +{ + int irq; + + init_vectors(); + + for (irq = 0; (irq < NR_IRQS); irq++) { + irq_desc[irq].status = IRQ_DISABLED; + irq_desc[irq].action = NULL; + irq_desc[irq].depth = 1; + irq_desc[irq].chip = &intc_irq_chip; + intc_irq_set_type(irq, 0); + } +} + diff --git a/arch/m68knommu/platform/coldfire/vectors.c b/arch/m68knommu/platform/coldfire/vectors.c index bdca0297fa9..a21d3f870b7 100644 --- a/arch/m68knommu/platform/coldfire/vectors.c +++ b/arch/m68knommu/platform/coldfire/vectors.c @@ -1,7 +1,7 @@ /***************************************************************************/ /* - * linux/arch/m68knommu/platform/5307/vectors.c + * linux/arch/m68knommu/platform/coldfire/vectors.c * * Copyright (C) 1999-2007, Greg Ungerer */ @@ -15,7 +15,6 @@ #include #include #include -#include #include /***************************************************************************/ @@ -79,20 +78,3 @@ void __init init_vectors(void) } /***************************************************************************/ - -void enable_vector(unsigned int irq) -{ - /* Currently no action on ColdFire */ -} - -void disable_vector(unsigned int irq) -{ - /* Currently no action on ColdFire */ -} - -void ack_vector(unsigned int irq) -{ - /* Currently no action on ColdFire */ -} - -/***************************************************************************/ -- cgit v1.2.3-70-g09d2 From 1985d2538cccdcdb444d03bc5994be82e85e1f60 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 1 May 2009 16:54:49 +1000 Subject: m68knommu: complete interrupt controller code for the 68328 CPU's Define the interrupt controller structures along with the interrupt controller code for the 68328 CPU family. This brings the interrupt setup and control into one place for this CPU family. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/68328/ints.c | 72 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 32 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/68328/ints.c b/arch/m68knommu/platform/68328/ints.c index 72e56d554f4..b91ee85d4b5 100644 --- a/arch/m68knommu/platform/68328/ints.c +++ b/arch/m68knommu/platform/68328/ints.c @@ -73,34 +73,6 @@ extern e_vector *_ramvec; /* The number of spurious interrupts */ volatile unsigned int num_spurious; -/* - * This function should be called during kernel startup to initialize - * the machine vector table. - */ -void __init init_vectors(void) -{ - int i; - - /* set up the vectors */ - for (i = 72; i < 256; ++i) - _ramvec[i] = (e_vector) bad_interrupt; - - _ramvec[32] = system_call; - - _ramvec[65] = (e_vector) inthandler1; - _ramvec[66] = (e_vector) inthandler2; - _ramvec[67] = (e_vector) inthandler3; - _ramvec[68] = (e_vector) inthandler4; - _ramvec[69] = (e_vector) inthandler5; - _ramvec[70] = (e_vector) inthandler6; - _ramvec[71] = (e_vector) inthandler7; - - IVR = 0x40; /* Set DragonBall IVR (interrupt base) to 64 */ - - /* turn off all interrupts */ - IMR = ~0; -} - /* The 68k family did not have a good way to determine the source * of interrupts until later in the family. 
The EC000 core does * not provide the vector number on the stack, we vector everything @@ -163,18 +135,54 @@ void process_int(int vec, struct pt_regs *fp) } } -void enable_vector(unsigned int irq) +static void intc_irq_unmask(unsigned int irq) { IMR &= ~(1< Date: Fri, 1 May 2009 17:22:36 +1000 Subject: m68knommu: complete interrupt controller code for the 68360 CPU Define the interrupt controller structures along with the interrupt controller code for the 68360 CPU. This brings the interrupt setup and control into one place for this CPU family. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/68360/ints.c | 44 ++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/68360/ints.c b/arch/m68knommu/platform/68360/ints.c index c36781157e0..1143f77caca 100644 --- a/arch/m68knommu/platform/68360/ints.c +++ b/arch/m68knommu/platform/68360/ints.c @@ -37,11 +37,33 @@ extern void *_ramvec[]; /* The number of spurious interrupts */ volatile unsigned int num_spurious; +static void intc_irq_unmask(unsigned int irq) +{ + pquicc->intr_cimr |= (1 << irq); +} + +static void intc_irq_mask(unsigned int irq) +{ + pquicc->intr_cimr &= ~(1 << irq); +} + +static void intc_irq_ack(unsigned int irq) +{ + pquicc->intr_cisr = (1 << irq); +} + +static struct irq_chip intc_irq_chip = { + .name = "M68K-INTC", + .mask = intc_irq_mask, + .unmask = intc_irq_unmask, + .ack = intc_irq_ack, +}; + /* * This function should be called during kernel startup to initialize * the vector table. */ -void init_vectors(void) +void init_IRQ(void) { int i; int vba = (CPM_VECTOR_BASE<<4); @@ -109,20 +131,12 @@ void init_vectors(void) /* turn off all CPM interrupts */ pquicc->intr_cimr = 0x00000000; -} - -void enable_vector(unsigned int irq) -{ - pquicc->intr_cimr |= (1 << irq); -} -void disable_vector(unsigned int irq) -{ - pquicc->intr_cimr &= ~(1 << irq); -} - -void ack_vector(unsigned int irq) -{ - pquicc->intr_cisr = (1 << irq); + for (i = 0; (i < NR_IRQS); i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + irq_desc[i].chip = &intc_irq_chip; + } } -- cgit v1.2.3-70-g09d2 From de4cbfb5994465e7c0f4cc545722b1144e8ba717 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 1 May 2009 17:23:37 +1000 Subject: m68knommu: remove the common interrupt controller structure Each different m68knommu CPU interrupt controller type has its own interrupt controller data structures now. Remove the old, and now not used, common irq structs and init code from here. 
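[ Condensed sketch, not part of the commit above: the shape that every per-controller init_IRQ now has, replacing the single shared version removed in the diff below. The chip name and empty hooks are placeholders. ]

#include <linux/init.h>
#include <linux/irq.h>

extern void init_vectors(void);	/* CPU-specific vector table setup */

static void example_mask(unsigned int irq) { }
static void example_unmask(unsigned int irq) { }

static struct irq_chip example_irq_chip = {
	.name	= "EXAMPLE-INTC",
	.mask	= example_mask,
	.unmask	= example_unmask,
};

void __init init_IRQ(void)
{
	int irq;

	init_vectors();

	for (irq = 0; irq < NR_IRQS; irq++) {
		irq_desc[irq].status = IRQ_DISABLED;
		irq_desc[irq].action = NULL;
		irq_desc[irq].depth = 1;
		irq_desc[irq].chip = &example_irq_chip;
	}
}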
Signed-off-by: Greg Ungerer --- arch/m68knommu/kernel/irq.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c index 73daaf0a455..c9cac36d442 100644 --- a/arch/m68knommu/kernel/irq.c +++ b/arch/m68knommu/kernel/irq.c @@ -29,31 +29,6 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs) set_irq_regs(oldregs); } -#if !defined(CONFIG_COLDFIRE) - -static struct irq_chip m_irq_chip = { - .name = "M68K-INTC", - .enable = enable_vector, - .disable = disable_vector, - .ack = ack_vector, -}; - -void __init init_IRQ(void) -{ - int irq; - - init_vectors(); - - for (irq = 0; (irq < NR_IRQS); irq++) { - irq_desc[irq].status = IRQ_DISABLED; - irq_desc[irq].action = NULL; - irq_desc[irq].depth = 1; - irq_desc[irq].chip = &m_irq_chip; - } -} - -#endif - int show_interrupts(struct seq_file *p, void *v) { struct irqaction *ap; -- cgit v1.2.3-70-g09d2 From a3d9bf1dfdaf6f7df6c5340521dff1aafe39393f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 6 May 2009 10:14:04 +1000 Subject: m68knommu: remove unnecessary interrupt level setting in ColdFire 520x setup The new code for the interrupt controller in the ColdFire 520x takes care of all the interrupt controller setup. No manual config of the level registers (ICR) is required by the platform device setup code. So remove it. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/520x/config.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c index 6a1fd743817..92614de42cd 100644 --- a/arch/m68knommu/platform/520x/config.c +++ b/arch/m68knommu/platform/520x/config.c @@ -81,15 +81,11 @@ static struct platform_device *m520x_devices[] __initdata = { /***************************************************************************/ -#define INTC0 (MCF_MBAR + MCFICM_INTC0) - static void __init m520x_uart_init_line(int line, int irq) { u16 par; u8 par2; - writeb(0x03, INTC0 + MCFINTC_ICR0 + MCFINT_UART0 + line); - switch (line) { case 0: par = readw(MCF_IPSBAR + MCF_GPIO_PAR_UART); @@ -128,11 +124,6 @@ static void __init m520x_fec_init(void) { u8 v; - /* Unmask FEC interrupts at ColdFire interrupt controller */ - writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 36); - writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 40); - writeb(0x4, MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_ICR0 + 42); - /* Set multi-function pins to ethernet mode */ v = readb(MCF_IPSBAR + MCF_GPIO_PAR_FEC); writeb(v | 0xf0, MCF_IPSBAR + MCF_GPIO_PAR_FEC); -- cgit v1.2.3-70-g09d2 From f1a59d244abd8d7b94b90f45ee5e0988e5a154cb Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 6 May 2009 11:36:00 +1000 Subject: m68knommu: remove interrupt masking from ColdFire pit timer With proper interrupt controller code in place, there is no need for devices like the timers to have custom interrupt masking code. Remove it (and the defines that go along with it).
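[ Hypothetical sketch, not part of the commit above: all that remains for a timer once the irq_chip owns the masking -- install the handler and stop. Field values follow the pit.c context visible in the diff below; the function name is made up. ]

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <asm/mcfsim.h>

static irqreturn_t pit_tick(int irq, void *dev_id)
{
	return IRQ_HANDLED;	/* the real handler runs the clockevent */
}

static struct irqaction pit_irq = {
	.name		= "timer",
	.flags		= IRQF_DISABLED | IRQF_TIMER,
	.handler	= pit_tick,
};

void hw_timer_init_sketch(void)
{
	setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq);
	/* no ICR/IMR writes any more; intc-2/intc-simr owns them */
}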
Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m520xsim.h | 4 ---- arch/m68k/include/asm/mcfsim.h | 14 -------------- arch/m68knommu/platform/coldfire/pit.c | 8 -------- 3 files changed, 26 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h index 91de39c8d86..ed2b69b9680 100644 --- a/arch/m68k/include/asm/m520xsim.h +++ b/arch/m68k/include/asm/m520xsim.h @@ -124,10 +124,6 @@ #define MCF_GPIO_PAR_FECI2C_PAR_SDA_URXD2 (0x02) #define MCF_GPIO_PAR_FECI2C_PAR_SCL_UTXD2 (0x04) -#define ICR_INTRCONF 0x05 -#define MCFPIT_IMR MCFINTC_IMRL -#define MCFPIT_IMR_IBIT (1 << MCFINT_PIT1) - /* * Reset Controll Unit. */ diff --git a/arch/m68k/include/asm/mcfsim.h b/arch/m68k/include/asm/mcfsim.h index da3f2ceff3a..b90425fb50f 100644 --- a/arch/m68k/include/asm/mcfsim.h +++ b/arch/m68k/include/asm/mcfsim.h @@ -101,20 +101,6 @@ #endif -/* - * PIT interrupt settings, if not found in mXXXXsim.h file. - */ -#ifndef ICR_INTRCONF -#define ICR_INTRCONF 0x2b /* PIT1 level 5, priority 3 */ -#endif -#ifndef MCFPIT_IMR -#define MCFPIT_IMR MCFINTC_IMRH -#endif -#ifndef MCFPIT_IMR_IBIT -#define MCFPIT_IMR_IBIT (1 << (MCFINT_PIT1 - 32)) -#endif - - #ifndef __ASSEMBLY__ /* * Definition for the interrupt auto-vectoring support. diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index 61b96211f8f..d8720ee3451 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -32,7 +32,6 @@ */ #define FREQ ((MCF_CLK / 2) / 64) #define TA(a) (MCF_IPSBAR + MCFPIT_BASE1 + (a)) -#define INTC0 (MCF_IPSBAR + MCFICM_INTC0) #define PIT_CYCLES_PER_JIFFY (FREQ / HZ) static u32 pit_cnt; @@ -154,8 +153,6 @@ static struct clocksource pit_clk = { void hw_timer_init(void) { - u32 imr; - cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id()); cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); cf_pit_clockevent.max_delta_ns = @@ -166,11 +163,6 @@ void hw_timer_init(void) setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq); - __raw_writeb(ICR_INTRCONF, INTC0 + MCFINTC_ICR0 + MCFINT_PIT1); - imr = __raw_readl(INTC0 + MCFPIT_IMR); - imr &= ~MCFPIT_IMR_IBIT; - __raw_writel(imr, INTC0 + MCFPIT_IMR); - pit_clk.mult = clocksource_hz2mult(FREQ, pit_clk.shift); clocksource_register(&pit_clk); } -- cgit v1.2.3-70-g09d2 From 91b1b94f88219ea1b747264f4f6995a1202566cb Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 6 May 2009 14:25:40 +1000 Subject: m68knommu: remove timer device interrupt setup for ColdFire 532x With fully implemented interrupt controller code we don't need to do the custom interrupt setup for the timer device of the ColdFire 532x. Remove that code. 
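[ Illustration, not part of the commit above: intc-simr's set_type hook writes a fixed priority into each source's ICR byte at init time, which is what makes the board-level ICR writes removed here redundant. Where a source's ICR byte lands, using the 532x constants from the earlier m532xsim.h change: ]

#include <stdio.h>

#define MCFINT_VECBASE	64
#define MCFINTC0_ICR0	0xFC048040UL	/* 532x, per m532xsim.h */

int main(void)
{
	/* TIMER1 is source 32: MCFSIM_ICR_TIMER1 is defined as ICR0 + 32 */
	unsigned int timer1 = MCFINT_VECBASE + 32;

	printf("vector %u -> ICR byte at 0x%08lx\n",
	       timer1, MCFINTC0_ICR0 + (timer1 - MCFINT_VECBASE));
	return 0;
}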
Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/532x/config.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c index 3d585599588..66e212b58b8 100644 --- a/arch/m68knommu/platform/532x/config.c +++ b/arch/m68knommu/platform/532x/config.c @@ -128,20 +128,6 @@ static void __init m532x_fec_init(void) void mcf_settimericr(unsigned int timer, unsigned int level) { - volatile unsigned char *icrp; - unsigned int icr; - unsigned char irq; - - if (timer <= 2) { - switch (timer) { - case 2: irq = 33; icr = MCFSIM_ICR_TIMER2; break; - default: irq = 32; icr = MCFSIM_ICR_TIMER1; break; - } - - icrp = (volatile unsigned char *) (icr); - *icrp = level; - mcf_enable_irq0(irq); - } } /***************************************************************************/ -- cgit v1.2.3-70-g09d2 From e47cc3d6acacffdc673779a44c21a4a12b20db23 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 6 May 2009 14:28:25 +1000 Subject: m68knommu: mask off all interrupts in ColdFire intc-simr controller The ColdFire intc-simr interrupt controller should mask off all interrupt sources at init time. Doing it here instead of separately in each platform setup. Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m532xsim.h | 33 ---------------------------- arch/m68knommu/platform/coldfire/intc-simr.c | 5 +++++ 2 files changed, 5 insertions(+), 33 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h index 41c57e0f445..021a0e15527 100644 --- a/arch/m68k/include/asm/m532xsim.h +++ b/arch/m68k/include/asm/m532xsim.h @@ -56,8 +56,6 @@ #define MCFSIM_DMA3ICR MCFSIM_ICR9 /* DMA 3 ICR */ -#define MCFSIM_IMR_MASKALL 0xFFFFFFFF /* All SIM intr sources */ - #define MCFINTC0_SIMR 0xFC04801C #define MCFINTC0_CIMR 0xFC04801D #define MCFINTC0_ICR0 0xFC048040 @@ -69,37 +67,6 @@ #define MCFSIM_ICR_TIMER2 (0xFC048040+33) -/* - * Macro to set IMR register. It is 32 bits on the 5307. - */ -#define mcf_getimr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) - -#define mcf_setimr(imr) \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) = (imr); - -#define mcf_getipr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPR)) - -#define mcf_getiprl() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPRL)) - -#define mcf_getiprh() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPRH)) - - -#define mcf_enable_irq0(irq) \ - *((volatile unsigned char *) (MCFINTC0_CIMR)) = (irq); - -#define mcf_enable_irq1(irq) \ - *((volatile unsigned char *) (MCFINTC1_CIMR)) = (irq); - -#define mcf_disable_irq0(irq) \ - *((volatile unsigned char *) (MCFINTC0_SIMR)) = (irq); - -#define mcf_disable_irq1(irq) \ - *((volatile unsigned char *) (MCFINTC1_SIMR)) = (irq); - /* * Define the Cache register flags. 
*/ diff --git a/arch/m68knommu/platform/coldfire/intc-simr.c b/arch/m68knommu/platform/coldfire/intc-simr.c index 86fc2047d7a..1b01e79c2f6 100644 --- a/arch/m68knommu/platform/coldfire/intc-simr.c +++ b/arch/m68knommu/platform/coldfire/intc-simr.c @@ -62,6 +62,11 @@ void __init init_IRQ(void) init_vectors(); + /* Mask all interrupt sources */ + __raw_writeb(0xff, MCFINTC0_SIMR); + if (MCFINTC1_SIMR) + __raw_writeb(0xff, MCFINTC1_SIMR); + for (irq = 0; (irq < NR_IRQS); irq++) { irq_desc[irq].status = IRQ_DISABLED; irq_desc[irq].action = NULL; -- cgit v1.2.3-70-g09d2 From 5187995f0a9253e915dfee83684eae7b692213e6 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 19 May 2009 14:08:47 +1000 Subject: m68knommu: remove duplicate ColdFire mcf_autovector() code Each of the ColdFire CPU platform code that used the old style interrupt controller had its own copy of the mcf_autovector() function. They are all the same, remove them all and create a single function in the common coldfire/intc.c code. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/5206/config.c | 17 ----------------- arch/m68knommu/platform/5206e/config.c | 17 ----------------- arch/m68knommu/platform/5249/config.c | 14 -------------- arch/m68knommu/platform/5272/config.c | 7 ------- arch/m68knommu/platform/5307/config.c | 14 -------------- arch/m68knommu/platform/5407/config.c | 14 -------------- arch/m68knommu/platform/coldfire/intc.c | 27 +++++++++++++++++++++++++++ 7 files changed, 27 insertions(+), 83 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c index f6f79874e9a..481617a23d0 100644 --- a/arch/m68knommu/platform/5206/config.c +++ b/arch/m68knommu/platform/5206/config.c @@ -68,23 +68,6 @@ static void __init m5206_uarts_init(void) /***************************************************************************/ -void mcf_autovector(unsigned int vec) -{ - volatile unsigned char *mbar; - unsigned char icr; - - if ((vec >= 25) && (vec <= 31)) { - vec -= 25; - mbar = (volatile unsigned char *) MCF_MBAR; - icr = MCFSIM_ICR_AUTOVEC | (vec << 3); - *(mbar + MCFSIM_ICR1 + vec) = icr; - vec = 0x1 << (vec + 1); - mcf_setimr(mcf_getimr() & ~vec); - } -} - -/***************************************************************************/ - void mcf_settimericr(unsigned int timer, unsigned int level) { volatile unsigned char *icrp; diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index fcf1400db51..29e565a4443 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -69,23 +69,6 @@ static void __init m5206e_uarts_init(void) /***************************************************************************/ -void mcf_autovector(unsigned int vec) -{ - volatile unsigned char *mbar; - unsigned char icr; - - if ((vec >= 25) && (vec <= 31)) { - vec -= 25; - mbar = (volatile unsigned char *) MCF_MBAR; - icr = MCFSIM_ICR_AUTOVEC | (vec << 3); - *(mbar + MCFSIM_ICR1 + vec) = icr; - vec = 0x1 << (vec + 1); - mcf_setimr(mcf_getimr() & ~vec); - } -} - -/***************************************************************************/ - void mcf_settimericr(unsigned int timer, unsigned int level) { volatile unsigned char *icrp; diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c index 93d99882592..365fb6c5270 100644 --- a/arch/m68knommu/platform/5249/config.c +++ b/arch/m68knommu/platform/5249/config.c @@ -66,20 +66,6 @@ static void __init 
m5249_uarts_init(void) } -/***************************************************************************/ - -void mcf_autovector(unsigned int vec) -{ - volatile unsigned char *mbar; - - if ((vec >= 25) && (vec <= 31)) { - mbar = (volatile unsigned char *) MCF_MBAR; - vec = 0x1 << (vec - 24); - *(mbar + MCFSIM_AVR) |= vec; - mcf_setimr(mcf_getimr() & ~vec); - } -} - /***************************************************************************/ void mcf_settimericr(unsigned int timer, unsigned int level) diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c index 5f95fcde05f..94a66c12c96 100644 --- a/arch/m68knommu/platform/5272/config.c +++ b/arch/m68knommu/platform/5272/config.c @@ -148,13 +148,6 @@ void mcf_disableall(void) /***************************************************************************/ -void mcf_autovector(unsigned int vec) -{ - /* Everything is auto-vectored on the 5272 */ -} - -/***************************************************************************/ - void mcf_settimericr(int timer, int level) { volatile unsigned long *icrp; diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c index 39da9e9ff67..60fe45d5139 100644 --- a/arch/m68knommu/platform/5307/config.c +++ b/arch/m68knommu/platform/5307/config.c @@ -83,20 +83,6 @@ static void __init m5307_uarts_init(void) /***************************************************************************/ -void mcf_autovector(unsigned int vec) -{ - volatile unsigned char *mbar; - - if ((vec >= 25) && (vec <= 31)) { - mbar = (volatile unsigned char *) MCF_MBAR; - vec = 0x1 << (vec - 24); - *(mbar + MCFSIM_AVR) |= vec; - mcf_setimr(mcf_getimr() & ~vec); - } -} - -/***************************************************************************/ - void mcf_settimericr(unsigned int timer, unsigned int level) { volatile unsigned char *icrp; diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c index b41d942bf8d..1e8ef74ea15 100644 --- a/arch/m68knommu/platform/5407/config.c +++ b/arch/m68knommu/platform/5407/config.c @@ -74,20 +74,6 @@ static void __init m5407_uarts_init(void) /***************************************************************************/ -void mcf_autovector(unsigned int vec) -{ - volatile unsigned char *mbar; - - if ((vec >= 25) && (vec <= 31)) { - mbar = (volatile unsigned char *) MCF_MBAR; - vec = 0x1 << (vec - 24); - *(mbar + MCFSIM_AVR) |= vec; - mcf_setimr(mcf_getimr() & ~vec); - } -} - -/***************************************************************************/ - void mcf_settimericr(unsigned int timer, unsigned int level) { volatile unsigned char *icrp; diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68knommu/platform/coldfire/intc.c index c81ab6e5cf2..f7a61346ee2 100644 --- a/arch/m68knommu/platform/coldfire/intc.c +++ b/arch/m68knommu/platform/coldfire/intc.c @@ -18,6 +18,33 @@ #include #include +/* + * Define the vector numbers for the basic 7 interrupt sources. + * These are often referred to as the "external" interrupts in + * the ColdFire documentation (for the early ColdFire cores at least). + */ +#define EIRQ1 25 +#define EIRQ7 31 + +/* + * Interrupts can be "vectored" on the ColdFire cores that support this old + * interrupt controller. That is, the device raising the interrupt can also + * supply the vector number to interrupt through. The AVR register of the + * interrupt controller enables or disables this for each external interrupt, + * so provide generic support for this. 
Setting this up is out-of-band for + * the interrupt system API's, and needs to be done by the driver that + * supports this device. Very few devices actually use this. + */ +void mcf_autovector(int irq) +{ + if ((irq >= EIRQ1) && (irq <= EIRQ7)) { + u8 avec; + avec = __raw_readb(MCF_MBAR + MCFSIM_AVR); + avec |= (0x1 << (irq - EIRQ1 + 1)); + __raw_writeb(avec, MCF_MBAR + MCFSIM_AVR); + } +} + static void intc_irq_mask(unsigned int irq) { } -- cgit v1.2.3-70-g09d2 From f2154bef817ac3d0ea67b52526fd8e88898b66f9 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 19 May 2009 14:38:08 +1000 Subject: m68knommu: merge old ColdFire interrupt controller masking macros Currently the code that supports setting the old style ColdFire interrupt controller mask registers is macros in the include files of each of the CPU types. Merge all these into a set of real masking functions in the old Coldfire interrupt controller code proper. All the macros are basically the same (excepting a register size difference on really early parts). Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m5206sim.h | 16 ++------ arch/m68k/include/asm/m5249sim.h | 13 ------ arch/m68k/include/asm/m5307sim.h | 17 -------- arch/m68k/include/asm/m5407sim.h | 13 ------ arch/m68k/include/asm/mcfintc.h | 73 +++++++++++---------------------- arch/m68knommu/platform/5206/config.c | 11 +++-- arch/m68knommu/platform/5206e/config.c | 12 +++--- arch/m68knommu/platform/5249/config.c | 11 +++-- arch/m68knommu/platform/5307/config.c | 12 +++--- arch/m68knommu/platform/5407/config.c | 12 +++--- arch/m68knommu/platform/coldfire/intc.c | 59 +++++++++++++++++++++++++- 11 files changed, 109 insertions(+), 140 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m5206sim.h b/arch/m68k/include/asm/m5206sim.h index 7be8a2d3e65..b50061aaf8f 100644 --- a/arch/m68k/include/asm/m5206sim.h +++ b/arch/m68k/include/asm/m5206sim.h @@ -117,21 +117,11 @@ #define MCFSIM_DMA2ICR MCFSIM_ICR15 /* DMA 2 ICR */ #endif -#if defined(CONFIG_M5206e) -#define MCFSIM_IMR_MASKALL 0xfffe /* All SIM intr sources */ -#endif - /* - * Macro to get and set IMR register. It is 16 bits on the 5206. + * Let the common interrupt handler code know that the ColdFire 5206* + * family of CPU's only has a 16bit sized IMR register. */ -#define mcf_getimr() \ - *((volatile unsigned short *) (MCF_MBAR + MCFSIM_IMR)) - -#define mcf_setimr(imr) \ - *((volatile unsigned short *) (MCF_MBAR + MCFSIM_IMR)) = (imr) - -#define mcf_getipr() \ - *((volatile unsigned short *) (MCF_MBAR + MCFSIM_IPR)) +#define MCFSIM_IMR_IS_16BITS /****************************************************************************/ #endif /* m5206sim_h */ diff --git a/arch/m68k/include/asm/m5249sim.h b/arch/m68k/include/asm/m5249sim.h index 2c23a83512a..36ed31bbf6c 100644 --- a/arch/m68k/include/asm/m5249sim.h +++ b/arch/m68k/include/asm/m5249sim.h @@ -106,19 +106,6 @@ #define MCFGPIO_PIN_MAX 64 #define MCFGPIO_IRQ_MAX -1 #define MCFGPIO_IRQ_VECBASE -1 -/* - * Macro to set IMR register. It is 32 bits on the 5249. 
- */ -#define MCFSIM_IMR_MASKALL 0x7fffe /* All SIM intr sources */ - -#define mcf_getimr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) - -#define mcf_setimr(imr) \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) = (imr); - -#define mcf_getipr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPR)) /****************************************************************************/ diff --git a/arch/m68k/include/asm/m5307sim.h b/arch/m68k/include/asm/m5307sim.h index 6a1870c925a..60946225699 100644 --- a/arch/m68k/include/asm/m5307sim.h +++ b/arch/m68k/include/asm/m5307sim.h @@ -124,23 +124,6 @@ #define MCFSIM_DMA2ICR MCFSIM_ICR8 /* DMA 2 ICR */ #define MCFSIM_DMA3ICR MCFSIM_ICR9 /* DMA 3 ICR */ -#if defined(CONFIG_M5307) -#define MCFSIM_IMR_MASKALL 0x3fffe /* All SIM intr sources */ -#endif - -/* - * Macro to set IMR register. It is 32 bits on the 5307. - */ -#define mcf_getimr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) - -#define mcf_setimr(imr) \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) = (imr); - -#define mcf_getipr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPR)) - - /* * Some symbol defines for the Parallel Port Pin Assignment Register */ diff --git a/arch/m68k/include/asm/m5407sim.h b/arch/m68k/include/asm/m5407sim.h index 25194756b43..3c4bd5f08cd 100644 --- a/arch/m68k/include/asm/m5407sim.h +++ b/arch/m68k/include/asm/m5407sim.h @@ -96,19 +96,6 @@ #define MCFSIM_DMA2ICR MCFSIM_ICR8 /* DMA 2 ICR */ #define MCFSIM_DMA3ICR MCFSIM_ICR9 /* DMA 3 ICR */ -/* - * Macro to set IMR register. It is 32 bits on the 5407. - */ -#define mcf_getimr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) - -#define mcf_setimr(imr) \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IMR)) = (imr); - -#define mcf_getipr() \ - *((volatile unsigned long *) (MCF_MBAR + MCFSIM_IPR)) - - /* * Some symbol defines for the Parallel Port Pin Assignment Register */ diff --git a/arch/m68k/include/asm/mcfintc.h b/arch/m68k/include/asm/mcfintc.h index a75a001e773..213aa6c68ab 100644 --- a/arch/m68k/include/asm/mcfintc.h +++ b/arch/m68k/include/asm/mcfintc.h @@ -48,59 +48,32 @@ #define MCFSIM_ICR_PRI3 0x03 /* Priority 3 intr */ /* - * Bit definitions for the ICR family of registers. + * IMR bit position definitions. 
*/ -#define MCFSIM_ICR_AUTOVEC 0x80 /* Auto-vectored intr */ -#define MCFSIM_ICR_LEVEL0 0x00 /* Level 0 intr */ -#define MCFSIM_ICR_LEVEL1 0x04 /* Level 1 intr */ -#define MCFSIM_ICR_LEVEL2 0x08 /* Level 2 intr */ -#define MCFSIM_ICR_LEVEL3 0x0c /* Level 3 intr */ -#define MCFSIM_ICR_LEVEL4 0x10 /* Level 4 intr */ -#define MCFSIM_ICR_LEVEL5 0x14 /* Level 5 intr */ -#define MCFSIM_ICR_LEVEL6 0x18 /* Level 6 intr */ -#define MCFSIM_ICR_LEVEL7 0x1c /* Level 7 intr */ +#define MCFINTC_EINT1 1 /* External int #1 */ +#define MCFINTC_EINT2 2 /* External int #2 */ +#define MCFINTC_EINT3 3 /* External int #3 */ +#define MCFINTC_EINT4 4 /* External int #4 */ +#define MCFINTC_EINT5 5 /* External int #5 */ +#define MCFINTC_EINT6 6 /* External int #6 */ +#define MCFINTC_EINT7 7 /* External int #7 */ +#define MCFINTC_SWT 8 /* Software Watchdog */ +#define MCFINTC_TIMER1 9 +#define MCFINTC_TIMER2 10 +#define MCFINTC_I2C 11 /* I2C / MBUS */ +#define MCFINTC_UART0 12 +#define MCFINTC_UART1 13 +#define MCFINTC_DMA0 14 +#define MCFINTC_DMA1 15 +#define MCFINTC_DMA2 16 +#define MCFINTC_DMA3 17 +#define MCFINTC_QSPI 18 -#define MCFSIM_ICR_PRI0 0x00 /* Priority 0 intr */ -#define MCFSIM_ICR_PRI1 0x01 /* Priority 1 intr */ -#define MCFSIM_ICR_PRI2 0x02 /* Priority 2 intr */ -#define MCFSIM_ICR_PRI3 0x03 /* Priority 3 intr */ - -/* - * Bit definitions for the Interrupt Mask register (IMR). - */ -#define MCFSIM_IMR_EINT1 0x0002 /* External intr # 1 */ -#define MCFSIM_IMR_EINT2 0x0004 /* External intr # 2 */ -#define MCFSIM_IMR_EINT3 0x0008 /* External intr # 3 */ -#define MCFSIM_IMR_EINT4 0x0010 /* External intr # 4 */ -#define MCFSIM_IMR_EINT5 0x0020 /* External intr # 5 */ -#define MCFSIM_IMR_EINT6 0x0040 /* External intr # 6 */ -#define MCFSIM_IMR_EINT7 0x0080 /* External intr # 7 */ - -#define MCFSIM_IMR_SWD 0x0100 /* Software Watchdog intr */ -#define MCFSIM_IMR_TIMER1 0x0200 /* TIMER 1 intr */ -#define MCFSIM_IMR_TIMER2 0x0400 /* TIMER 2 intr */ -#define MCFSIM_IMR_MBUS 0x0800 /* MBUS intr */ -#define MCFSIM_IMR_UART1 0x1000 /* UART 1 intr */ -#define MCFSIM_IMR_UART2 0x2000 /* UART 2 intr */ - -#if defined(CONFIG_M5206e) -#define MCFSIM_IMR_DMA1 0x4000 /* DMA 1 intr */ -#define MCFSIM_IMR_DMA2 0x8000 /* DMA 2 intr */ -#elif defined(CONFIG_M5249) || defined(CONFIG_M5307) -#define MCFSIM_IMR_DMA0 0x4000 /* DMA 0 intr */ -#define MCFSIM_IMR_DMA1 0x8000 /* DMA 1 intr */ -#define MCFSIM_IMR_DMA2 0x10000 /* DMA 2 intr */ -#define MCFSIM_IMR_DMA3 0x20000 /* DMA 3 intr */ +#ifndef __ASSEMBLER__ +void mcf_autovector(int irq); +void mcf_setimr(int index); +void mcf_clrimr(int index); #endif -/* - * Mask for all of the SIM devices. Some parts have more or less - * SIM devices. This is a catchall for the sandard set. 
- */ -#ifndef MCFSIM_IMR_MASKALL -#define MCFSIM_IMR_MASKALL 0x3ffe /* All intr sources */ -#endif - - /****************************************************************************/ #endif /* mcfintc_h */ diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c index 481617a23d0..0dce2383320 100644 --- a/arch/m68knommu/platform/5206/config.c +++ b/arch/m68knommu/platform/5206/config.c @@ -49,11 +49,11 @@ static void __init m5206_uart_init_line(int line, int irq) if (line == 0) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); + mcf_clrimr(MCFINTC_UART0); } else if (line == 1) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); + mcf_clrimr(MCFINTC_UART1); } } @@ -75,13 +75,13 @@ void mcf_settimericr(unsigned int timer, unsigned int level) if (timer <= 2) { switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFSIM_IMR_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFSIM_IMR_TIMER1; break; + case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; + default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; } icrp = (volatile unsigned char *) (MCF_MBAR + icr); *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_setimr(mcf_getimr() & ~imr); + mcf_clrimr(imr); } } @@ -100,7 +100,6 @@ void m5206_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_setimr(MCFSIM_IMR_MASKALL); mach_reset = m5206_cpu_reset; } diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index 29e565a4443..08ef7e26898 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -50,11 +50,11 @@ static void __init m5206e_uart_init_line(int line, int irq) if (line == 0) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); + mcf_clrimr(MCFINTC_UART0); } else if (line == 1) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); + mcf_clrimr(MCFINTC_UART1); } } @@ -76,13 +76,13 @@ void mcf_settimericr(unsigned int timer, unsigned int level) if (timer <= 2) { switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFSIM_IMR_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFSIM_IMR_TIMER1; break; + case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; + default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; } icrp = (volatile unsigned char *) (MCF_MBAR + icr); *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_setimr(mcf_getimr() & ~imr); + mcf_clrimr(imr); } } @@ -101,8 +101,6 @@ void m5206e_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_setimr(MCFSIM_IMR_MASKALL); - #if defined(CONFIG_NETtel) /* Copy command line from FLASH to local buffer... 
*/ memcpy(commandp, (char *) 0xf0004000, size); diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c index 365fb6c5270..7261a3d28ad 100644 --- a/arch/m68knommu/platform/5249/config.c +++ b/arch/m68knommu/platform/5249/config.c @@ -48,11 +48,11 @@ static void __init m5249_uart_init_line(int line, int irq) if (line == 0) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); + mcf_clrimr(MCFINTC_UART0); } else if (line == 1) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); + mcf_clrimr(MCFINTC_UART1); } } @@ -75,13 +75,13 @@ void mcf_settimericr(unsigned int timer, unsigned int level) if (timer <= 2) { switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFSIM_IMR_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFSIM_IMR_TIMER1; break; + case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; + default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; } icrp = (volatile unsigned char *) (MCF_MBAR + icr); *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_setimr(mcf_getimr() & ~imr); + mcf_clrimr(imr); } } @@ -100,7 +100,6 @@ void m5249_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_setimr(MCFSIM_IMR_MASKALL); mach_reset = m5249_cpu_reset; } diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c index 60fe45d5139..3e27d2ec03f 100644 --- a/arch/m68knommu/platform/5307/config.c +++ b/arch/m68knommu/platform/5307/config.c @@ -64,11 +64,11 @@ static void __init m5307_uart_init_line(int line, int irq) if (line == 0) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1); + mcf_clrimr(MCFINTC_UART0); } else if (line == 1) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR); - mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); + mcf_clrimr(MCFINTC_UART1); } } @@ -90,13 +90,13 @@ void mcf_settimericr(unsigned int timer, unsigned int level) if (timer <= 2) { switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFSIM_IMR_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFSIM_IMR_TIMER1; break; + case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; + default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; } icrp = (volatile unsigned char *) (MCF_MBAR + icr); *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_setimr(mcf_getimr() & ~imr); + mcf_clrimr(imr); } } @@ -115,8 +115,6 @@ void m5307_cpu_reset(void) void __init config_BSP(char *commandp, int size) { - mcf_setimr(MCFSIM_IMR_MASKALL); - #if defined(CONFIG_NETtel) || \ defined(CONFIG_SECUREEDGEMP3) || defined(CONFIG_CLEOPATRA) /* Copy command line from FLASH to local buffer... 
 */
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index 1e8ef74ea15..8aa94837bbc 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -55,11 +55,11 @@ static void __init m5407_uart_init_line(int line, int irq)
 	if (line == 0) {
 		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
 		writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
-		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART1);
+		mcf_clrimr(MCFINTC_UART0);
 	} else if (line == 1) {
 		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
 		writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
-		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
+		mcf_clrimr(MCFINTC_UART1);
 	}
 }
@@ -81,13 +81,13 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 	if (timer <= 2) {
 		switch (timer) {
-		case 2:  icr = MCFSIM_TIMER2ICR; imr = MCFSIM_IMR_TIMER2; break;
-		default: icr = MCFSIM_TIMER1ICR; imr = MCFSIM_IMR_TIMER1; break;
+		case 2:  icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break;
+		default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break;
 		}
 
 		icrp = (volatile unsigned char *) (MCF_MBAR + icr);
 		*icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3;
-		mcf_setimr(mcf_getimr() & ~imr);
+		mcf_clrimr(imr);
 	}
 }
@@ -106,8 +106,6 @@ void m5407_cpu_reset(void)
 
 void __init config_BSP(char *commandp, int size)
 {
-	mcf_setimr(MCFSIM_IMR_MASKALL);
-
 #if defined(CONFIG_CLEOPATRA)
 	/* Different timer setup - to prevent device clash */
 	mcf_timervector = 30;
diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68knommu/platform/coldfire/intc.c
index f7a61346ee2..88bffac50c6 100644
--- a/arch/m68knommu/platform/coldfire/intc.c
+++ b/arch/m68knommu/platform/coldfire/intc.c
@@ -1,5 +1,5 @@
 /*
- * intc.c
+ * intc.c -- support for the old ColdFire interrupt controller
 *
 * (C) Copyright 2009, Greg Ungerer
 *
@@ -26,6 +26,62 @@
 #define	EIRQ1	25
 #define	EIRQ7	31
 
+/*
+ * In the early version 2 core ColdFire parts the IMR register was 16 bits
+ * in size. Version 3 (and later version 2) core parts have a 32 bit
+ * sized IMR register. Provide some size independent methods to access the
+ * IMR register.
+ */
+#ifdef MCFSIM_IMR_IS_16BITS
+
+void mcf_setimr(int index)
+{
+	u16 imr;
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	__raw_writew(imr | (0x1 << index), MCF_MBAR + MCFSIM_IMR);
+}
+
+void mcf_clrimr(int index)
+{
+	u16 imr;
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	__raw_writew(imr & ~(0x1 << index), MCF_MBAR + MCFSIM_IMR);
+}
+
+void mcf_maskimr(unsigned int mask)
+{
+	u16 imr;
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	imr |= mask;
+	__raw_writew(imr, MCF_MBAR + MCFSIM_IMR);
+}
+
+#else
+
+void mcf_setimr(int index)
+{
+	u32 imr;
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	__raw_writel(imr | (0x1 << index), MCF_MBAR + MCFSIM_IMR);
+}
+
+void mcf_clrimr(int index)
+{
+	u32 imr;
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	__raw_writel(imr & ~(0x1 << index), MCF_MBAR + MCFSIM_IMR);
+}
+
+void mcf_maskimr(unsigned int mask)
+{
+	u32 imr;
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	imr |= mask;
+	__raw_writel(imr, MCF_MBAR + MCFSIM_IMR);
+}
+
+#endif
+
 /*
 * Interrupts can be "vectored" on the ColdFire cores that support this old
 * interrupt controller.
That is, the device raising the interrupt can also
@@ -70,6 +126,7 @@ void __init init_IRQ(void)
 	int irq;
 
 	init_vectors();
+	mcf_maskimr(0xffffffff);
 
 	for (irq = 0; (irq < NR_IRQS); irq++) {
 		irq_desc[irq].status = IRQ_DISABLED;
--
cgit v1.2.3-70-g09d2

From f9311f26434cea3e926f56ca2aa3e5740e962c72 Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Tue, 19 May 2009 14:39:19 +1000
Subject: m68knommu: support code to mask external interrupts on old ColdFire CPU's

The external interrupts used on the old ColdFire parts with the old style
interrupt controller can be properly masked and unmasked in the interrupt
handling code.

Signed-off-by: Greg Ungerer
---
 arch/m68knommu/platform/coldfire/intc.c | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'arch/m68knommu')

diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68knommu/platform/coldfire/intc.c
index 88bffac50c6..14db26bf6e2 100644
--- a/arch/m68knommu/platform/coldfire/intc.c
+++ b/arch/m68knommu/platform/coldfire/intc.c
@@ -103,10 +103,14 @@ void mcf_autovector(int irq)
 
 static void intc_irq_mask(unsigned int irq)
 {
+	if ((irq >= EIRQ1) && (irq <= EIRQ7))
+		mcf_setimr(irq - EIRQ1 + 1);
 }
 
 static void intc_irq_unmask(unsigned int irq)
 {
+	if ((irq >= EIRQ1) && (irq <= EIRQ7))
+		mcf_clrimr(irq - EIRQ1 + 1);
 }
 
 static int intc_irq_set_type(unsigned int irq, unsigned int type)
--
cgit v1.2.3-70-g09d2

From 04b75b10dceadf937e3707ecc3dfccf6a076fd29 Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Tue, 19 May 2009 14:52:40 +1000
Subject: m68knommu: simplify ColdFire "timers" clock initialization

The ColdFire "timers" clock setup can be simplified. There is really no
need for the flexible per-platform setup code. The clock interrupt can be
hard defined per CPU platform (in CPU include files). This makes the
actual timer code simpler.

Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/m5206sim.h          | 13 ++++++-----
 arch/m68k/include/asm/m5249sim.h          |  6 +++++
 arch/m68k/include/asm/m5272sim.h          |  5 ++++
 arch/m68k/include/asm/m5307sim.h          |  6 +++++
 arch/m68k/include/asm/m5407sim.h          |  5 ++++
 arch/m68knommu/platform/5206/config.c     | 25 +++++++++-----------
 arch/m68knommu/platform/5206e/config.c    | 25 +++++++++-----------
 arch/m68knommu/platform/5249/config.c     | 26 +++++++++------------
 arch/m68knommu/platform/5272/config.c     | 22 +++++++----------
 arch/m68knommu/platform/5307/config.c     | 35 +++++++++------------------
 arch/m68knommu/platform/5407/config.c     | 39 ++++++++++---------------------
 arch/m68knommu/platform/coldfire/timers.c | 17 ++++----------
 12 files changed, 97 insertions(+), 127 deletions(-)
(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/m5206sim.h b/arch/m68k/include/asm/m5206sim.h
index b50061aaf8f..9c384e294af 100644
--- a/arch/m68k/include/asm/m5206sim.h
+++ b/arch/m68k/include/asm/m5206sim.h
@@ -88,12 +88,19 @@
 #define	MCFSIM_PADDR	(MCF_MBAR + 0x1c5)	/* Parallel Direction (r/w) */
 #define	MCFSIM_PADAT	(MCF_MBAR + 0x1c9)	/* Parallel Port Value (r/w) */
 
+/*
+ * Define system peripheral IRQ usage.
+ */
+#define	MCF_IRQ_TIMER		30		/* Timer0, Level 6 */
+#define	MCF_IRQ_PROFILER	31		/* Timer1, Level 7 */
+
 /*
 * Generic GPIO
 */
 #define MCFGPIO_PIN_MAX		8
 #define MCFGPIO_IRQ_VECBASE	-1
 #define MCFGPIO_IRQ_MAX		-1
+
 /*
 * Some symbol defines for the Parallel Port Pin Assignment Register
 */
@@ -117,11 +124,5 @@
 #define	MCFSIM_DMA2ICR		MCFSIM_ICR15	/* DMA 2 ICR */
 #endif
 
-/*
- * Let the common interrupt handler code know that the ColdFire 5206*
- * family of CPU's only has a 16bit sized IMR register.
- */ -#define MCFSIM_IMR_IS_16BITS - /****************************************************************************/ #endif /* m5206sim_h */ diff --git a/arch/m68k/include/asm/m5249sim.h b/arch/m68k/include/asm/m5249sim.h index 36ed31bbf6c..8d76a193071 100644 --- a/arch/m68k/include/asm/m5249sim.h +++ b/arch/m68k/include/asm/m5249sim.h @@ -70,6 +70,12 @@ #define MCFSIM_DMA2ICR MCFSIM_ICR8 /* DMA 2 ICR */ #define MCFSIM_DMA3ICR MCFSIM_ICR9 /* DMA 3 ICR */ +/* + * Define system peripheral IRQ usage. + */ +#define MCF_IRQ_TIMER 30 /* Timer0, Level 6 */ +#define MCF_IRQ_PROFILER 31 /* Timer1, Level 7 */ + /* * General purpose IO registers (in MBAR2). */ diff --git a/arch/m68k/include/asm/m5272sim.h b/arch/m68k/include/asm/m5272sim.h index 0665ba1a5d3..469686ffc4a 100644 --- a/arch/m68k/include/asm/m5272sim.h +++ b/arch/m68k/include/asm/m5272sim.h @@ -73,6 +73,11 @@ #define MCFSIM_PCDAT (MCF_MBAR + 0x96) /* Port C Data (r/w) */ #define MCFSIM_PDCNT (MCF_MBAR + 0x98) /* Port D Control (r/w) */ +/* + * Define system peripheral IRQ usage. + */ +#define MCF_IRQ_TIMER 69 /* Timer0, Level 6 */ +#define MCF_IRQ_PROFILER 70 /* Timer1, Level 7 */ /* * Generic GPIO support diff --git a/arch/m68k/include/asm/m5307sim.h b/arch/m68k/include/asm/m5307sim.h index 60946225699..c6830e5b54c 100644 --- a/arch/m68k/include/asm/m5307sim.h +++ b/arch/m68k/include/asm/m5307sim.h @@ -124,6 +124,7 @@ #define MCFSIM_DMA2ICR MCFSIM_ICR8 /* DMA 2 ICR */ #define MCFSIM_DMA3ICR MCFSIM_ICR9 /* DMA 3 ICR */ + /* * Some symbol defines for the Parallel Port Pin Assignment Register */ @@ -139,6 +140,11 @@ #define IRQ3_LEVEL6 0x40 #define IRQ1_LEVEL2 0x20 +/* + * Define system peripheral IRQ usage. + */ +#define MCF_IRQ_TIMER 30 /* Timer0, Level 6 */ +#define MCF_IRQ_PROFILER 31 /* Timer1, Level 7 */ /* * Define the Cache register flags. diff --git a/arch/m68k/include/asm/m5407sim.h b/arch/m68k/include/asm/m5407sim.h index 3c4bd5f08cd..c399abbf953 100644 --- a/arch/m68k/include/asm/m5407sim.h +++ b/arch/m68k/include/asm/m5407sim.h @@ -111,6 +111,11 @@ #define IRQ3_LEVEL6 0x40 #define IRQ1_LEVEL2 0x20 +/* + * Define system peripheral IRQ usage. + */ +#define MCF_IRQ_TIMER 30 /* Timer0, Level 6 */ +#define MCF_IRQ_PROFILER 31 /* Timer1, Level 7 */ /* * Define the Cache register flags. 
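To make the effect of these fixed defines concrete, here is a minimal sketch
(not part of any patch in this series) of how common timer code can consume
MCF_IRQ_TIMER once every CPU header provides it. The EXAMPLE_* names and the
handler body are assumptions for illustration only; struct irqaction and
setup_irq() are the 2.6-era kernel API used by the timers.c code shown later
in this series.

/*
 * Sketch only: consuming the per-CPU MCF_IRQ_TIMER define directly,
 * with no per-board mcf_timervector indirection needed any more.
 */
static irqreturn_t example_timer_tick(int irq, void *dummy)
{
	/* acknowledge the timer hardware here, then run the system tick */
	return IRQ_HANDLED;
}

static struct irqaction example_timer_irq = {
	.name	 = "timer",
	.flags	 = IRQF_DISABLED | IRQF_TIMER,
	.handler = example_timer_tick,
};

static void __init example_timer_setup(void)
{
	setup_irq(MCF_IRQ_TIMER, &example_timer_irq);
}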
diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c index 0dce2383320..c1d24796ef2 100644 --- a/arch/m68knommu/platform/5206/config.c +++ b/arch/m68knommu/platform/5206/config.c @@ -68,21 +68,17 @@ static void __init m5206_uarts_init(void) /***************************************************************************/ -void mcf_settimericr(unsigned int timer, unsigned int level) +static void __init m5206_timers_init(void) { - volatile unsigned char *icrp; - unsigned int icr, imr; - - if (timer <= 2) { - switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; - } - - icrp = (volatile unsigned char *) (MCF_MBAR + icr); - *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_clrimr(imr); - } + /* Timer1 is always used as system timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER1ICR); + +#ifdef CONFIG_HIGHPROFILE + /* Timer2 is to be used as a high speed profile timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER2ICR); +#endif } /***************************************************************************/ @@ -101,6 +97,7 @@ void m5206_cpu_reset(void) void __init config_BSP(char *commandp, int size) { mach_reset = m5206_cpu_reset; + m5206_timers_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index 08ef7e26898..363296af2ee 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -69,21 +69,17 @@ static void __init m5206e_uarts_init(void) /***************************************************************************/ -void mcf_settimericr(unsigned int timer, unsigned int level) +static void __init m5206e_timers_init(void) { - volatile unsigned char *icrp; - unsigned int icr, imr; - - if (timer <= 2) { - switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; - } - - icrp = (volatile unsigned char *) (MCF_MBAR + icr); - *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_clrimr(imr); - } + /* Timer1 is always used as system timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER1ICR); + +#ifdef CONFIG_HIGHPROFILE + /* Timer2 is to be used as a high speed profile timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER2ICR); +#endif } /***************************************************************************/ @@ -108,6 +104,7 @@ void __init config_BSP(char *commandp, int size) #endif /* CONFIG_NETtel */ mach_reset = m5206e_cpu_reset; + m5206e_timers_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c index 7261a3d28ad..51202b1096c 100644 --- a/arch/m68knommu/platform/5249/config.c +++ b/arch/m68knommu/platform/5249/config.c @@ -65,24 +65,19 @@ static void __init m5249_uarts_init(void) m5249_uart_init_line(line, m5249_uart_platform[line].irq); } - /***************************************************************************/ -void mcf_settimericr(unsigned int timer, unsigned int level) +static void __init m5249_timers_init(void) { - volatile unsigned char *icrp; - unsigned int 
icr, imr; - - if (timer <= 2) { - switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; - } - - icrp = (volatile unsigned char *) (MCF_MBAR + icr); - *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_clrimr(imr); - } + /* Timer1 is always used as system timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER1ICR); + +#ifdef CONFIG_HIGHPROFILE + /* Timer2 is to be used as a high speed profile timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER2ICR); +#endif } /***************************************************************************/ @@ -101,6 +96,7 @@ void m5249_cpu_reset(void) void __init config_BSP(char *commandp, int size) { mach_reset = m5249_cpu_reset; + m5249_timers_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c index 94a66c12c96..b16add9aa4e 100644 --- a/arch/m68knommu/platform/5272/config.c +++ b/arch/m68knommu/platform/5272/config.c @@ -20,12 +20,6 @@ /***************************************************************************/ -extern unsigned int mcf_timervector; -extern unsigned int mcf_profilevector; -extern unsigned int mcf_timerlevel; - -/***************************************************************************/ - /* * Some platforms need software versions of the GPIO data registers. */ @@ -148,14 +142,15 @@ void mcf_disableall(void) /***************************************************************************/ -void mcf_settimericr(int timer, int level) +static void __init m5272_timers_init(void) { - volatile unsigned long *icrp; + /* Timer1 @ level6 is always used as system timer */ + writel((0x8 | 0x6) << ((4 - 1) * 4), MCF_MBAR + MCFSIM_ICR1); - if ((timer >= 1 ) && (timer <= 4)) { - icrp = (volatile unsigned long *) (MCF_MBAR + MCFSIM_ICR1); - *icrp = (0x8 | level) << ((4 - timer) * 4); - } +#ifdef CONFIG_HIGHPROFILE + /* Timer2 @ level7 is to be used as a high speed profile timer */ + writel((0x8 | 0x7) << ((4 - 2) * 4), MCF_MBAR + MCFSIM_ICR1); +#endif } /***************************************************************************/ @@ -195,9 +190,8 @@ void __init config_BSP(char *commandp, int size) commandp[size-1] = 0; #endif - mcf_timervector = 69; - mcf_profilevector = 70; mach_reset = m5272_cpu_reset; + m5272_timers_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c index 3e27d2ec03f..b711597ac8e 100644 --- a/arch/m68knommu/platform/5307/config.c +++ b/arch/m68knommu/platform/5307/config.c @@ -21,12 +21,6 @@ /***************************************************************************/ -extern unsigned int mcf_timervector; -extern unsigned int mcf_profilevector; -extern unsigned int mcf_timerlevel; - -/***************************************************************************/ - /* * Some platforms need software versions of the GPIO data registers. 
*/ @@ -83,21 +77,17 @@ static void __init m5307_uarts_init(void) /***************************************************************************/ -void mcf_settimericr(unsigned int timer, unsigned int level) +static void __init m5307_timers_init(void) { - volatile unsigned char *icrp; - unsigned int icr, imr; - - if (timer <= 2) { - switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; - } - - icrp = (volatile unsigned char *) (MCF_MBAR + icr); - *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_clrimr(imr); - } + /* Timer1 is always used as system timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER1ICR); + +#ifdef CONFIG_HIGHPROFILE + /* Timer2 is to be used as a high speed profile timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER2ICR); +#endif } /***************************************************************************/ @@ -120,13 +110,10 @@ void __init config_BSP(char *commandp, int size) /* Copy command line from FLASH to local buffer... */ memcpy(commandp, (char *) 0xf0004000, size); commandp[size-1] = 0; - /* Different timer setup - to prevent device clash */ - mcf_timervector = 30; - mcf_profilevector = 31; - mcf_timerlevel = 6; #endif mach_reset = m5307_cpu_reset; + m5307_timers_init(); #ifdef CONFIG_BDM_DISABLE /* diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c index 8aa94837bbc..cc80029a4a0 100644 --- a/arch/m68knommu/platform/5407/config.c +++ b/arch/m68knommu/platform/5407/config.c @@ -20,12 +20,6 @@ /***************************************************************************/ -extern unsigned int mcf_timervector; -extern unsigned int mcf_profilevector; -extern unsigned int mcf_timerlevel; - -/***************************************************************************/ - static struct mcf_platform_uart m5407_uart_platform[] = { { .mapbase = MCF_MBAR + MCFUART_BASE1, @@ -59,6 +53,7 @@ static void __init m5407_uart_init_line(int line, int irq) } else if (line == 1) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR); + mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2); mcf_clrimr(MCFINTC_UART1); } } @@ -74,21 +69,17 @@ static void __init m5407_uarts_init(void) /***************************************************************************/ -void mcf_settimericr(unsigned int timer, unsigned int level) +static void __init m5407_timers_init(void) { - volatile unsigned char *icrp; - unsigned int icr, imr; - - if (timer <= 2) { - switch (timer) { - case 2: icr = MCFSIM_TIMER2ICR; imr = MCFINTC_TIMER2; break; - default: icr = MCFSIM_TIMER1ICR; imr = MCFINTC_TIMER1; break; - } - - icrp = (volatile unsigned char *) (MCF_MBAR + icr); - *icrp = MCFSIM_ICR_AUTOVEC | (level << 2) | MCFSIM_ICR_PRI3; - mcf_clrimr(imr); - } + /* Timer1 is always used as system timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER1ICR); + +#ifdef CONFIG_HIGHPROFILE + /* Timer2 is to be used as a high speed profile timer */ + writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, + MCF_MBAR + MCFSIM_TIMER2ICR); +#endif } /***************************************************************************/ @@ -106,14 +97,8 @@ void m5407_cpu_reset(void) void __init config_BSP(char *commandp, int size) { -#if defined(CONFIG_CLEOPATRA) - /* Different 
timer setup - to prevent device clash */ - mcf_timervector = 30; - mcf_profilevector = 31; - mcf_timerlevel = 6; -#endif - mach_reset = m5407_cpu_reset; + m5407_timers_init(); } /***************************************************************************/ diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68knommu/platform/coldfire/timers.c index 1ba8a373165..77b0c17ce6b 100644 --- a/arch/m68knommu/platform/coldfire/timers.c +++ b/arch/m68knommu/platform/coldfire/timers.c @@ -30,15 +30,6 @@ #define FREQ (MCF_BUSCLK / 16) #define TA(a) (MCF_MBAR + MCFTIMER_BASE1 + (a)) -/* - * Default the timer and vector to use for ColdFire. Some ColdFire - * CPU's and some boards may want different. Their sub-architecture - * startup code (in config.c) can change these if they want. - */ -unsigned int mcf_timervector = 29; -unsigned int mcf_profilevector = 31; -unsigned int mcf_timerlevel = 5; - /* * These provide the underlying interrupt vector support. * Unfortunately it is a little different on each ColdFire. @@ -107,7 +98,7 @@ static struct clocksource mcftmr_clk = { void hw_timer_init(void) { - setup_irq(mcf_timervector, &mcftmr_timer_irq); + setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq); __raw_writew(MCFTIMER_TMR_DISABLE, TA(MCFTIMER_TMR)); mcftmr_cycles_per_jiffy = FREQ / HZ; @@ -124,7 +115,7 @@ void hw_timer_init(void) mcftmr_clk.mult = clocksource_hz2mult(FREQ, mcftmr_clk.shift); clocksource_register(&mcftmr_clk); - mcf_settimericr(1, mcf_timerlevel); + mcf_clrimr(MCFINTC_TIMER1); #ifdef CONFIG_HIGHPROFILE coldfire_profile_init(); @@ -171,7 +162,7 @@ void coldfire_profile_init(void) printk(KERN_INFO "PROFILE: lodging TIMER2 @ %dHz as profile timer\n", PROFILEHZ); - setup_irq(mcf_profilevector, &coldfire_profile_irq); + setup_irq(MCF_IRQ_PROFILER, &coldfire_profile_irq); /* Set up TIMER 2 as high speed profile clock */ __raw_writew(MCFTIMER_TMR_DISABLE, PA(MCFTIMER_TMR)); @@ -180,7 +171,7 @@ void coldfire_profile_init(void) __raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 | MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, PA(MCFTIMER_TMR)); - mcf_settimericr(2, 7); + mcf_clrimr(MCFINTC_TIMER2); } /***************************************************************************/ -- cgit v1.2.3-70-g09d2 From f6a66276f5fdc018d2a9378c71de3bae13c588d7 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 19 May 2009 15:00:01 +1000 Subject: m68knommu: clean up ColdFire 532x CPU timer setup The newer ColdFire 532x family of CPU's uses the old timer, but has a newer interrupt controller. It doesn't need the special timer setup that was required when using the older interrupt controller. Remove the dead timer irq and level setting code, and define the hard coded vector. Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/m532xsim.h | 5 +++++ arch/m68knommu/platform/532x/config.c | 17 +---------------- 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h index 021a0e15527..36bf15aec9a 100644 --- a/arch/m68k/include/asm/m532xsim.h +++ b/arch/m68k/include/asm/m532xsim.h @@ -66,6 +66,11 @@ #define MCFSIM_ICR_TIMER1 (0xFC048040+32) #define MCFSIM_ICR_TIMER2 (0xFC048040+33) +/* + * Define system peripheral IRQ usage. + */ +#define MCF_IRQ_TIMER (64 + 32) /* Timer0 */ +#define MCF_IRQ_PROFILER (64 + 33) /* Timer1 */ /* * Define the Cache register flags. 
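The 532x timer values above are simply the vector base plus the
interrupt-source number of the newer interrupt controller. A small sketch of
that arithmetic follows; the EXAMPLE_* names are illustrative and not taken
from the patch.

/*
 * Sketch: how the hard-coded 532x timer vectors are composed.
 * Vectors 0-63 are reserved for CPU exceptions on m68k, so user
 * interrupt vectors start at 64.
 */
#define EXAMPLE_VECBASE		64	/* first user interrupt vector */
#define EXAMPLE_SRC_TIMER0	32	/* intc source number of Timer0 */
#define EXAMPLE_IRQ_TIMER	(EXAMPLE_VECBASE + EXAMPLE_SRC_TIMER0)	/* == 96 */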
diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c
index 66e212b58b8..d632948e64e 100644
--- a/arch/m68knommu/platform/532x/config.c
+++ b/arch/m68knommu/platform/532x/config.c
@@ -30,12 +30,6 @@
 
 /***************************************************************************/
 
-extern unsigned int mcf_timervector;
-extern unsigned int mcf_profilevector;
-extern unsigned int mcf_timerlevel;
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m532x_uart_platform[] = {
 	{
 		.mapbase	= MCFUART_BASE1,
@@ -87,6 +81,7 @@ static struct platform_device m532x_fec = {
 	.num_resources	= ARRAY_SIZE(m532x_fec_resources),
 	.resource	= m532x_fec_resources,
 };
+
 static struct platform_device *m532x_devices[] __initdata = {
 	&m532x_uart,
 	&m532x_fec,
@@ -126,12 +121,6 @@ static void __init m532x_fec_init(void)
 
 /***************************************************************************/
 
-void mcf_settimericr(unsigned int timer, unsigned int level)
-{
-}
-
-/***************************************************************************/
-
 static void m532x_cpu_reset(void)
 {
 	local_irq_disable();
@@ -153,10 +142,6 @@ void __init config_BSP(char *commandp, int size)
 	}
 #endif
 
-	mcf_timervector = 64+32;
-	mcf_profilevector = 64+33;
-	mach_reset = m532x_cpu_reset;
-
 #ifdef CONFIG_BDM_DISABLE
 	/*
 	 * Disable the BDM clocking. This also turns off most of the rest of
--
cgit v1.2.3-70-g09d2

From 39f0fb6a3448cfc316e0d5295ed1b121db50037e Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Fri, 22 May 2009 13:33:35 +1000
Subject: m68knommu: map ColdFire interrupts to correct masking bits

The older simple ColdFire interrupt controller has no one-to-one mapping
of interrupt numbers to bits in the interrupt mask register. Create a
mapping array that each ColdFire CPU type can populate with its available
interrupts and the bits that each use in the interrupt mask register.

Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/mcfintc.h         | 22 ++++++++++++-----
 arch/m68knommu/platform/5206/config.c   | 13 +++++++---
 arch/m68knommu/platform/5206e/config.c  | 13 +++++++---
 arch/m68knommu/platform/5249/config.c   |  8 +++---
 arch/m68knommu/platform/5307/config.c   | 14 ++++++++---
 arch/m68knommu/platform/5407/config.c   | 15 ++++++++---
 arch/m68knommu/platform/coldfire/intc.c | 44 ++++++++++++++++++++-------------
 7 files changed, 90 insertions(+), 39 deletions(-)
(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/mcfintc.h b/arch/m68k/include/asm/mcfintc.h
index 213aa6c68ab..4183320a381 100644
--- a/arch/m68k/include/asm/mcfintc.h
+++ b/arch/m68k/include/asm/mcfintc.h
@@ -24,11 +24,6 @@
 * interrupt control purposes.
 */
 
-/*
- * Define the base address of the SIM within the MBAR address space.
- */
-#define	MCFSIM_BASE		0x0		/* Base address within SIM */
-
 /*
 * Bit definitions for the ICR family of registers.
 */
@@ -48,7 +43,9 @@
 #define	MCFSIM_ICR_PRI3		0x03		/* Priority 3 intr */
 
 /*
- * IMR bit position definitions.
+ * IMR bit position definitions. Not all ColdFire parts with this interrupt
+ * controller actually support all of these interrupt sources. But the bit
+ * numbers are the same in all cores.
 */
 #define	MCFINTC_EINT1		1		/* External int #1 */
 #define	MCFINTC_EINT2		2		/* External int #2 */
@@ -70,6 +67,19 @@
 #define	MCFINTC_QSPI		18
 
 #ifndef __ASSEMBLER__
+
+/*
+ * There is no one-to-one correspondence between the interrupt number (irq)
+ * and the bit fields on the mask register. So we create a per-CPU-type
+ * mapping of irq to mask bit.
The CPU platform code needs to register + * its supported irq's at init time, using this function. + */ +extern unsigned char mcf_irq2imr[]; +static inline void mcf_mapirq2imr(int irq, int imr) +{ + mcf_irq2imr[irq] = imr; +} + void mcf_autovector(int irq); void mcf_setimr(int index); void mcf_clrimr(int index); diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c index c1d24796ef2..9c335465e66 100644 --- a/arch/m68knommu/platform/5206/config.c +++ b/arch/m68knommu/platform/5206/config.c @@ -49,11 +49,11 @@ static void __init m5206_uart_init_line(int line, int irq) if (line == 0) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART0); + mcf_mapirq2imr(irq, MCFINTC_UART0); } else if (line == 1) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART1); + mcf_mapirq2imr(irq, MCFINTC_UART1); } } @@ -73,11 +73,13 @@ static void __init m5206_timers_init(void) /* Timer1 is always used as system timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER1ICR); + mcf_mapirq2imr(MCF_IRQ_TIMER, MCFINTC_TIMER1); #ifdef CONFIG_HIGHPROFILE /* Timer2 is to be used as a high speed profile timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER2ICR); + mcf_mapirq2imr(MCF_IRQ_PROFILER, MCFINTC_TIMER2); #endif } @@ -98,13 +100,18 @@ void __init config_BSP(char *commandp, int size) { mach_reset = m5206_cpu_reset; m5206_timers_init(); + m5206_uarts_init(); + + /* Only support the external interrupts on their primary level */ + mcf_mapirq2imr(25, MCFINTC_EINT1); + mcf_mapirq2imr(28, MCFINTC_EINT4); + mcf_mapirq2imr(31, MCFINTC_EINT7); } /***************************************************************************/ static int __init init_BSP(void) { - m5206_uarts_init(); platform_add_devices(m5206_devices, ARRAY_SIZE(m5206_devices)); return 0; } diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index 363296af2ee..0f41ba82a3b 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -50,11 +50,11 @@ static void __init m5206e_uart_init_line(int line, int irq) if (line == 0) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCFUART_BASE1 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART0); + mcf_mapirq2imr(irq, MCFINTC_UART0); } else if (line == 1) { writel(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCFUART_BASE2 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART1); + mcf_mapirq2imr(irq, MCFINTC_UART1); } } @@ -74,11 +74,13 @@ static void __init m5206e_timers_init(void) /* Timer1 is always used as system timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER1ICR); + mcf_mapirq2imr(MCF_IRQ_TIMER, MCFINTC_TIMER1); #ifdef CONFIG_HIGHPROFILE /* Timer2 is to be used as a high speed profile timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER2ICR); + mcf_mapirq2imr(MCF_IRQ_PROFILER, MCFINTC_TIMER2); #endif } @@ -105,13 +107,18 @@ void __init config_BSP(char *commandp, int size) mach_reset = m5206e_cpu_reset; m5206e_timers_init(); + m5206e_uarts_init(); + + /* Only support the external interrupts on their primary level */ + mcf_mapirq2imr(25, MCFINTC_EINT1); + mcf_mapirq2imr(28, MCFINTC_EINT4); + 
mcf_mapirq2imr(31, MCFINTC_EINT7); } /***************************************************************************/ static int __init init_BSP(void) { - m5206e_uarts_init(); platform_add_devices(m5206e_devices, ARRAY_SIZE(m5206e_devices)); return 0; } diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c index 51202b1096c..646f5ba462f 100644 --- a/arch/m68knommu/platform/5249/config.c +++ b/arch/m68knommu/platform/5249/config.c @@ -48,11 +48,11 @@ static void __init m5249_uart_init_line(int line, int irq) if (line == 0) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART0); + mcf_mapirq2imr(irq, MCFINTC_UART0); } else if (line == 1) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART1); + mcf_mapirq2imr(irq, MCFINTC_UART1); } } @@ -72,11 +72,13 @@ static void __init m5249_timers_init(void) /* Timer1 is always used as system timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER1ICR); + mcf_mapirq2imr(MCF_IRQ_TIMER, MCFINTC_TIMER1); #ifdef CONFIG_HIGHPROFILE /* Timer2 is to be used as a high speed profile timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER2ICR); + mcf_mapirq2imr(MCF_IRQ_PROFILER, MCFINTC_TIMER2); #endif } @@ -97,13 +99,13 @@ void __init config_BSP(char *commandp, int size) { mach_reset = m5249_cpu_reset; m5249_timers_init(); + m5249_uarts_init(); } /***************************************************************************/ static int __init init_BSP(void) { - m5249_uarts_init(); platform_add_devices(m5249_devices, ARRAY_SIZE(m5249_devices)); return 0; } diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c index b711597ac8e..00900ac06a9 100644 --- a/arch/m68knommu/platform/5307/config.c +++ b/arch/m68knommu/platform/5307/config.c @@ -58,11 +58,11 @@ static void __init m5307_uart_init_line(int line, int irq) if (line == 0) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR); writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART0); + mcf_mapirq2imr(irq, MCFINTC_UART0); } else if (line == 1) { writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR); writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR); - mcf_clrimr(MCFINTC_UART1); + mcf_mapirq2imr(irq, MCFINTC_UART1); } } @@ -82,11 +82,13 @@ static void __init m5307_timers_init(void) /* Timer1 is always used as system timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER1ICR); + mcf_mapirq2imr(MCF_IRQ_TIMER, MCFINTC_TIMER1); #ifdef CONFIG_HIGHPROFILE /* Timer2 is to be used as a high speed profile timer */ writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3, MCF_MBAR + MCFSIM_TIMER2ICR); + mcf_mapirq2imr(MCF_IRQ_PROFILER, MCFINTC_TIMER2); #endif } @@ -114,6 +116,13 @@ void __init config_BSP(char *commandp, int size) mach_reset = m5307_cpu_reset; m5307_timers_init(); + m5307_uarts_init(); + + /* Only support the external interrupts on their primary level */ + mcf_mapirq2imr(25, MCFINTC_EINT1); + mcf_mapirq2imr(27, MCFINTC_EINT3); + mcf_mapirq2imr(29, MCFINTC_EINT5); + mcf_mapirq2imr(31, MCFINTC_EINT7); #ifdef CONFIG_BDM_DISABLE /* @@ -129,7 +138,6 @@ void __init config_BSP(char *commandp, int size) static int __init init_BSP(void) { - 
m5307_uarts_init();
 	platform_add_devices(m5307_devices, ARRAY_SIZE(m5307_devices));
 	return 0;
 }
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index cc80029a4a0..70ea789a400 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -49,12 +49,11 @@ static void __init m5407_uart_init_line(int line, int irq)
 	if (line == 0) {
 		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI1, MCF_MBAR + MCFSIM_UART1ICR);
 		writeb(irq, MCF_MBAR + MCFUART_BASE1 + MCFUART_UIVR);
-		mcf_clrimr(MCFINTC_UART0);
+		mcf_mapirq2imr(irq, MCFINTC_UART0);
 	} else if (line == 1) {
 		writeb(MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI2, MCF_MBAR + MCFSIM_UART2ICR);
 		writeb(irq, MCF_MBAR + MCFUART_BASE2 + MCFUART_UIVR);
-		mcf_setimr(mcf_getimr() & ~MCFSIM_IMR_UART2);
-		mcf_clrimr(MCFINTC_UART1);
+		mcf_mapirq2imr(irq, MCFINTC_UART1);
 	}
 }
@@ -74,11 +73,13 @@ static void __init m5407_timers_init(void)
 	/* Timer1 is always used as system timer */
 	writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL6 | MCFSIM_ICR_PRI3,
 		MCF_MBAR + MCFSIM_TIMER1ICR);
+	mcf_mapirq2imr(MCF_IRQ_TIMER, MCFINTC_TIMER1);
 
 #ifdef CONFIG_HIGHPROFILE
 	/* Timer2 is to be used as a high speed profile timer  */
 	writeb(MCFSIM_ICR_AUTOVEC | MCFSIM_ICR_LEVEL7 | MCFSIM_ICR_PRI3,
 		MCF_MBAR + MCFSIM_TIMER2ICR);
+	mcf_mapirq2imr(MCF_IRQ_PROFILER, MCFINTC_TIMER2);
 #endif
 }
@@ -99,13 +100,19 @@ void __init config_BSP(char *commandp, int size)
 {
 	mach_reset = m5407_cpu_reset;
 	m5407_timers_init();
+	m5407_uarts_init();
+
+	/* Only support the external interrupts on their primary level */
+	mcf_mapirq2imr(25, MCFINTC_EINT1);
+	mcf_mapirq2imr(27, MCFINTC_EINT3);
+	mcf_mapirq2imr(29, MCFINTC_EINT5);
+	mcf_mapirq2imr(31, MCFINTC_EINT7);
 }
 
 /***************************************************************************/
 
 static int __init init_BSP(void)
 {
-	m5407_uarts_init();
 	platform_add_devices(m5407_devices, ARRAY_SIZE(m5407_devices));
 	return 0;
 }
diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68knommu/platform/coldfire/intc.c
index 14db26bf6e2..a4560c86db7 100644
--- a/arch/m68knommu/platform/coldfire/intc.c
+++ b/arch/m68knommu/platform/coldfire/intc.c
@@ -19,9 +19,17 @@
 #include
 
 /*
- * Define the vector numbers for the basic 7 interrupt sources.
- * These are often referred to as the "external" interrupts in
- * the ColdFire documentation (for the early ColdFire cores at least).
+ * The mapping of irq number to a mask register bit is not one-to-one.
+ * The irq numbers are either based on "level" of interrupt or fixed
+ * for an autovector-able interrupt. So we keep a local data structure
+ * that maps from irq to mask register. Not all interrupts will have
+ * an IMR bit.
+ */
+unsigned char mcf_irq2imr[NR_IRQS];
+
+/*
+ * Define the minimum and maximum external interrupt numbers.
+ * These are also used as the "level" interrupt numbers.
 */
 #define	EIRQ1	25
 #define	EIRQ7	31
@@ -36,22 +44,22 @@
 
 void mcf_setimr(int index)
 {
-	u16 imr;
-	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	u16 imr;
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
 	__raw_writew(imr | (0x1 << index), MCF_MBAR + MCFSIM_IMR);
 }
 
 void mcf_clrimr(int index)
 {
-	u16 imr;
-	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	u16 imr;
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
 	__raw_writew(imr & ~(0x1 << index), MCF_MBAR + MCFSIM_IMR);
 }
 
 void mcf_maskimr(unsigned int mask)
 {
 	u16 imr;
-	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
+	imr = __raw_readw(MCF_MBAR + MCFSIM_IMR);
 	imr |= mask;
 	__raw_writew(imr, MCF_MBAR + MCFSIM_IMR);
 }
@@ -60,22 +68,22 @@ void mcf_maskimr(unsigned int mask)
 
 void mcf_setimr(int index)
 {
-	u32 imr;
-	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	u32 imr;
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
 	__raw_writel(imr | (0x1 << index), MCF_MBAR + MCFSIM_IMR);
 }
 
 void mcf_clrimr(int index)
 {
-	u32 imr;
-	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	u32 imr;
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
 	__raw_writel(imr & ~(0x1 << index), MCF_MBAR + MCFSIM_IMR);
 }
 
 void mcf_maskimr(unsigned int mask)
 {
 	u32 imr;
-	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
+	imr = __raw_readl(MCF_MBAR + MCFSIM_IMR);
 	imr |= mask;
 	__raw_writel(imr, MCF_MBAR + MCFSIM_IMR);
 }
@@ -93,24 +101,26 @@ void mcf_maskimr(unsigned int mask)
 */
 void mcf_autovector(int irq)
 {
+#ifdef MCFSIM_AVR
 	if ((irq >= EIRQ1) && (irq <= EIRQ7)) {
 		u8 avec;
 		avec = __raw_readb(MCF_MBAR + MCFSIM_AVR);
 		avec |= (0x1 << (irq - EIRQ1 + 1));
 		__raw_writeb(avec, MCF_MBAR + MCFSIM_AVR);
 	}
+#endif
 }
 
 static void intc_irq_mask(unsigned int irq)
 {
-	if ((irq >= EIRQ1) && (irq <= EIRQ7))
-		mcf_setimr(irq - EIRQ1 + 1);
+	if (mcf_irq2imr[irq])
+		mcf_setimr(mcf_irq2imr[irq]);
 }
 
 static void intc_irq_unmask(unsigned int irq)
 {
-	if ((irq >= EIRQ1) && (irq <= EIRQ7))
-		mcf_clrimr(irq - EIRQ1 + 1);
+	if (mcf_irq2imr[irq])
+		mcf_clrimr(mcf_irq2imr[irq]);
 }
 
 static int intc_irq_set_type(unsigned int irq, unsigned int type)
--
cgit v1.2.3-70-g09d2

From 3945ca0f84fee3fe564189fe8cf8f02491d19622 Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Fri, 22 May 2009 13:50:53 +1000
Subject: m68knommu: clean up old ColdFire timer irq setup

The recent changes to the old ColdFire interrupt controller code mean we
no longer need to manually unmask the timer interrupt. That is now done
in the interrupt controller code proper.

Signed-off-by: Greg Ungerer
---
 arch/m68knommu/platform/coldfire/timers.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)
(limited to 'arch/m68knommu')

diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68knommu/platform/coldfire/timers.c
index 77b0c17ce6b..2304d736c70 100644
--- a/arch/m68knommu/platform/coldfire/timers.c
+++ b/arch/m68knommu/platform/coldfire/timers.c
@@ -34,7 +34,6 @@
 * These provide the underlying interrupt vector support.
 * Unfortunately it is a little different on each ColdFire.
 */
-extern void mcf_settimericr(int timer, int level);
 void coldfire_profile_init(void);
 
 #if defined(CONFIG_M532x)
@@ -98,8 +97,6 @@ static struct clocksource mcftmr_clk = {
 
 void hw_timer_init(void)
 {
-	setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq);
-
 	__raw_writew(MCFTIMER_TMR_DISABLE, TA(MCFTIMER_TMR));
 	mcftmr_cycles_per_jiffy = FREQ / HZ;
 	/*
@@ -115,7 +112,7 @@
 	mcftmr_clk.mult = clocksource_hz2mult(FREQ, mcftmr_clk.shift);
 	clocksource_register(&mcftmr_clk);
 
-	mcf_clrimr(MCFINTC_TIMER1);
+	setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq);
 
 #ifdef CONFIG_HIGHPROFILE
 	coldfire_profile_init();
@@ -162,8 +159,6 @@ void coldfire_profile_init(void)
 	printk(KERN_INFO "PROFILE: lodging TIMER2 @ %dHz as profile timer\n",
 		PROFILEHZ);
 
-	setup_irq(MCF_IRQ_PROFILER, &coldfire_profile_irq);
-
 	/* Set up TIMER 2 as high speed profile clock */
 	__raw_writew(MCFTIMER_TMR_DISABLE, PA(MCFTIMER_TMR));
 
@@ -171,7 +166,7 @@
 	__raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 |
 		MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, PA(MCFTIMER_TMR));
 
-	mcf_clrimr(MCFINTC_TIMER2);
+	setup_irq(MCF_IRQ_PROFILER, &coldfire_profile_irq);
 }
 
 /***************************************************************************/
--
cgit v1.2.3-70-g09d2

From da3601a5fa664c8d51383fe916d96bd4fbce84b8 Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Fri, 22 May 2009 14:16:39 +1000
Subject: m68knommu: add support for second interrupt controller of ColdFire 5249

The ColdFire 5249 CPU has a second (completely different) interrupt
controller. It is the only ColdFire CPU that has this type. It controls
GPIO interrupts amongst a number of interrupts from other internal
peripherals. Add support code for it.

Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/m5249sim.h      | 20 ++++++++++--
 arch/m68knommu/platform/5249/Makefile |  2 +-
 arch/m68knommu/platform/5249/intc2.c  | 59 +++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 arch/m68knommu/platform/5249/intc2.c
(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/m5249sim.h b/arch/m68k/include/asm/m5249sim.h
index 8d76a193071..14bce877ed8 100644
--- a/arch/m68k/include/asm/m5249sim.h
+++ b/arch/m68k/include/asm/m5249sim.h
@@ -106,6 +106,22 @@
 #define	MCFSIM2_IDECONFIG1	0x18c		/* IDEconfig1 */
 #define	MCFSIM2_IDECONFIG2	0x190		/* IDEconfig2 */
 
+/*
+ * Define the base interrupt for the second interrupt controller.
+ * We set it to 128, out of the way of the base interrupts, with plenty
+ * of room for its 64 interrupts.
+ */
+#define	MCFINTC2_VECBASE	128
+
+#define	MCFINTC2_GPIOIRQ0	(MCFINTC2_VECBASE + 32)
+#define	MCFINTC2_GPIOIRQ1	(MCFINTC2_VECBASE + 33)
+#define	MCFINTC2_GPIOIRQ2	(MCFINTC2_VECBASE + 34)
+#define	MCFINTC2_GPIOIRQ3	(MCFINTC2_VECBASE + 35)
+#define	MCFINTC2_GPIOIRQ4	(MCFINTC2_VECBASE + 36)
+#define	MCFINTC2_GPIOIRQ5	(MCFINTC2_VECBASE + 37)
+#define	MCFINTC2_GPIOIRQ6	(MCFINTC2_VECBASE + 38)
+#define	MCFINTC2_GPIOIRQ7	(MCFINTC2_VECBASE + 39)
+
 /*
 * Generic GPIO support
 */
@@ -135,9 +151,9 @@
 	subql	#1,%a1			/* get MBAR2 address in a1 */
 
 	/*
-	 * Move secondary interrupts to base at 128.
+	 * Move secondary interrupts to their base (128).
	 */
-	moveb	#0x80,%d0
+	moveb	#MCFINTC2_VECBASE,%d0
 	moveb	%d0,0x16b(%a1)		/* interrupt base register */
 
 /*
diff --git a/arch/m68knommu/platform/5249/Makefile b/arch/m68knommu/platform/5249/Makefile
index 113c3339006..f56225d1582 100644
--- a/arch/m68knommu/platform/5249/Makefile
+++ b/arch/m68knommu/platform/5249/Makefile
@@ -14,5 +14,5 @@
 
 asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
 
-obj-y := config.o gpio.o
+obj-y := config.o gpio.o intc2.o
 
diff --git a/arch/m68knommu/platform/5249/intc2.c b/arch/m68knommu/platform/5249/intc2.c
new file mode 100644
index 00000000000..d09d9da0453
--- /dev/null
+++ b/arch/m68knommu/platform/5249/intc2.c
@@ -0,0 +1,59 @@
+/*
+ * intc2.c  -- support for the 2nd INTC controller of the 5249
+ *
+ * (C) Copyright 2009, Greg Ungerer
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void intc2_irq_gpio_mask(unsigned int irq)
+{
+	u32 imr;
+	imr = readl(MCF_MBAR2 + MCFSIM2_GPIOINTENABLE);
+	imr &= ~(0x1 << (irq - MCFINTC2_GPIOIRQ0));
+	writel(imr, MCF_MBAR2 + MCFSIM2_GPIOINTENABLE);
+}
+
+static void intc2_irq_gpio_unmask(unsigned int irq)
+{
+	u32 imr;
+	imr = readl(MCF_MBAR2 + MCFSIM2_GPIOINTENABLE);
+	imr |= (0x1 << (irq - MCFINTC2_GPIOIRQ0));
+	writel(imr, MCF_MBAR2 + MCFSIM2_GPIOINTENABLE);
+}
+
+static void intc2_irq_gpio_ack(unsigned int irq)
+{
+	writel(0x1 << (irq - MCFINTC2_GPIOIRQ0), MCF_MBAR2 + MCFSIM2_GPIOINTCLEAR);
+}
+
+static struct irq_chip intc2_irq_gpio_chip = {
+	.name		= "CF-INTC2",
+	.mask		= intc2_irq_gpio_mask,
+	.unmask		= intc2_irq_gpio_unmask,
+	.ack		= intc2_irq_gpio_ack,
+};
+
+static int __init mcf_intc2_init(void)
+{
+	int irq;
+
+	/* GPIO interrupt sources */
+	for (irq = MCFINTC2_GPIOIRQ0; (irq <= MCFINTC2_GPIOIRQ7); irq++)
+		irq_desc[irq].chip = &intc2_irq_gpio_chip;
+
+	return 0;
+}
+
+arch_initcall(mcf_intc2_init);
--
cgit v1.2.3-70-g09d2

From 9075216d2c8761c91dc268125f6580af3ffc58b5 Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Tue, 7 Jul 2009 09:39:11 +1000
Subject: m68knommu: create a specialized ColdFire 5272 interrupt controller

The ColdFire 5272 CPU has a very different interrupt controller than any
of the other ColdFire parts. It needs its own controller code to
correctly set up and ack interrupts.

Signed-off-by: Greg Ungerer
---
 arch/m68k/include/asm/m5272sim.h          |  37 +++++++-
 arch/m68knommu/platform/5272/Makefile     |   2 +-
 arch/m68knommu/platform/5272/intc.c       | 138 ++++++++++++++++++++++++++++++
 arch/m68knommu/platform/coldfire/Makefile |   2 +-
 4 files changed, 174 insertions(+), 5 deletions(-)
 create mode 100644 arch/m68knommu/platform/5272/intc.c
(limited to 'arch/m68knommu')

diff --git a/arch/m68k/include/asm/m5272sim.h b/arch/m68k/include/asm/m5272sim.h
index 469686ffc4a..df3332c2317 100644
--- a/arch/m68k/include/asm/m5272sim.h
+++ b/arch/m68k/include/asm/m5272sim.h
@@ -12,7 +12,6 @@
 #define	m5272sim_h
 /****************************************************************************/
 
-
 /*
 * Define the 5272 SIM register set addresses.
 */
@@ -76,8 +75,40 @@
 
 /*
 * Define system peripheral IRQ usage.
 */
-#define MCF_IRQ_TIMER		69		/* Timer0, Level 6 */
-#define MCF_IRQ_PROFILER	70		/* Timer1, Level 7 */
+#define MCFINT_VECBASE		64		/* Base of interrupts */
+#define MCF_IRQ_SPURIOUS	64		/* User Spurious */
+#define MCF_IRQ_EINT1		65		/* External Interrupt 1 */
+#define MCF_IRQ_EINT2		66		/* External Interrupt 2 */
+#define MCF_IRQ_EINT3		67		/* External Interrupt 3 */
+#define MCF_IRQ_EINT4		68		/* External Interrupt 4 */
+#define MCF_IRQ_TIMER1		69		/* Timer 1 */
+#define MCF_IRQ_TIMER2		70		/* Timer 2 */
+#define MCF_IRQ_TIMER3		71		/* Timer 3 */
+#define MCF_IRQ_TIMER4		72		/* Timer 4 */
+#define MCF_IRQ_UART1		73		/* UART 1 */
+#define MCF_IRQ_UART2		74		/* UART 2 */
+#define MCF_IRQ_PLIP		75		/* PLIC 2Khz Periodic */
+#define MCF_IRQ_PLIA		76		/* PLIC Asynchronous */
+#define MCF_IRQ_USB0		77		/* USB Endpoint 0 */
+#define MCF_IRQ_USB1		78		/* USB Endpoint 1 */
+#define MCF_IRQ_USB2		79		/* USB Endpoint 2 */
+#define MCF_IRQ_USB3		80		/* USB Endpoint 3 */
+#define MCF_IRQ_USB4		81		/* USB Endpoint 4 */
+#define MCF_IRQ_USB5		82		/* USB Endpoint 5 */
+#define MCF_IRQ_USB6		83		/* USB Endpoint 6 */
+#define MCF_IRQ_USB7		84		/* USB Endpoint 7 */
+#define MCF_IRQ_DMA		85		/* DMA Controller */
+#define MCF_IRQ_ERX		86		/* Ethernet Receiver */
+#define MCF_IRQ_ETX		87		/* Ethernet Transmitter */
+#define MCF_IRQ_ENTC		88		/* Ethernet Non-Time Critical */
+#define MCF_IRQ_QSPI		89		/* Queued Serial Interface */
+#define MCF_IRQ_EINT5		90		/* External Interrupt 5 */
+#define MCF_IRQ_EINT6		91		/* External Interrupt 6 */
+#define MCF_IRQ_SWTO		92		/* Software Watchdog */
+#define MCFINT_VECMAX		95		/* Maximum interrupt */
+
+#define MCF_IRQ_TIMER		MCF_IRQ_TIMER1
+#define MCF_IRQ_PROFILER	MCF_IRQ_TIMER2
 
 /*
 * Generic GPIO support
diff --git a/arch/m68knommu/platform/5272/Makefile b/arch/m68knommu/platform/5272/Makefile
index 3d90e6d9245..93673ef8e2c 100644
--- a/arch/m68knommu/platform/5272/Makefile
+++ b/arch/m68knommu/platform/5272/Makefile
@@ -14,5 +14,5 @@
 
 asflags-$(CONFIG_FULLDEBUG) := -DDEBUGGER_COMPATIBLE_CACHE=1
 
-obj-y := config.o gpio.o
+obj-y := config.o gpio.o intc.o
 
diff --git a/arch/m68knommu/platform/5272/intc.c b/arch/m68knommu/platform/5272/intc.c
new file mode 100644
index 00000000000..7081e0a9720
--- /dev/null
+++ b/arch/m68knommu/platform/5272/intc.c
@@ -0,0 +1,138 @@
+/*
+ * intc.c  -- interrupt controller of the ColdFire 5272 SoC
+ *
+ * (C) Copyright 2009, Greg Ungerer
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * The 5272 ColdFire interrupt controller is nothing like any other
+ * ColdFire interrupt controller - it truly is completely different.
+ * Given its age it is unlikely to be used on any other ColdFire CPU.
+ */
+
+/*
+ * The masking and priority setting of interrupts on the 5272 is done
+ * via a set of 4 "Interrupt Controller Registers" (ICR). There is a
+ * loose mapping of vector number to register and internal bits, but
+ * a table is the easiest and quickest way to map them.
+ */ +struct irqmap { + unsigned char icr; + unsigned char index; + unsigned char ack; +}; + +static struct irqmap intc_irqmap[MCFINT_VECMAX - MCFINT_VECBASE] = { + /*MCF_IRQ_SPURIOUS*/ { .icr = 0, .index = 0, .ack = 0, }, + /*MCF_IRQ_EINT1*/ { .icr = MCFSIM_ICR1, .index = 28, .ack = 1, }, + /*MCF_IRQ_EINT2*/ { .icr = MCFSIM_ICR1, .index = 24, .ack = 1, }, + /*MCF_IRQ_EINT3*/ { .icr = MCFSIM_ICR1, .index = 20, .ack = 1, }, + /*MCF_IRQ_EINT4*/ { .icr = MCFSIM_ICR1, .index = 16, .ack = 1, }, + /*MCF_IRQ_TIMER1*/ { .icr = MCFSIM_ICR1, .index = 12, .ack = 0, }, + /*MCF_IRQ_TIMER2*/ { .icr = MCFSIM_ICR1, .index = 8, .ack = 0, }, + /*MCF_IRQ_TIMER3*/ { .icr = MCFSIM_ICR1, .index = 4, .ack = 0, }, + /*MCF_IRQ_TIMER4*/ { .icr = MCFSIM_ICR1, .index = 0, .ack = 0, }, + /*MCF_IRQ_UART1*/ { .icr = MCFSIM_ICR2, .index = 28, .ack = 0, }, + /*MCF_IRQ_UART2*/ { .icr = MCFSIM_ICR2, .index = 24, .ack = 0, }, + /*MCF_IRQ_PLIP*/ { .icr = MCFSIM_ICR2, .index = 20, .ack = 0, }, + /*MCF_IRQ_PLIA*/ { .icr = MCFSIM_ICR2, .index = 16, .ack = 0, }, + /*MCF_IRQ_USB0*/ { .icr = MCFSIM_ICR2, .index = 12, .ack = 0, }, + /*MCF_IRQ_USB1*/ { .icr = MCFSIM_ICR2, .index = 8, .ack = 0, }, + /*MCF_IRQ_USB2*/ { .icr = MCFSIM_ICR2, .index = 4, .ack = 0, }, + /*MCF_IRQ_USB3*/ { .icr = MCFSIM_ICR2, .index = 0, .ack = 0, }, + /*MCF_IRQ_USB4*/ { .icr = MCFSIM_ICR3, .index = 28, .ack = 0, }, + /*MCF_IRQ_USB5*/ { .icr = MCFSIM_ICR3, .index = 24, .ack = 0, }, + /*MCF_IRQ_USB6*/ { .icr = MCFSIM_ICR3, .index = 20, .ack = 0, }, + /*MCF_IRQ_USB7*/ { .icr = MCFSIM_ICR3, .index = 16, .ack = 0, }, + /*MCF_IRQ_DMA*/ { .icr = MCFSIM_ICR3, .index = 12, .ack = 0, }, + /*MCF_IRQ_ERX*/ { .icr = MCFSIM_ICR3, .index = 8, .ack = 0, }, + /*MCF_IRQ_ETX*/ { .icr = MCFSIM_ICR3, .index = 4, .ack = 0, }, + /*MCF_IRQ_ENTC*/ { .icr = MCFSIM_ICR3, .index = 0, .ack = 0, }, + /*MCF_IRQ_QSPI*/ { .icr = MCFSIM_ICR4, .index = 28, .ack = 0, }, + /*MCF_IRQ_EINT5*/ { .icr = MCFSIM_ICR4, .index = 24, .ack = 1, }, + /*MCF_IRQ_EINT6*/ { .icr = MCFSIM_ICR4, .index = 20, .ack = 1, }, + /*MCF_IRQ_SWTO*/ { .icr = MCFSIM_ICR4, .index = 16, .ack = 0, }, +}; + +static void intc_irq_mask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECMAX)) { + u32 v; + irq -= MCFINT_VECBASE; + v = 0x8 << intc_irqmap[irq].index; + writel(v, MCF_MBAR + intc_irqmap[irq].icr); + } +} + +static void intc_irq_unmask(unsigned int irq) +{ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECMAX)) { + u32 v; + irq -= MCFINT_VECBASE; + v = 0xd << intc_irqmap[irq].index; + writel(v, MCF_MBAR + intc_irqmap[irq].icr); + } +} + +static void intc_irq_ack(unsigned int irq) +{ + /* Only external interrupts are acked */ + if ((irq >= MCFINT_VECBASE) && (irq <= MCFINT_VECMAX)) { + irq -= MCFINT_VECBASE; + if (intc_irqmap[irq].ack) { + u32 v; + v = 0xd << intc_irqmap[irq].index; + writel(v, MCF_MBAR + intc_irqmap[irq].icr); + } + } +} + +static int intc_irq_set_type(unsigned int irq, unsigned int type) +{ + /* We can set the edge type here for external interrupts */ + return 0; +} + +static struct irq_chip intc_irq_chip = { + .name = "CF-INTC", + .mask = intc_irq_mask, + .unmask = intc_irq_unmask, + .ack = intc_irq_ack, + .set_type = intc_irq_set_type, +}; + +void __init init_IRQ(void) +{ + int irq; + + init_vectors(); + + /* Mask all interrupt sources */ + writel(0x88888888, MCF_MBAR + MCFSIM_ICR1); + writel(0x88888888, MCF_MBAR + MCFSIM_ICR2); + writel(0x88888888, MCF_MBAR + MCFSIM_ICR3); + writel(0x88888888, MCF_MBAR + MCFSIM_ICR4); + + for (irq = 0; (irq < NR_IRQS); irq++) { + 
irq_desc[irq].status = IRQ_DISABLED; + irq_desc[irq].action = NULL; + irq_desc[irq].depth = 1; + irq_desc[irq].chip = &intc_irq_chip; + intc_irq_set_type(irq, 0); + } +} + diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68knommu/platform/coldfire/Makefile index 24ea95a2312..f72a0e5d999 100644 --- a/arch/m68knommu/platform/coldfire/Makefile +++ b/arch/m68knommu/platform/coldfire/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_M520x) += pit.o intc-simr.o obj-$(CONFIG_M523x) += pit.o dma_timer.o intc-2.o obj-$(CONFIG_M5249) += timers.o intc.o obj-$(CONFIG_M527x) += pit.o intc-2.o -obj-$(CONFIG_M5272) += timers.o intc.o +obj-$(CONFIG_M5272) += timers.o obj-$(CONFIG_M528x) += pit.o intc-2.o obj-$(CONFIG_M5307) += timers.o intc.o obj-$(CONFIG_M532x) += timers.o intc-simr.o -- cgit v1.2.3-70-g09d2 From 49802967cb7ec4f36a64e192108babe1c3b67e8e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2009 12:13:23 +1000 Subject: m68knommu: remove ColdFire direct interrupt register access Now that the ColdFire 5272 has full interrupt controller functionality we can remove all the interrupt masking and acking code from the FEC ethernet driver. Signed-off-by: Greg Ungerer --- arch/m68knommu/platform/5272/config.c | 65 +++++------------------------------ drivers/net/fec.c | 18 ++-------- 2 files changed, 11 insertions(+), 72 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c index b16add9aa4e..59278c0887d 100644 --- a/arch/m68knommu/platform/5272/config.c +++ b/arch/m68knommu/platform/5272/config.c @@ -31,11 +31,11 @@ unsigned char ledbank = 0xff; static struct mcf_platform_uart m5272_uart_platform[] = { { .mapbase = MCF_MBAR + MCFUART_BASE1, - .irq = 73, + .irq = MCF_IRQ_UART1, }, { .mapbase = MCF_MBAR + MCFUART_BASE2, - .irq = 74, + .irq = MCF_IRQ_UART2, }, { }, }; @@ -53,18 +53,18 @@ static struct resource m5272_fec_resources[] = { .flags = IORESOURCE_MEM, }, { - .start = 86, - .end = 86, + .start = MCF_IRQ_ERX, + .end = MCF_IRQ_ERX, .flags = IORESOURCE_IRQ, }, { - .start = 87, - .end = 87, + .start = MCF_IRQ_ETX, + .end = MCF_IRQ_ETX, .flags = IORESOURCE_IRQ, }, { - .start = 88, - .end = 88, + .start = MCF_IRQ_ENTC, + .end = MCF_IRQ_ENTC, .flags = IORESOURCE_IRQ, }, }; @@ -88,9 +88,6 @@ static void __init m5272_uart_init_line(int line, int irq) u32 v; if ((line >= 0) && (line < 2)) { - v = (line) ? 
0x0e000000 : 0xe0000000; - writel(v, MCF_MBAR + MCFSIM_ICR2); - /* Enable the output lines for the serial ports */ v = readl(MCF_MBAR + MCFSIM_PBCNT); v = (v & ~0x000000ff) | 0x00000055; @@ -113,48 +110,6 @@ static void __init m5272_uarts_init(void) /***************************************************************************/ -static void __init m5272_fec_init(void) -{ - u32 imr; - - /* Unmask FEC interrupts at ColdFire interrupt controller */ - imr = readl(MCF_MBAR + MCFSIM_ICR3); - imr = (imr & ~0x00000fff) | 0x00000ddd; - writel(imr, MCF_MBAR + MCFSIM_ICR3); - - imr = readl(MCF_MBAR + MCFSIM_ICR1); - imr = (imr & ~0x0f000000) | 0x0d000000; - writel(imr, MCF_MBAR + MCFSIM_ICR1); -} - -/***************************************************************************/ - -void mcf_disableall(void) -{ - volatile unsigned long *icrp; - - icrp = (volatile unsigned long *) (MCF_MBAR + MCFSIM_ICR1); - icrp[0] = 0x88888888; - icrp[1] = 0x88888888; - icrp[2] = 0x88888888; - icrp[3] = 0x88888888; -} - -/***************************************************************************/ - -static void __init m5272_timers_init(void) -{ - /* Timer1 @ level6 is always used as system timer */ - writel((0x8 | 0x6) << ((4 - 1) * 4), MCF_MBAR + MCFSIM_ICR1); - -#ifdef CONFIG_HIGHPROFILE - /* Timer2 @ level7 is to be used as a high speed profile timer */ - writel((0x8 | 0x7) << ((4 - 2) * 4), MCF_MBAR + MCFSIM_ICR1); -#endif -} - -/***************************************************************************/ - static void m5272_cpu_reset(void) { local_irq_disable(); @@ -178,8 +133,6 @@ void __init config_BSP(char *commandp, int size) *pivrp = 0x40; #endif - mcf_disableall(); - #if defined(CONFIG_NETtel) || defined(CONFIG_SCALES) /* Copy command line from FLASH to local buffer... 
 */
 		memcpy(commandp, (char *) 0xf0004000, size);
@@ -191,7 +144,6 @@ void __init config_BSP(char *commandp, int size)
 #endif
 
 	mach_reset = m5272_cpu_reset;
-	m5272_timers_init();
 }
 
 /***************************************************************************/
 
@@ -199,7 +151,6 @@ void __init config_BSP(char *commandp, int size)
 static int __init init_BSP(void)
 {
 	m5272_uarts_init();
-	m5272_fec_init();
 	platform_add_devices(m5272_devices, ARRAY_SIZE(m5272_devices));
 	return 0;
 }
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index c9fd82d3a80..a32230bbe19 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -1142,19 +1142,9 @@ static void __inline__ fec_request_mii_intr(struct net_device *dev)
 		printk("FEC: Could not allocate fec(MII) IRQ(66)!\n");
 }
 
-static void __inline__ fec_disable_phy_intr(void)
+static void __inline__ fec_disable_phy_intr(struct net_device *dev)
 {
-	volatile unsigned long *icrp;
-	icrp = (volatile unsigned long *) (MCF_MBAR + MCFSIM_ICR1);
-	*icrp = 0x08000000;
-}
-
-static void __inline__ fec_phy_ack_intr(void)
-{
-	volatile unsigned long *icrp;
-	/* Acknowledge the interrupt */
-	icrp = (volatile unsigned long *) (MCF_MBAR + MCFSIM_ICR1);
-	*icrp = 0x0d000000;
+	free_irq(66, dev);
 }
 #endif
 
@@ -1386,7 +1376,7 @@ mii_discover_phy(uint mii_reg, struct net_device *dev)
 			writel(0, fep->hwp + FEC_MII_SPEED);
 			fep->phy_speed = 0;
 #ifdef HAVE_mii_link_interrupt
-			fec_disable_phy_intr();
+			fec_disable_phy_intr(dev);
 #endif
 		}
 	}
@@ -1399,8 +1389,6 @@ mii_link_interrupt(int irq, void * dev_id)
 	struct net_device *dev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(dev);
 
-	fec_phy_ack_intr();
-
 	mii_do_cmd(dev, fep->phy->ack_int);
 	mii_do_cmd(dev, phy_cmd_relink);  /* restart and display status */
--
cgit v1.2.3-70-g09d2

From 14c16db3906ba9453e9c786323f872e11fd0f48d Mon Sep 17 00:00:00 2001
From: Greg Ungerer
Date: Wed, 12 Aug 2009 16:14:43 +1000
Subject: m68knommu: set multi-function pins for ethernet when enabled

The ethernet pins on the 523x ColdFire CPU family are multi-function
pins. We need to enable them as ethernet pins when using the FEC
ethernet driver.

Bug report and older patch from timothee@manaud.net.

Signed-off-by: Greg Ungerer
---
 arch/m68knommu/platform/523x/config.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
(limited to 'arch/m68knommu')

diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c
index 00f08d2e43b..6ba84f2aa39 100644
--- a/arch/m68knommu/platform/523x/config.c
+++ b/arch/m68knommu/platform/523x/config.c
@@ -82,6 +82,20 @@ static struct platform_device *m523x_devices[] __initdata = {
 
 /***************************************************************************/
 
+static void __init m523x_fec_init(void)
+{
+	u16 par;
+	u8 v;
+
+	/* Set multi-function pins to ethernet use */
+	par = readw(MCF_IPSBAR + 0x100082);
+	writew(par | 0xf00, MCF_IPSBAR + 0x100082);
+	v = readb(MCF_IPSBAR + 0x100078);
+	writeb(v | 0xc0, MCF_IPSBAR + 0x100078);
+}
+
+/***************************************************************************/
+
 static void m523x_cpu_reset(void)
 {
 	local_irq_disable();
@@ -99,6 +113,7 @@ void __init config_BSP(char *commandp, int size)
 
 static int __init init_BSP(void)
 {
+	m523x_fec_init();
 	platform_add_devices(m523x_devices, ARRAY_SIZE(m523x_devices));
 	return 0;
 }
--
cgit v1.2.3-70-g09d2

From d200c922bc2b1ac88b8d33b6cfff2ed837af186a Mon Sep 17 00:00:00 2001
From: Joe Perches
Date: Sun, 20 Sep 2009 18:14:13 -0400
Subject: Use new __init_task_data macro in arch init_task.c files.
Signed-off-by: Joe Perches Signed-off-by: Tim Abbott Acked-by: Paul Mundt Signed-off-by: Sam Ravnborg --- arch/arm/kernel/init_task.c | 5 ++--- arch/avr32/kernel/init_task.c | 5 ++--- arch/cris/kernel/process.c | 5 ++--- arch/frv/kernel/init_task.c | 5 ++--- arch/h8300/kernel/init_task.c | 5 ++--- arch/ia64/kernel/init_task.c | 3 ++- arch/m32r/kernel/init_task.c | 5 ++--- arch/m68k/kernel/process.c | 6 +++--- arch/m68knommu/kernel/init_task.c | 5 ++--- arch/microblaze/kernel/init_task.c | 5 ++--- arch/mips/kernel/init_task.c | 5 ++--- arch/mn10300/kernel/init_task.c | 5 ++--- arch/parisc/kernel/init_task.c | 4 ++-- arch/powerpc/kernel/init_task.c | 5 ++--- arch/powerpc/kernel/machine_kexec_64.c | 5 +++-- arch/s390/kernel/init_task.c | 5 ++--- arch/score/kernel/init_task.c | 5 ++--- arch/sh/kernel/init_task.c | 5 ++--- arch/sparc/kernel/init_task.c | 5 ++--- arch/um/kernel/init_task.c | 5 ++--- arch/x86/kernel/init_task.c | 5 ++--- arch/xtensa/kernel/init_task.c | 5 ++--- 22 files changed, 46 insertions(+), 62 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 3f470866bb8..e7cbb50dc35 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -24,9 +24,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * * The things we do for performance.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index 57ec9f2dcd9..6b2343e6fe3 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -18,9 +18,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); /* * Initial thread structure. Must be aligned on an 8192-byte boundary. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 51dcd04d277..c99aeab7cef 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -45,9 +45,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index 1d3df1d9495..3c3e0b336a9 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -19,9 +19,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. 
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 089c65ed6eb..54c1062ee80 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index c475fc281be..e253ab8fcbc 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -33,7 +33,8 @@ union { struct thread_info thread_info; } s; unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{ +} init_task_mem asm ("init_task") __init_task_data = + {{ .task = INIT_TASK(init_task_mem.s.task), .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) }}; diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index fce57e5d3f9..6c42d5f8df5 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 72bad65dba3..41230c595a8 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -42,9 +42,9 @@ */ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union -__attribute__((section(".data.init_task"), aligned(THREAD_SIZE))) - = { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data + __attribute__((aligned(THREAD_SIZE))) = + { INIT_THREAD_INFO(init_task) }; /* initial task structure */ struct task_struct init_task = INIT_TASK(init_task); diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 45e97a207fe..cbf9dc3cc51 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. 
*/ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/microblaze/kernel/init_task.c b/arch/microblaze/kernel/init_task.c index 67da22579b6..b5d711f94ff 100644 --- a/arch/microblaze/kernel/init_task.c +++ b/arch/microblaze/kernel/init_task.c @@ -19,9 +19,8 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -{ INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index 5b457a40c78..6d6ca530589 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -21,9 +21,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * * The things we do for performance.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"), - __aligned__(THREAD_SIZE))) = +union thread_union init_thread_union __init_task_data + __attribute__((__aligned__(THREAD_SIZE))) = { INIT_THREAD_INFO(init_task) }; /* diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index 80d423b80af..a481b043bea 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -27,9 +27,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index 82974b20fc1..d020eae6525 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -43,8 +43,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) = +union thread_union init_thread_union __init_task_data + __attribute__((aligned(128))) = { INIT_THREAD_INFO(init_task) }; #if PT_NLEVELS == 3 diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index ffc4253fef5..2375b7eb1c7 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -16,9 +16,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. 
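Note how architectures that need extra alignment keep it as a separate
attribute next to the new macro (see the m68k, mips and parisc hunks above)
rather than folding it into the section annotation. A sketch of the resulting
pattern, assuming the usual THREAD_SIZE and INIT_THREAD_INFO definitions from
the arch's thread_info.h:

	/*
	 * Section placement comes from __init_task_data; the alignment
	 * requirement stays an explicit, separate attribute.
	 */
	union thread_union init_thread_union __init_task_data
		__attribute__((aligned(THREAD_SIZE))) =
		{ INIT_THREAD_INFO(init_task) };

The machine_kexec_64.c hunk that follows shows the macro is not limited to
init_task itself: kexec_stack reuses the section simply to get a properly
sized and aligned stack area.
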
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 49e705fcee6..040bd1de8d9 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -249,8 +250,8 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -static union thread_union kexec_stack - __attribute__((__section__(".data.init_task"))) = { }; +static union thread_union kexec_stack __init_task_data = + { }; /* Our assembly helper, in kexec_stub.S */ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index fe787f9e5f3..4d1c9fb0b54 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -25,9 +25,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/score/kernel/init_task.c b/arch/score/kernel/init_task.c index ff952f6c63f..baa03ee217d 100644 --- a/arch/score/kernel/init_task.c +++ b/arch/score/kernel/init_task.c @@ -34,9 +34,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"), __aligned__(THREAD_SIZE))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index 1719957c0a6..11f2ea556a6 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -17,9 +17,8 @@ struct pt_regs fake_swapper_regs; * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index 28125c5b3d3..5fe3d65581f 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -18,6 +18,5 @@ EXPORT_SYMBOL(init_task); * If this is not aligned on a 8k boundry, then you should change code * in etrap.S which assumes it. */ -union thread_union init_thread_union - __attribute__((section (".data.init_task"))) - = { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index b25121b537d..8aa77b61a5f 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -30,9 +30,8 @@ EXPORT_SYMBOL(init_task); * "init_task" linker map entry.. 
*/ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; union thread_union cpu0_irqstack __attribute__((__section__(".data.init_irqstack"))) = diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 270ff83efc1..3a54dcb9cd0 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index c4302f0e4ba..cd122fb7e48 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -23,9 +23,8 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -{ INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; struct task_struct init_task = INIT_TASK(init_task); -- cgit v1.2.3-70-g09d2 From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. 
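Since the rename keeps every syscall slot (each syscall table touched below
only renames its entry in place), existing binaries keep working, and new
userspace needs nothing more than the renamed number. An illustrative
wrapper, not part of this patch, assuming headers that already carry the new
names:

	/*
	 * Userspace sketch: the syscall number is unchanged, only the
	 * name is new, so a thin wrapper is all the migration takes.
	 */
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int
	perf_event_open(struct perf_event_attr *attr, pid_t pid,
			int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid,
			       cpu, group_fd, flags);
	}
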
This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/kernel/calls.S | 2 +- arch/blackfin/include/asm/unistd.h | 2 +- arch/blackfin/mach-common/entry.S | 2 +- arch/frv/Kconfig | 2 +- arch/frv/include/asm/perf_counter.h | 17 - arch/frv/include/asm/perf_event.h | 17 + arch/frv/include/asm/unistd.h | 2 +- arch/frv/kernel/entry.S | 2 +- arch/frv/lib/Makefile | 2 +- arch/frv/lib/perf_counter.c | 19 - arch/frv/lib/perf_event.c | 19 + arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/kernel/entry.S | 2 +- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/microblaze/include/asm/unistd.h | 2 +- arch/microblaze/kernel/syscall_table.S | 2 +- arch/mips/include/asm/unistd.h | 6 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mn10300/include/asm/unistd.h | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/parisc/Kconfig | 2 +- arch/parisc/include/asm/perf_counter.h | 7 - arch/parisc/include/asm/perf_event.h | 7 + arch/parisc/include/asm/unistd.h | 4 +- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/hw_irq.h | 22 +- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/include/asm/perf_counter.h | 110 - arch/powerpc/include/asm/perf_event.h | 110 + arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 8 +- arch/powerpc/kernel/irq.c | 8 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/perf_callchain.c | 2 +- arch/powerpc/kernel/perf_counter.c | 1315 -------- arch/powerpc/kernel/perf_event.c | 1315 ++++++++ arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/powerpc/kernel/time.c | 30 +- arch/powerpc/mm/fault.c | 8 +- arch/powerpc/platforms/Kconfig.cputype | 4 +- arch/s390/Kconfig | 2 +- arch/s390/include/asm/perf_counter.h | 10 - arch/s390/include/asm/perf_event.h | 10 + arch/s390/include/asm/unistd.h | 2 +- arch/s390/kernel/compat_wrapper.S | 8 +- arch/s390/kernel/syscalls.S | 2 +- arch/s390/mm/fault.c | 8 +- arch/sh/Kconfig | 2 +- arch/sh/include/asm/perf_counter.h | 9 - 
arch/sh/include/asm/perf_event.h | 9 + arch/sh/include/asm/unistd_32.h | 2 +- arch/sh/include/asm/unistd_64.h | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sh/mm/fault_32.c | 8 +- arch/sh/mm/tlbflush_64.c | 8 +- arch/sparc/Kconfig | 4 +- arch/sparc/include/asm/perf_counter.h | 14 - arch/sparc/include/asm/perf_event.h | 14 + arch/sparc/include/asm/unistd.h | 2 +- arch/sparc/kernel/Makefile | 2 +- arch/sparc/kernel/nmi.c | 4 +- arch/sparc/kernel/pcr.c | 10 +- arch/sparc/kernel/perf_counter.c | 556 ---- arch/sparc/kernel/perf_event.c | 556 ++++ arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/Kconfig | 2 +- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/perf_counter.h | 108 - arch/x86/include/asm/perf_event.h | 108 + arch/x86/include/asm/unistd_32.h | 2 +- arch/x86/include/asm/unistd_64.h | 4 +- arch/x86/kernel/apic/apic.c | 6 +- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_counter.c | 2298 -------------- arch/x86/kernel/cpu/perf_event.c | 2298 ++++++++++++++ arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- arch/x86/mm/fault.c | 8 +- arch/x86/oprofile/op_model_ppro.c | 4 +- arch/x86/oprofile/op_x86_model.h | 2 +- drivers/char/sysrq.c | 4 +- fs/exec.c | 6 +- include/asm-generic/unistd.h | 4 +- include/linux/init_task.h | 14 +- include/linux/perf_counter.h | 858 ------ include/linux/perf_event.h | 858 ++++++ include/linux/prctl.h | 4 +- include/linux/sched.h | 12 +- include/linux/syscalls.h | 6 +- include/trace/ftrace.h | 10 +- init/Kconfig | 8 +- kernel/Makefile | 2 +- kernel/exit.c | 8 +- kernel/fork.c | 8 +- kernel/perf_counter.c | 5000 ------------------------------- kernel/perf_event.c | 5000 +++++++++++++++++++++++++++++++ kernel/sched.c | 14 +- kernel/sys.c | 10 +- kernel/sys_ni.c | 2 +- kernel/sysctl.c | 22 +- kernel/timer.c | 4 +- kernel/trace/trace_syscalls.c | 6 +- mm/mmap.c | 6 +- mm/mprotect.c | 4 +- tools/perf/Makefile | 2 +- tools/perf/builtin-annotate.c | 28 +- tools/perf/builtin-record.c | 22 +- tools/perf/builtin-report.c | 48 +- tools/perf/builtin-sched.c | 20 +- tools/perf/builtin-stat.c | 10 +- tools/perf/builtin-timechart.c | 14 +- tools/perf/builtin-top.c | 12 +- tools/perf/builtin-trace.c | 22 +- tools/perf/design.txt | 58 +- tools/perf/perf.h | 12 +- tools/perf/util/event.h | 4 +- tools/perf/util/header.c | 6 +- tools/perf/util/header.h | 8 +- tools/perf/util/parse-events.c | 32 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/trace-event-info.c | 8 +- tools/perf/util/trace-event.h | 2 +- 141 files changed, 10694 insertions(+), 10694 deletions(-) delete mode 100644 arch/frv/include/asm/perf_counter.h create mode 100644 arch/frv/include/asm/perf_event.h delete mode 100644 arch/frv/lib/perf_counter.c create mode 100644 arch/frv/lib/perf_event.c delete mode 100644 arch/parisc/include/asm/perf_counter.h create mode 100644 arch/parisc/include/asm/perf_event.h delete mode 100644 arch/powerpc/include/asm/perf_counter.h create mode 100644 arch/powerpc/include/asm/perf_event.h delete mode 100644 arch/powerpc/kernel/perf_counter.c create mode 100644 arch/powerpc/kernel/perf_event.c delete mode 100644 arch/s390/include/asm/perf_counter.h create mode 100644 arch/s390/include/asm/perf_event.h delete mode 100644 arch/sh/include/asm/perf_counter.h create mode 100644 arch/sh/include/asm/perf_event.h 
delete mode 100644 arch/sparc/include/asm/perf_counter.h create mode 100644 arch/sparc/include/asm/perf_event.h delete mode 100644 arch/sparc/kernel/perf_counter.c create mode 100644 arch/sparc/kernel/perf_event.c delete mode 100644 arch/x86/include/asm/perf_counter.h create mode 100644 arch/x86/include/asm/perf_event.h delete mode 100644 arch/x86/kernel/cpu/perf_counter.c create mode 100644 arch/x86/kernel/cpu/perf_event.c delete mode 100644 include/linux/perf_counter.h create mode 100644 include/linux/perf_event.h delete mode 100644 kernel/perf_counter.c create mode 100644 kernel/perf_event.c (limited to 'arch/m68knommu') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 9122c9ee18f..89f7eade20a 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -390,7 +390,7 @@ #define __NR_preadv (__NR_SYSCALL_BASE+361) #define __NR_pwritev (__NR_SYSCALL_BASE+362) #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_counter_open (__NR_SYSCALL_BASE+364) +#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index ecfa98954d1..fafce1b5c69 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -373,7 +373,7 @@ CALL(sys_preadv) CALL(sys_pwritev) CALL(sys_rt_tgsigqueueinfo) - CALL(sys_perf_counter_open) + CALL(sys_perf_event_open) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index c8e7ee4768c..02b1529dad5 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 366 #define __NR_pwritev 367 #define __NR_rt_tgsigqueueinfo 368 -#define __NR_perf_counter_open 369 +#define __NR_perf_event_open 369 #define __NR_syscall 370 #define NR_syscalls __NR_syscall diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 01af24cde36..1e7cac23e25 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1620,7 +1620,7 @@ ENTRY(_sys_call_table) .long _sys_preadv .long _sys_pwritev .long _sys_rt_tgsigqueueinfo - .long _sys_perf_counter_open + .long _sys_perf_event_open .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index b86e19c9b5b..4b5830bcbe2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,7 +7,7 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ZONE_DMA bool diff --git a/arch/frv/include/asm/perf_counter.h b/arch/frv/include/asm/perf_counter.h deleted file mode 100644 index ccf726e61b2..00000000000 --- a/arch/frv/include/asm/perf_counter.h +++ /dev/null @@ -1,17 +0,0 @@ -/* FRV performance counter support - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
- */ - -#ifndef _ASM_PERF_COUNTER_H -#define _ASM_PERF_COUNTER_H - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* _ASM_PERF_COUNTER_H */ diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h new file mode 100644 index 00000000000..a69e0155d14 --- /dev/null +++ b/arch/frv/include/asm/perf_event.h @@ -0,0 +1,17 @@ +/* FRV performance event support + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_PERF_EVENT_H +#define _ASM_PERF_EVENT_H + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 4a8fb427ce0..be6ef0f5cd4 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -342,7 +342,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index fde1e446b44..189397ec012 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1525,6 +1525,6 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open syscall_table_size = (. - sys_call_table) diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index 0a377210c89..f4709756d0d 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_counter.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o diff --git a/arch/frv/lib/perf_counter.c b/arch/frv/lib/perf_counter.c deleted file mode 100644 index 2000feecd57..00000000000 --- a/arch/frv/lib/perf_counter.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Performance counter handling - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include - -/* - * mark the performance counter as pending - */ -void set_perf_counter_pending(void) -{ -} diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c new file mode 100644 index 00000000000..9ac5acfd2e9 --- /dev/null +++ b/arch/frv/lib/perf_event.c @@ -0,0 +1,19 @@ +/* Performance event handling + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include + +/* + * mark the performance event as pending + */ +void set_perf_event_pending(void) +{ +} diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 946d8691f2b..48b87f5ced5 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -335,7 +335,7 @@ #define __NR_preadv 329 #define __NR_pwritev 330 #define __NR_rt_tgsigqueueinfo 331 -#define __NR_perf_counter_open 332 +#define __NR_perf_event_open 332 #ifdef __KERNEL__ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 922f52e7ed1..c5b33634c98 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -756,5 +756,5 @@ sys_call_table: .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 0ae123e0898..23535cc415a 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -350,7 +350,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 0b852327c0e..cb05a07e55e 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 363 /* new */ #define __NR_pwritev 364 /* new */ #define __NR_rt_tgsigqueueinfo 365 /* new */ -#define __NR_perf_counter_open 366 /* new */ +#define __NR_perf_event_open 366 /* new */ #define __NR_syscalls 367 diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 457216097df..ecec1915513 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -370,4 +370,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_ni_syscall .long sys_rt_tgsigqueueinfo /* 365 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index e753a777949..8c9dfa9e901 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -353,7 +353,7 @@ #define __NR_preadv (__NR_Linux + 330) #define __NR_pwritev (__NR_Linux + 331) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -#define __NR_perf_counter_open (__NR_Linux + 333) +#define __NR_perf_event_open (__NR_Linux + 333) #define __NR_accept4 (__NR_Linux + 334) /* @@ -664,7 +664,7 @@ #define __NR_preadv (__NR_Linux + 289) #define __NR_pwritev (__NR_Linux + 290) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -#define __NR_perf_counter_open (__NR_Linux + 292) +#define __NR_perf_event_open (__NR_Linux + 292) #define __NR_accept4 (__NR_Linux + 293) /* @@ -979,7 +979,7 @@ #define __NR_preadv (__NR_Linux + 293) #define __NR_pwritev (__NR_Linux + 294) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -#define __NR_perf_counter_open (__NR_Linux + 296) +#define __NR_perf_event_open (__NR_Linux + 296) #define __NR_accept4 (__NR_Linux + 297) /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7c2de4f091c..fd2a9bb620d 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -581,7 +581,7 @@ einval: li v0, -ENOSYS sys sys_preadv 6 /* 4330 */ sys sys_pwritev 6 sys sys_rt_tgsigqueueinfo 4 - sys sys_perf_counter_open 5 + sys 
sys_perf_event_open 5 sys sys_accept4 4 .endm diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index b97b993846d..18bf7f32c5e 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -418,6 +418,6 @@ sys_call_table: PTR sys_preadv PTR sys_pwritev /* 5390 */ PTR sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 1a6ae124635..6ebc0797669 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -416,6 +416,6 @@ EXPORT(sysn32_call_table) PTR sys_preadv PTR sys_pwritev PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cd31087a651..9bbf9775e0b 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -536,6 +536,6 @@ sys_call_table: PTR compat_sys_preadv /* 4330 */ PTR compat_sys_pwritev PTR compat_sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index fad68616af3..2a983931c11 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -347,7 +347,7 @@ #define __NR_preadv 334 #define __NR_pwritev 335 #define __NR_rt_tgsigqueueinfo 336 -#define __NR_perf_counter_open 337 +#define __NR_perf_event_open 337 #ifdef __KERNEL__ diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index e0d2563af4f..a94e7ea3faa 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -723,7 +723,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 335 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open nr_syscalls=(.-sys_call_table)/4 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 06f8d5b5b0f..f388dc68f60 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,7 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/perf_counter.h b/arch/parisc/include/asm/perf_counter.h deleted file mode 100644 index dc9e829f701..00000000000 --- a/arch/parisc/include/asm/perf_counter.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_PARISC_PERF_COUNTER_H -#define __ASM_PARISC_PERF_COUNTER_H - -/* parisc only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) { } - -#endif /* __ASM_PARISC_PERF_COUNTER_H */ diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h new file mode 100644 index 00000000000..cc146427d8f --- /dev/null +++ b/arch/parisc/include/asm/perf_event.h @@ -0,0 +1,7 @@ +#ifndef __ASM_PARISC_PERF_EVENT_H +#define __ASM_PARISC_PERF_EVENT_H + +/* parisc only supports software events through this interface. 
*/ +static inline void set_perf_event_pending(void) { } + +#endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index f3d3b8b012c..cda158318c6 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -810,9 +810,9 @@ #define __NR_preadv (__NR_Linux + 315) #define __NR_pwritev (__NR_Linux + 316) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 317) -#define __NR_perf_counter_open (__NR_Linux + 318) +#define __NR_perf_event_open (__NR_Linux + 318) -#define __NR_Linux_syscalls (__NR_perf_counter_open + 1) +#define __NR_Linux_syscalls (__NR_perf_event_open + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index cf145eb026b..843f423dec6 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -416,7 +416,7 @@ ENTRY_COMP(preadv) /* 315 */ ENTRY_COMP(pwritev) ENTRY_COMP(rt_tgsigqueueinfo) - ENTRY_SAME(perf_counter_open) + ENTRY_SAME(perf_event_open) /* Nothing yet */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8250902265c..4fd479059d6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,7 +129,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e73d554538d..abbc2aaaced 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -135,43 +135,43 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct irq_chip; -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_counter_pending))); + : "i" (offsetof(struct paca_struct, perf_event_pending))); return x; } -static inline void set_perf_counter_pending(void) +static inline void set_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } -static inline void clear_perf_counter_pending(void) +static inline void clear_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } #endif /* CONFIG_PPC64 */ -#else /* CONFIG_PERF_COUNTERS */ +#else /* CONFIG_PERF_EVENTS */ -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { return 0; } -static inline void clear_perf_counter_pending(void) {} -#endif /* CONFIG_PERF_COUNTERS */ +static inline void clear_perf_event_pending(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b634456ea89..154f405b642 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -122,7 +122,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_counter_pending; /* PM interrupt 
while soft-disabled */ + u8 perf_event_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h deleted file mode 100644 index 0ea0639fcf7..00000000000 --- a/arch/powerpc/include/asm/perf_counter.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Performance counter support - PowerPC-specific definitions. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include - -#include - -#define MAX_HWCOUNTERS 8 -#define MAX_EVENT_ALTERNATIVES 8 -#define MAX_LIMITED_HWCOUNTERS 2 - -/* - * This struct provides the constants and functions needed to - * describe the PMU on a particular POWER-family CPU. - */ -struct power_pmu { - const char *name; - int n_counter; - int max_alternatives; - unsigned long add_fields; - unsigned long test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]); - int (*get_constraint)(u64 event, unsigned long *mskp, - unsigned long *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -}; - -/* - * Values for power_pmu.flags - */ -#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ -#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ - -/* - * Values for flags to get_alternatives() - */ -#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ -#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ -#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ - -extern int register_power_pmu(struct power_pmu *); - -struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); - -#define PERF_COUNTER_INDEX_OFFSET 1 - -/* - * Only override the default definitions in include/linux/perf_counter.h - * if we have hardware PMU support. - */ -#ifdef CONFIG_PPC_PERF_CTRS -#define perf_misc_flags(regs) perf_misc_flags(regs) -#endif - -/* - * The power_pmu.get_constraint function returns a 32/64-bit value and - * a 32/64-bit mask that express the constraints between this event and - * other events. - * - * The value and mask are divided up into (non-overlapping) bitfields - * of three different types: - * - * Select field: this expresses the constraint that some set of bits - * in MMCR* needs to be set to a specific value for this event. For a - * select field, the mask contains 1s in every bit of the field, and - * the value contains a unique value for each possible setting of the - * MMCR* bits. The constraint checking code will ensure that two events - * that set the same field in their masks have the same value in their - * value dwords. - * - * Add field: this expresses the constraint that there can be at most - * N events in a particular class. A field of k bits can be used for - * N <= 2^(k-1) - 1. 
The mask has the most significant bit of the field - * set (and the other bits 0), and the value has only the least significant - * bit of the field set. In addition, the 'add_fields' and 'test_adder' - * in the struct power_pmu for this processor come into play. The - * add_fields value contains 1 in the LSB of the field, and the - * test_adder contains 2^(k-1) - 1 - N in the field. - * - * NAND field: this expresses the constraint that you may not have events - * in all of a set of classes. (For example, on PPC970, you can't select - * events from the FPU, ISU and IDU simultaneously, although any two are - * possible.) For N classes, the field is N+1 bits wide, and each class - * is assigned one bit from the least-significant N bits. The mask has - * only the most-significant bit set, and the value has only the bit - * for the event's class set. The test_adder has the least significant - * bit set in the field. - * - * If an event is not subject to the constraint expressed by a particular - * field, then it will have 0 in both the mask and value for that field. - */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h new file mode 100644 index 00000000000..2499aaadaeb --- /dev/null +++ b/arch/powerpc/include/asm/perf_event.h @@ -0,0 +1,110 @@ +/* + * Performance event support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#include + +#define MAX_HWEVENTS 8 +#define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWEVENTS 2 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + const char *name; + int n_event; + int max_alternatives; + unsigned long add_fields; + unsigned long test_adder; + int (*compute_mmcr)(u64 events[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]); + int (*get_constraint)(u64 event_id, unsigned long *mskp, + unsigned long *valp); + int (*get_alternatives)(u64 event_id, unsigned int flags, + u64 alt[]); + void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + int (*limited_pmc_event)(u64 event_id); + u32 flags; + int n_generic; + int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +/* + * Values for power_pmu.flags + */ +#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ +#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ + +/* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + +extern int register_power_pmu(struct power_pmu *); + +struct pt_regs; +extern unsigned long perf_misc_flags(struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + +#define PERF_EVENT_INDEX_OFFSET 1 + +/* + * Only override the default definitions in include/linux/perf_event.h + * if we have hardware PMU support. 
+ */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + +/* + * The power_pmu.get_constraint function returns a 32/64-bit value and + * a 32/64-bit mask that express the constraints between this event_id and + * other events. + * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event_id. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event_id's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event_id is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. 
+ */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ed24bd92fe4..c7d671a7d9a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,7 +322,7 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL_SPU(perf_counter_open) +SYSCALL_SPU(perf_event_open) COMPAT_SYS_SPU(preadv) COMPAT_SYS_SPU(pwritev) COMPAT_SYS(rt_tgsigqueueinfo) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cef080bfc60..f6ca7617676 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -341,7 +341,7 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 -#define __NR_perf_counter_open 319 +#define __NR_perf_event_open 319 #define __NR_preadv 320 #define __NR_pwritev 321 #define __NR_rt_tgsigqueueinfo 322 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 569f79ccd31..b23664a0b86 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f0df285f0f8..0812b0f414b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66bcda34a6b..900e0eea009 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,14 +556,14 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_COUNTERS - /* check paca->perf_counter_pending if we're enabling ints */ +#ifdef CONFIG_PERF_EVENTS + /* check paca->perf_event_pending if we're enabling ints */ lbz r3,PACAPERFPEND(r13) and. 
r3,r3,r5 beq 27f - bl .perf_counter_do_pending + bl .perf_event_do_pending 27: -#endif /* CONFIG_PERF_COUNTERS */ +#endif /* CONFIG_PERF_EVENTS */ /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376ea7b1..e5d12117798 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -138,9 +138,9 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } /* diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index cc466d039af..09d72028f31 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f74b62c6751..0a03cf70d24 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c deleted file mode 100644 index 5ccf9bca96c..00000000000 --- a/arch/powerpc/kernel/perf_counter.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Performance counter support - powerpc architecture code - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cpu_hw_counters { - int n_counters; - int n_percpu; - int disabled; - int n_added; - int n_limited; - u8 pmcs_enabled; - struct perf_counter *counter[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int flags[MAX_HWCOUNTERS]; - unsigned long mmcr[3]; - struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; - u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); - -struct power_pmu *ppmu; - -/* - * Normally, to ignore kernel events we set the FCS (freeze counters - * in supervisor mode) bit in MMCR0, but if the kernel runs with the - * hypervisor bit set in the MSR, or if we are running on a processor - * where the hypervisor bit is forced to 1 (as on Apple G5 processors), - * then we need to use the FCHV bit to ignore kernel events. - */ -static unsigned int freeze_counters_kernel = MMCR0_FCS; - -/* - * 32-bit doesn't have MMCRA but does have an MMCR2, - * and a few other names are different. 
- */ -#ifdef CONFIG_PPC32 - -#define MMCR0_FCHV 0 -#define MMCR0_PMCjCE MMCR0_PMCnCE - -#define SPRN_MMCRA SPRN_MMCR2 -#define MMCRA_SAMPLE_ENABLE 0 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_read_regs(struct pt_regs *regs) { } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} - -#endif /* CONFIG_PPC32 */ - -/* - * Things that are specific to 64-bit implementations. - */ -#ifdef CONFIG_PPC64 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - return 4 * (slot - 1); - } - return 0; -} - -/* - * The user wants a data address recorded. - * If we're not doing instruction sampling, give them the SDAR - * (sampled data address). If we are doing instruction sampling, then - * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC - * bit in MMCRA. - */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) -{ - unsigned long mmcra = regs->dsisr; - unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - *addrp = mfspr(SPRN_SDAR); -} - -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if (TRAP(regs) != 0xf00) - return 0; /* not a PMU interrupt */ - - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? - PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Overload regs->dsisr to store MMCRA so we only need to read it once - * on each interrupt. - */ -static inline void perf_read_regs(struct pt_regs *regs) -{ - regs->dsisr = mfspr(SPRN_MMCRA); -} - -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return !regs->softe; -} - -#endif /* CONFIG_PPC64 */ - -static void perf_counter_interrupt(struct pt_regs *regs); - -void perf_counter_print_debug(void) -{ -} - -/* - * Read one performance monitor counter (PMC). - */ -static unsigned long read_pmc(int idx) -{ - unsigned long val; - - switch (idx) { - case 1: - val = mfspr(SPRN_PMC1); - break; - case 2: - val = mfspr(SPRN_PMC2); - break; - case 3: - val = mfspr(SPRN_PMC3); - break; - case 4: - val = mfspr(SPRN_PMC4); - break; - case 5: - val = mfspr(SPRN_PMC5); - break; - case 6: - val = mfspr(SPRN_PMC6); - break; -#ifdef CONFIG_PPC64 - case 7: - val = mfspr(SPRN_PMC7); - break; - case 8: - val = mfspr(SPRN_PMC8); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to read PMC%d\n", idx); - val = 0; - } - return val; -} - -/* - * Write one PMC. 
- */ -static void write_pmc(int idx, unsigned long val) -{ - switch (idx) { - case 1: - mtspr(SPRN_PMC1, val); - break; - case 2: - mtspr(SPRN_PMC2, val); - break; - case 3: - mtspr(SPRN_PMC3, val); - break; - case 4: - mtspr(SPRN_PMC4, val); - break; - case 5: - mtspr(SPRN_PMC5, val); - break; - case 6: - mtspr(SPRN_PMC6, val); - break; -#ifdef CONFIG_PPC64 - case 7: - mtspr(SPRN_PMC7, val); - break; - case 8: - mtspr(SPRN_PMC8, val); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to write PMC%d\n", idx); - } -} - -/* - * Check if a set of events can all go on the PMU at once. - * If they can't, this will look at alternative codes for the events - * and see if any combination of alternative codes is feasible. - * The feasible set is returned in event[]. - */ -static int power_check_constraints(struct cpu_hw_counters *cpuhw, - u64 event[], unsigned int cflags[], - int n_ev) -{ - unsigned long mask, value, nv; - unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; - int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; - int i, j; - unsigned long addf = ppmu->add_fields; - unsigned long tadd = ppmu->test_adder; - - if (n_ev > ppmu->n_counter) - return -1; - - /* First see if the events will go on as-is */ - for (i = 0; i < n_ev; ++i) { - if ((cflags[i] & PPMU_LIMITED_PMC_REQD) - && !ppmu->limited_pmc_event(event[i])) { - ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - event[i] = cpuhw->alternatives[i][0]; - } - if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) - return -1; - } - value = mask = 0; - for (i = 0; i < n_ev; ++i) { - nv = (value | cpuhw->avalues[i][0]) + - (value & cpuhw->avalues[i][0] & addf); - if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ cpuhw->avalues[i][0]) & - cpuhw->amasks[i][0]) != 0) - break; - value = nv; - mask |= cpuhw->amasks[i][0]; - } - if (i == n_ev) - return 0; /* all OK */ - - /* doesn't work, gather alternatives... */ - if (!ppmu->get_alternatives) - return -1; - for (i = 0; i < n_ev; ++i) { - choice[i] = 0; - n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(cpuhw->alternatives[i][j], - &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); - } - - /* enumerate all possibilities and see if any will work */ - i = 0; - j = -1; - value = mask = nv = 0; - while (i < n_ev) { - if (j >= 0) { - /* we're backtracking, restore context */ - value = svalues[i]; - mask = smasks[i]; - j = choice[i]; - } - /* - * See if any alternative k for event i, - * where k > j, will satisfy the constraints. - */ - while (++j < n_alt[i]) { - nv = (value | cpuhw->avalues[i][j]) + - (value & cpuhw->avalues[i][j] & addf); - if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ cpuhw->avalues[i][j]) - & cpuhw->amasks[i][j]) == 0) - break; - } - if (j >= n_alt[i]) { - /* - * No feasible alternative, backtrack - * to event i-1 and continue enumerating its - * alternatives from where we got up to. - */ - if (--i < 0) - return -1; - } else { - /* - * Found a feasible alternative for event i, - * remember where we got up to with this event, - * go on to the next event, and start with - * the first alternative for it. 
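power_check_constraints() above is a plain backtracking search: try the next alternative code for event i, and when none fits, step back to event i-1 and resume from its saved choice. Here is that skeleton distilled into a standalone program, with a made-up feasibility predicate in place of the add_fields/test_adder bit arithmetic.

#include <stdio.h>

#define N_EV   3
#define N_ALT  2

/* Hypothetical stand-in for the PMU constraint test. The kernel keeps
 * incremental value/mask state in svalues[]/smasks[]; this sketch just
 * recomputes from scratch: a combination is "feasible" if the chosen
 * codes sum to an even value. */
static int feasible(const int chosen[], int depth)
{
    int i, sum = 0;
    for (i = 0; i <= depth; i++)
        sum += chosen[i];
    return (sum & 1) == 0;
}

static int alt[N_EV][N_ALT] = { {1, 2}, {3, 4}, {5, 6} };

int main(void)
{
    int choice[N_EV], chosen[N_EV];
    int i = 0, j = -1;

    /* Same skeleton as power_check_constraints(): advance through the
     * events, trying alternatives j+1.. for event i; on failure,
     * backtrack to event i-1 and resume from its saved choice. */
    while (i < N_EV) {
        while (++j < N_ALT) {
            chosen[i] = alt[i][j];
            if (feasible(chosen, i))
                break;
        }
        if (j >= N_ALT) {
            if (--i < 0) {
                puts("no feasible combination");
                return 1;
            }
            j = choice[i];      /* restore and keep searching */
        } else {
            choice[i] = j;      /* remember where we got to */
            ++i;
            j = -1;
        }
    }
    for (i = 0; i < N_EV; i++)
        printf("event %d -> code %d\n", i, alt[i][choice[i]]);
    return 0;
}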
- */ - choice[i] = j; - svalues[i] = value; - smasks[i] = mask; - value = nv; - mask |= cpuhw->amasks[i][j]; - ++i; - j = -1; - } - } - - /* OK, we have a feasible combination, tell the caller the solution */ - for (i = 0; i < n_ev; ++i) - event[i] = cpuhw->alternatives[i][choice[i]]; - return 0; -} - -/* - * Check if newly-added counters have consistent settings for - * exclude_{user,kernel,hv} with each other and any previously - * added counters. - */ -static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], - int n_prev, int n_new) -{ - int eu = 0, ek = 0, eh = 0; - int i, n, first; - struct perf_counter *counter; - - n = n_prev + n_new; - if (n <= 1) - return 0; - - first = 1; - for (i = 0; i < n; ++i) { - if (cflags[i] & PPMU_LIMITED_PMC_OK) { - cflags[i] &= ~PPMU_LIMITED_PMC_REQD; - continue; - } - counter = ctrs[i]; - if (first) { - eu = counter->attr.exclude_user; - ek = counter->attr.exclude_kernel; - eh = counter->attr.exclude_hv; - first = 0; - } else if (counter->attr.exclude_user != eu || - counter->attr.exclude_kernel != ek || - counter->attr.exclude_hv != eh) { - return -EAGAIN; - } - } - - if (eu || ek || eh) - for (i = 0; i < n; ++i) - if (cflags[i] & PPMU_LIMITED_PMC_OK) - cflags[i] |= PPMU_LIMITED_PMC_REQD; - - return 0; -} - -static void power_pmu_read(struct perf_counter *counter) -{ - s64 val, delta, prev; - - if (!counter->hw.idx) - return; - /* - * Performance monitor interrupts come even when interrupts - * are soft-disabled, as long as interrupts are hard-enabled. - * Therefore we treat them like NMIs. - */ - do { - prev = atomic64_read(&counter->hw.prev_count); - barrier(); - val = read_pmc(counter->hw.idx); - } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); - - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &counter->hw.period_left); -} - -/* - * On some machines, PMC5 and PMC6 can't be written, don't respect - * the freeze conditions, and don't generate interrupts. This tells - * us if `counter' is using such a PMC. - */ -static int is_limited_pmc(int pmcnum) -{ - return (ppmu->flags & PPMU_LIMITED_PMC5_6) - && (pmcnum == 5 || pmcnum == 6); -} - -static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val, prev, delta; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - if (!counter->hw.idx) - continue; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&counter->hw.prev_count); - counter->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - } -} - -static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - counter->hw.idx = cpuhw->limited_hwidx[i]; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&counter->hw.prev_count, val); - perf_counter_update_userpage(counter); - } -} - -/* - * Since limited counters don't respect the freeze conditions, we - * have to read them immediately after freezing or unfreezing the - * other counters. 
We try to keep the values from the limited - * counters as consistent as possible by keeping the delay (in - * cycles and instructions) between freezing/unfreezing and reading - * the limited counters as small and consistent as possible. - * Therefore, if any limited counters are in use, we read them - * both, and always in the same order, to minimize variability, - * and do it inside the same asm that writes MMCR0. - */ -static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) -{ - unsigned long pmc5, pmc6; - - if (!cpuhw->n_limited) { - mtspr(SPRN_MMCR0, mmcr0); - return; - } - - /* - * Write MMCR0, then read PMC5 and PMC6 immediately. - * To ensure we don't get a performance monitor interrupt - * between writing MMCR0 and freezing/thawing the limited - * counters, we first write MMCR0 with the counter overflow - * interrupt enable bits turned off. - */ - asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" - : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), - "i" (SPRN_MMCR0), - "i" (SPRN_PMC5), "i" (SPRN_PMC6)); - - if (mmcr0 & MMCR0_FC) - freeze_limited_counters(cpuhw, pmc5, pmc6); - else - thaw_limited_counters(cpuhw, pmc5, pmc6); - - /* - * Write the full MMCR0 including the counter overflow interrupt - * enable bits, if necessary. - */ - if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) - mtspr(SPRN_MMCR0, mmcr0); -} - -/* - * Disable all counters to prevent PMU interrupts and to allow - * counters to be added or removed. - */ -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - - if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - - /* - * Check if we ever enabled the PMU on this cpu. - */ - if (!cpuhw->pmcs_enabled) { - ppc_enable_pmcs(); - cpuhw->pmcs_enabled = 1; - } - - /* - * Disable instruction sampling if it was enabled - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mb(); - } - - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the counters - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); - } - local_irq_restore(flags); -} - -/* - * Re-enable all counters if disable == 0. - * If we were previously disabled and counters were added, then - * put the new config on the PMU. - */ -void hw_perf_enable(void) -{ - struct perf_counter *counter; - struct cpu_hw_counters *cpuhw; - unsigned long flags; - long i; - unsigned long val; - s64 left; - unsigned int hwc_index[MAX_HWCOUNTERS]; - int n_lim; - int idx; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } - cpuhw->disabled = 0; - - /* - * If we didn't change anything, or only removed counters, - * no need to recalculate MMCR* settings and reset the PMCs. - * Just reenable the PMU with the current MMCR* settings - * (possibly updated for removal of counters). 
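write_mmcr0() above (deleted here and re-added under the new names later in this patch) relies purely on ordering rather than locking. A standalone sketch of that sequence follows, with hypothetical spr_read/spr_write helpers standing in for mfspr/mtspr and dummy bit values; the kernel performs steps 1 and 2 inside a single asm block so no PMU interrupt can land in between, which plain C cannot promise -- this only shows the order.

#include <stdio.h>

/* Hypothetical SPR file and helpers standing in for mtspr/mfspr; the
 * bit values are dummies, not the hardware layout. */
enum { SPRN_MMCR0, SPRN_PMC5, SPRN_PMC6, NSPR };
enum { MMCR0_FC = 0x1, MMCR0_PMC1CE = 0x2, MMCR0_PMCjCE = 0x4 };
static unsigned long sprs[NSPR];
static void spr_write(int s, unsigned long v) { sprs[s] = v; }
static unsigned long spr_read(int s) { return sprs[s]; }

static int n_limited = 1; /* pretend one limited event is in use */

static void write_mmcr0_sketch(unsigned long mmcr0)
{
    unsigned long pmc5, pmc6;

    if (!n_limited) {
        spr_write(SPRN_MMCR0, mmcr0);
        return;
    }
    /* 1. Write MMCR0 with the overflow-interrupt enables masked off,
     *    then read the limited PMCs immediately so the snapshot
     *    matches the freeze/unfreeze that just took effect. */
    spr_write(SPRN_MMCR0, mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE));
    pmc5 = spr_read(SPRN_PMC5);
    pmc6 = spr_read(SPRN_PMC6);

    /* 2. Fold the snapshot into the limited events' counts. */
    printf("%s limited PMCs at %lu/%lu\n",
           (mmcr0 & MMCR0_FC) ? "freeze" : "thaw", pmc5, pmc6);

    /* 3. Only now expose the real interrupt-enable bits. */
    if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
        spr_write(SPRN_MMCR0, mmcr0);
}

int main(void)
{
    write_mmcr0_sketch(MMCR0_FC | MMCR0_PMC1CE);
    return 0;
}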
- */ - if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_counters == 0) - ppc_set_pmu_inuse(0); - goto out_enable; - } - - /* - * Compute MMCR* values for the new set of counters - */ - if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, - cpuhw->mmcr)) { - /* shouldn't ever get here */ - printk(KERN_ERR "oops compute_mmcr failed\n"); - goto out; - } - - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first counter. - * We have already checked that all counters have the - * same values for these bits as the first counter. - */ - counter = cpuhw->counter[0]; - if (counter->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; - - /* - * Write the new configuration to MMCR* with the freeze - * bit set and set the hardware counters to their initial values. - * Then unfreeze the counters. - */ - ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) - | MMCR0_FC); - - /* - * Read off any pre-existing counters that need to move - * to another PMC. - */ - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_pmu_read(counter); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - } - - /* - * Initialize the PMCs for all the new and moved counters. - */ - cpuhw->n_limited = n_lim = 0; - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx) - continue; - idx = hwc_index[i] + 1; - if (is_limited_pmc(idx)) { - cpuhw->limited_counter[n_lim] = counter; - cpuhw->limited_hwidx[n_lim] = idx; - ++n_lim; - continue; - } - val = 0; - if (counter->hw.sample_period) { - left = atomic64_read(&counter->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; - } - atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = idx; - write_pmc(idx, val); - perf_counter_update_userpage(counter); - } - cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; - - out_enable: - mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - /* - * Enable instruction sampling if necessary - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); - } - - out: - local_irq_restore(flags); -} - -static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], u64 *events, - unsigned int *flags) -{ - int n = 0; - struct perf_counter *counter; - - if (!is_software_counter(group)) { - if (n >= max_count) - return -1; - ctrs[n] = group; - flags[n] = group->hw.counter_base; - events[n++] = group->hw.config; - } - list_for_each_entry(counter, &group->sibling_list, list_entry) { - if (!is_software_counter(counter) && - counter->state != PERF_COUNTER_STATE_OFF) { - if (n >= max_count) - return -1; - ctrs[n] = counter; - flags[n] = counter->hw.counter_base; - events[n++] = counter->hw.config; - } - } - return n; -} - -static void counter_sched_in(struct perf_counter *counter, int cpu) -{ - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; - if (is_software_counter(counter)) - counter->pmu->enable(counter); -} - 
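collect_events() above (removed here and re-added under the new names in perf_event.c) walks a group leader plus its siblings, keeping only the hardware members that fit within the free PMCs. A simplified model with the sibling list flattened into an array; the struct and field names are invented for the sketch, and unlike the kernel (which always collects a hardware leader regardless of state) it applies the state filter uniformly.

#include <stdio.h>

/* Minimal model of a perf group: a leader plus siblings, where some
 * members are software events that never consume a hardware PMC. */
struct ev { int is_software; int off; unsigned long config; };

/* Mirrors the shape of collect_events(): gather the hardware members
 * of a group, failing (-1) if they would not all fit in max_count
 * PMCs. */
static int collect(const struct ev *grp, int n_grp, int max_count,
                   unsigned long *configs)
{
    int i, n = 0;

    for (i = 0; i < n_grp; i++) {
        if (grp[i].is_software || grp[i].off)
            continue;
        if (n >= max_count)
            return -1;
        configs[n++] = grp[i].config;
    }
    return n;
}

int main(void)
{
    struct ev grp[] = {
        { 0, 0, 0x1001 },   /* leader: hardware */
        { 1, 0, 0 },        /* software sibling: ignored */
        { 0, 0, 0x2002 },   /* hardware sibling */
    };
    unsigned long cfg[2];

    printf("collected %d hw events\n", collect(grp, 3, 2, cfg));
    return 0;
}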
-/* - * Called to enable a whole group of counters. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - struct cpu_hw_counters *cpuhw; - long i, n, n0; - struct perf_counter *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_counters = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update counter states etc., - * and enable any software counters - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->counter[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - counter_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { - if (sub->state != PERF_COUNTER_STATE_OFF) { - counter_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - -/* - * Add a counter to the PMU. - * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_enable to do the - * actual work of reconfiguring the PMU. - */ -static int power_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - int n0; - int ret = -EAGAIN; - - local_irq_save(flags); - perf_disable(); - - /* - * Add the counter to the list (if there is room) - * and check whether the total set is still feasible. - */ - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - if (n0 >= ppmu->n_counter) - goto out; - cpuhw->counter[n0] = counter; - cpuhw->events[n0] = counter->hw.config; - cpuhw->flags[n0] = counter->hw.counter_base; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) - goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) - goto out; - - counter->hw.config = cpuhw->events[n0]; - ++cpuhw->n_counters; - ++cpuhw->n_added; - - ret = 0; - out: - perf_enable(); - local_irq_restore(flags); - return ret; -} - -/* - * Remove a counter from the PMU. 
- */ -static void power_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - long i; - unsigned long flags; - - local_irq_save(flags); - perf_disable(); - - power_pmu_read(counter); - - cpuhw = &__get_cpu_var(cpu_hw_counters); - for (i = 0; i < cpuhw->n_counters; ++i) { - if (counter == cpuhw->counter[i]) { - while (++i < cpuhw->n_counters) - cpuhw->counter[i-1] = cpuhw->counter[i]; - --cpuhw->n_counters; - ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - if (counter->hw.idx) { - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - perf_counter_update_userpage(counter); - break; - } - } - for (i = 0; i < cpuhw->n_limited; ++i) - if (counter == cpuhw->limited_counter[i]) - break; - if (i < cpuhw->n_limited) { - while (++i < cpuhw->n_limited) { - cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; - cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; - } - --cpuhw->n_limited; - } - if (cpuhw->n_counters == 0) { - /* disable exceptions if no counters are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); - } - - perf_enable(); - local_irq_restore(flags); -} - -/* - * Re-enable interrupts on a counter after they were throttled - * because they were coming too fast. - */ -static void power_pmu_unthrottle(struct perf_counter *counter) -{ - s64 val, left; - unsigned long flags; - - if (!counter->hw.idx || !counter->hw.sample_period) - return; - local_irq_save(flags); - perf_disable(); - power_pmu_read(counter); - left = counter->hw.sample_period; - counter->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); - perf_enable(); - local_irq_restore(flags); -} - -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, -}; - -/* - * Return 1 if we might be able to put counter on a limited PMC, - * or 0 if not. - * A counter can only go on a limited PMC if it counts something - * that a limited PMC can count, doesn't require interrupts, and - * doesn't exclude any processor mode. - */ -static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, - unsigned int flags) -{ - int n; - u64 alt[MAX_EVENT_ALTERNATIVES]; - - if (counter->attr.exclude_user - || counter->attr.exclude_kernel - || counter->attr.exclude_hv - || counter->attr.sample_period) - return 0; - - if (ppmu->limited_pmc_event(ev)) - return 1; - - /* - * The requested event isn't on a limited PMC already; - * see if any alternative code goes on a limited PMC. - */ - if (!ppmu->get_alternatives) - return 0; - - flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; - n = ppmu->get_alternatives(ev, flags, alt); - - return n > 0; -} - -/* - * Find an alternative event that goes on a normal PMC, if possible, - * and return the event code, or 0 if there is no such alternative. - * (Note: event code 0 is "don't count" on all machines.) 
- */ -static u64 normal_pmc_alternative(u64 ev, unsigned long flags) -{ - u64 alt[MAX_EVENT_ALTERNATIVES]; - int n; - - flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); - n = ppmu->get_alternatives(ev, flags, alt); - if (!n) - return 0; - return alt[0]; -} - -/* Number of perf_counters counting hardware events */ -static atomic_t num_counters; -/* Used to avoid races in calling reserve/release_pmc_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - -/* - * Release the PMU if this is the last perf_counter. - */ -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (!atomic_add_unless(&num_counters, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_counters) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -/* - * Translate a generic cache event config to a raw event code. - */ -static int hw_perf_cache_event(u64 config, u64 *eventp) -{ - unsigned long type, op, result; - int ev; - - if (!ppmu->cache_events) - return -EINVAL; - - /* unpack config */ - type = config & 0xff; - op = (config >> 8) & 0xff; - result = (config >> 16) & 0xff; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ev = (*ppmu->cache_events)[type][op][result]; - if (ev == 0) - return -EOPNOTSUPP; - if (ev == -1) - return -EINVAL; - *eventp = ev; - return 0; -} - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - u64 ev; - unsigned long flags; - struct perf_counter *ctrs[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int cflags[MAX_HWCOUNTERS]; - int n; - int err; - struct cpu_hw_counters *cpuhw; - - if (!ppmu) - return ERR_PTR(-ENXIO); - switch (counter->attr.type) { - case PERF_TYPE_HARDWARE: - ev = counter->attr.config; - if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); - ev = ppmu->generic_events[ev]; - break; - case PERF_TYPE_HW_CACHE: - err = hw_perf_cache_event(counter->attr.config, &ev); - if (err) - return ERR_PTR(err); - break; - case PERF_TYPE_RAW: - ev = counter->attr.config; - break; - default: - return ERR_PTR(-EINVAL); - } - counter->hw.config_base = ev; - counter->hw.idx = 0; - - /* - * If we are not running on a hypervisor, force the - * exclude_hv bit to 0 so that we don't care what - * the user set it to. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->attr.exclude_hv = 0; - - /* - * If this is a per-task counter, then we can use - * PM_RUN_* events interchangeably with their non RUN_* - * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. - * XXX we should check if the task is an idle task. - */ - flags = 0; - if (counter->ctx->task) - flags |= PPMU_ONLY_COUNT_RUN; - - /* - * If this machine has limited counters, check whether this - * event could go on a limited counter. - */ - if (ppmu->flags & PPMU_LIMITED_PMC5_6) { - if (can_go_on_limited_pmc(counter, ev, flags)) { - flags |= PPMU_LIMITED_PMC_OK; - } else if (ppmu->limited_pmc_event(ev)) { - /* - * The requested event is on a limited PMC, - * but we can't use a limited PMC; see if any - * alternative goes on a normal PMC. - */ - ev = normal_pmc_alternative(ev, flags); - if (!ev) - return ERR_PTR(-EINVAL); - } - } - - /* - * If this is in a group, check if it can go on with all the - * other hardware counters in the group. We assume the counter - * hasn't been linked into its leader's sibling list at this point. 
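hw_perf_cache_event() above (re-added apart from naming in perf_event.c below) treats the generic cache config as three packed bytes: type in bits 0-7, op in bits 8-15, result in bits 16-23. A quick standalone decode of an example value:

#include <stdio.h>

/* Decode a generic cache event config the way hw_perf_cache_event()
 * does: type in bits 0-7, op in bits 8-15, result in bits 16-23. */
int main(void)
{
    unsigned long long config = 0x010002ULL;  /* example value */
    unsigned type   = config & 0xff;          /* 0x02 */
    unsigned op     = (config >> 8) & 0xff;   /* 0x00 */
    unsigned result = (config >> 16) & 0xff;  /* 0x01 */

    printf("type=%u op=%u result=%u\n", type, op, result);
    return 0;
}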
- */ - n = 0; - if (counter->group_leader != counter) { - n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events, cflags); - if (n < 0) - return ERR_PTR(-EINVAL); - } - events[n] = ev; - ctrs[n] = counter; - cflags[n] = flags; - if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); - - cpuhw = &get_cpu_var(cpu_hw_counters); - err = power_check_constraints(cpuhw, events, cflags, n + 1); - put_cpu_var(cpu_hw_counters); - if (err) - return ERR_PTR(-EINVAL); - - counter->hw.config = events[n]; - counter->hw.counter_base = cflags[n]; - counter->hw.last_period = counter->hw.sample_period; - atomic64_set(&counter->hw.period_left, counter->hw.last_period); - - /* - * See if we need to reserve the PMU. - * If no counters are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing - * reserve_pmc_hardware or release_pmc_hardware. - */ - err = 0; - if (!atomic_inc_not_zero(&num_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_counters) == 0 && - reserve_pmc_hardware(perf_counter_interrupt)) - err = -EBUSY; - else - atomic_inc(&num_counters); - mutex_unlock(&pmc_reserve_mutex); - } - counter->destroy = hw_perf_counter_destroy; - - if (err) - return ERR_PTR(err); - return &power_pmu; -} - -/* - * A counter has overflowed; update its count and record - * things if requested. Note that interrupts are hard-disabled - * here so there is no possibility of being interrupted. - */ -static void record_and_restart(struct perf_counter *counter, unsigned long val, - struct pt_regs *regs, int nmi) -{ - u64 period = counter->hw.sample_period; - s64 prev, delta, left; - int record = 0; - - /* we don't have to worry about interrupts here */ - prev = atomic64_read(&counter->hw.prev_count); - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - - /* - * See if the total period for this counter has expired, - * and update for the next period. - */ - val = 0; - left = atomic64_read(&counter->hw.period_left) - delta; - if (period) { - if (left <= 0) { - left += period; - if (left <= 0) - left = period; - record = 1; - } - if (left < 0x80000000LL) - val = 0x80000000LL - left; - } - - /* - * Finally record data if requested. - */ - if (record) { - struct perf_sample_data data = { - .addr = 0, - .period = counter->hw.last_period, - }; - - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) - perf_get_data_addr(regs, &data.addr); - - if (perf_counter_overflow(counter, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the counter to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each counter counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } - } - - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); -} - -/* - * Called from generic code to get the misc flags (i.e. processor mode) - * for an event. - */ -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - u32 flags = perf_get_misc_flags(regs); - - if (flags) - return flags; - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Called from generic code to get the instruction pointer - * for an event. 
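record_and_restart() above preloads the 32-bit PMC with 2^31 - left, so the counter's sign bit sets -- and the overflow interrupt fires -- after exactly `left` more counts. The arithmetic on its own:

#include <stdio.h>

/* The PMCs are 32 bits and interrupt when the sign bit sets, so to get
 * an interrupt after "left" more counts the PMC is preloaded with
 * 2^31 - left (see record_and_restart and power_pmu_unthrottle). */
int main(void)
{
    long long left = 100000;        /* counts until the next sample */
    unsigned long val = 0;

    if (left < 0x80000000LL)
        val = 0x80000000LL - left;

    printf("preload PMC with 0x%lx; it goes negative after %lld counts\n",
           val, 0x80000000LL - (long long)val);
    return 0;
}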
- */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long ip; - - if (TRAP(regs) != 0xf00) - return regs->nip; /* not a PMU interrupt */ - - ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); - return ip; -} - -/* - * Performance monitor interrupt stuff - */ -static void perf_counter_interrupt(struct pt_regs *regs) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - unsigned long val; - int found = 0; - int nmi; - - if (cpuhw->n_limited) - freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), - mfspr(SPRN_PMC6)); - - perf_read_regs(regs); - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) - continue; - val = read_pmc(counter->hw.idx); - if ((int)val < 0) { - /* counter has overflowed */ - found = 1; - record_and_restart(counter, val, regs, nmi); - } - } - - /* - * In case we didn't find and reset the counter that caused - * the interrupt, scan all counters and reset any that are - * negative, to avoid getting continual interrupts. - * Any that we processed in the previous loop will not be negative. - */ - if (!found) { - for (i = 0; i < ppmu->n_counter; ++i) { - if (is_limited_pmc(i + 1)) - continue; - val = read_pmc(i + 1); - if ((int)val < 0) - write_pmc(i + 1, 0); - } - } - - /* - * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze counters) and PMAO (perf mon alert occurred) - * and thus allow interrupts to occur again. - * XXX might want to use MSR.PM to keep the counters frozen until - * we get back out of this interrupt. - */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - if (nmi) - nmi_exit(); - else - irq_exit(); -} - -void hw_perf_counter_setup(int cpu) -{ - struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); - - if (!ppmu) - return; - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -int register_power_pmu(struct power_pmu *pmu) -{ - if (ppmu) - return -EBUSY; /* something's already registered */ - - ppmu = pmu; - pr_info("%s performance monitor hardware support registered\n", - pmu->name); - -#ifdef MSR_HV - /* - * Use FCHV to ignore kernel events if MSR.HV is set. - */ - if (mfmsr() & MSR_HV) - freeze_counters_kernel = MMCR0_FCHV; -#endif /* CONFIG_PPC64 */ - - return 0; -} diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c new file mode 100644 index 00000000000..c98321fcb45 --- /dev/null +++ b/arch/powerpc/kernel/perf_event.c @@ -0,0 +1,1315 @@ +/* + * Performance event support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_events { + int n_events; + int n_percpu; + int disabled; + int n_added; + int n_limited; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int flags[MAX_HWEVENTS]; + unsigned long mmcr[3]; + struct perf_event *limited_event[MAX_LIMITED_HWEVENTS]; + u8 limited_hwidx[MAX_LIMITED_HWEVENTS]; + u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct power_pmu *ppmu; + +/* + * Normally, to ignore kernel events we set the FCS (freeze events + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_events_kernel = MMCR0_FCS; + +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. 
+ */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + +static void perf_event_interrupt(struct pt_regs *regs); + +void perf_event_print_debug(void) +{ +} + +/* + * Read one performance monitor event (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; +#ifdef CONFIG_PPC64 + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; +#ifdef CONFIG_PPC64 + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event_id[]. + */ +static int power_check_constraints(struct cpu_hw_events *cpuhw, + u64 event_id[], unsigned int cflags[], + int n_ev) +{ + unsigned long mask, value, nv; + unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; + int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; + int i, j; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_event) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event_id[i])) { + ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + event_id[i] = cpuhw->alternatives[i][0]; + } + if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], + &cpuhw->avalues[i][0])) + return -1; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | cpuhw->avalues[i][0]) + + (value & cpuhw->avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ cpuhw->avalues[i][0]) & + cpuhw->amasks[i][0]) != 0) + break; + value = nv; + mask |= cpuhw->amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... 
*/ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(cpuhw->alternatives[i][j], + &cpuhw->amasks[i][j], + &cpuhw->avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event_id i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | cpuhw->avalues[i][j]) + + (value & cpuhw->avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ cpuhw->avalues[i][j]) + & cpuhw->amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event_id i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event_id i, + * remember where we got up to with this event_id, + * go on to the next event_id, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= cpuhw->amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event_id[i] = cpuhw->alternatives[i][choice[i]]; + return 0; +} + +/* + * Check if newly-added events have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added events. + */ +static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], + int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + int i, n, first; + struct perf_event *event; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } + event = ctrs[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + + return 0; +} + +static void power_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + if (!event->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The events are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `event' is using such a PMC. 
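power_pmu_read() above (like its deleted predecessor) publishes prev_count with a cmpxchg retry loop, because a PMU interrupt can update the same event concurrently, and masks the delta to 32 bits since that is all the PMC holds. The same pattern in standalone C11 atomics, with the hardware read stubbed out:

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for read_pmc(); in the kernel this read can race with the
 * overflow interrupt handler updating prev_count. */
static _Atomic unsigned long hw_pmc = 0x1234;
static unsigned long read_pmc_stub(void) { return atomic_load(&hw_pmc); }

static _Atomic unsigned long prev_count;
static _Atomic long long count;

/* Mirrors power_pmu_read(): publish prev_count with cmpxchg so a
 * concurrent updater can't make us account the same delta twice, and
 * mask the delta because the counters are only 32 bits wide. */
static void pmu_read_sketch(void)
{
    unsigned long prev, val;

    do {
        prev = atomic_load(&prev_count);
        val = read_pmc_stub();
    } while (!atomic_compare_exchange_strong(&prev_count, &prev, val));

    atomic_fetch_add(&count, (long long)((val - prev) & 0xfffffffful));
}

int main(void)
{
    pmu_read_sketch();
    printf("count = %lld\n", (long long)atomic_load(&count));
    return 0;
}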
+ */ +static int is_limited_pmc(int pmcnum) +{ + return (ppmu->flags & PPMU_LIMITED_PMC5_6) + && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + if (!event->hw.idx) + continue; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&event->hw.prev_count); + event->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + } +} + +static void thaw_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + event->hw.idx = cpuhw->limited_hwidx[i]; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&event->hw.prev_count, val); + perf_event_update_userpage(event); + } +} + +/* + * Since limited events don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other events. We try to keep the values from the limited + * events as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited events as small and consistent as possible. + * Therefore, if any limited events are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + * To ensure we don't get a performance monitor interrupt + * between writing MMCR0 and freezing/thawing the limited + * events, we first write MMCR0 with the event overflow + * interrupt enable bits turned off. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), + "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_events(cpuhw, pmc5, pmc6); + else + thaw_limited_events(cpuhw, pmc5, pmc6); + + /* + * Write the full MMCR0 including the event overflow interrupt + * enable bits, if necessary. + */ + if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCR0, mmcr0); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + + /* + * Set the 'freeze events' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events + * before we return. + */ + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. 
+ * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct perf_event *event; + struct cpu_hw_events *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWEVENTS]; + int n_lim; + int idx; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed events, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of events). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + if (cpuhw->n_events == 0) + ppc_set_pmu_inuse(0); + goto out_enable; + } + + /* + * Compute MMCR* values for the new set of events + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Add in MMCR0 freeze bits corresponding to the + * attr.exclude_* bits for the first event. + * We have already checked that all events have the + * same values for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware events to their initial values. + * Then unfreeze the events. + */ + ppc_set_pmu_inuse(1); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing events that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { + power_pmu_read(event); + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved events. 
+ */ + cpuhw->n_limited = n_lim = 0; + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx) + continue; + idx = hwc_index[i] + 1; + if (is_limited_pmc(idx)) { + cpuhw->limited_event[n_lim] = event; + cpuhw->limited_hwidx[n_lim] = idx; + ++n_lim; + continue; + } + val = 0; + if (event->hw.sample_period) { + left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + event->hw.idx = idx; + write_pmc(idx, val); + perf_event_update_userpage(event); + } + cpuhw->n_limited = n_lim; + cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + + out_enable: + mb(); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + /* + * Enable instruction sampling if necessary + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mb(); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[], u64 *events, + unsigned int *flags) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + flags[n] = group->hw.event_base; + events[n++] = group->hw.config; + } + list_for_each_entry(event, &group->sibling_list, list_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + flags[n] = event->hw.event_base; + events[n++] = event->hw.config; + } + } + return n; +} + +static void event_sched_in(struct perf_event *event, int cpu) +{ + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + if (is_software_event(event)) + event->pmu->enable(event); +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + */ +int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuhw; + long i, n, n0; + struct perf_event *sub; + + if (!ppmu) + return 0; + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + n = collect_events(group_leader, ppmu->n_event - n0, + &cpuhw->event[n0], &cpuhw->events[n0], + &cpuhw->flags[n0]); + if (n < 0) + return -EAGAIN; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); + if (i < 0) + return -EAGAIN; + cpuhw->n_events = n0 + n; + cpuhw->n_added += n; + + /* + * OK, this group can go on; update event states etc., + * and enable any software events + */ + for (i = n0; i < n0 + n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; + n = 1; + event_sched_in(group_leader, cpu); + list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + if (sub->state != PERF_EVENT_STATE_OFF) { + event_sched_in(sub, cpu); + ++n; + } + } + ctx->nr_active += n; + + return 1; +} + +/* + * Add an event to the PMU. + * If all events are not already frozen, then we disable and + * re-enable the PMU in order to get hw_perf_enable to do the + * actual work of reconfiguring the PMU.
+ */ +static int power_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + int n0; + int ret = -EAGAIN; + + local_irq_save(flags); + perf_disable(); + + /* + * Add the event to the list (if there is room) + * and check whether the total set is still feasible. + */ + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + if (n0 >= ppmu->n_event) + goto out; + cpuhw->event[n0] = event; + cpuhw->events[n0] = event->hw.config; + cpuhw->flags[n0] = event->hw.event_base; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) + goto out; + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + goto out; + + event->hw.config = cpuhw->events[n0]; + ++cpuhw->n_events; + ++cpuhw->n_added; + + ret = 0; + out: + perf_enable(); + local_irq_restore(flags); + return ret; +} + +/* + * Remove an event from the PMU. + */ +static void power_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + long i; + unsigned long flags; + + local_irq_save(flags); + perf_disable(); + + power_pmu_read(event); + + cpuhw = &__get_cpu_var(cpu_hw_events); + for (i = 0; i < cpuhw->n_events; ++i) { + if (event == cpuhw->event[i]) { + while (++i < cpuhw->n_events) + cpuhw->event[i-1] = cpuhw->event[i]; + --cpuhw->n_events; + ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + if (event->hw.idx) { + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + perf_event_update_userpage(event); + break; + } + } + for (i = 0; i < cpuhw->n_limited; ++i) + if (event == cpuhw->limited_event[i]) + break; + if (i < cpuhw->n_limited) { + while (++i < cpuhw->n_limited) { + cpuhw->limited_event[i-1] = cpuhw->limited_event[i]; + cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; + } + --cpuhw->n_limited; + } + if (cpuhw->n_events == 0) { + /* disable exceptions if no events are running */ + cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + } + + perf_enable(); + local_irq_restore(flags); +} + +/* + * Re-enable interrupts on an event after they were throttled + * because they were coming too fast. + */ +static void power_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + power_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, +}; + +/* + * Return 1 if we might be able to put the event on a limited PMC, + * or 0 if not. + * An event can only go on a limited PMC if it counts something + * that a limited PMC can count, doesn't require interrupts, and + * doesn't exclude any processor mode. + */ +static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, + unsigned int flags) +{ + int n; + u64 alt[MAX_EVENT_ALTERNATIVES]; + + if (event->attr.exclude_user + || event->attr.exclude_kernel + || event->attr.exclude_hv + || event->attr.sample_period) + return 0; + + if (ppmu->limited_pmc_event(ev)) + return 1; + + /* + * The requested event_id isn't on a limited PMC already; + * see if any alternative code goes on a limited PMC.
+ */ + if (!ppmu->get_alternatives) + return 0; + + flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; + n = ppmu->get_alternatives(ev, flags, alt); + + return n > 0; +} + +/* + * Find an alternative event_id that goes on a normal PMC, if possible, + * and return the event_id code, or 0 if there is no such alternative. + * (Note: event_id code 0 is "don't count" on all machines.) + */ +static u64 normal_pmc_alternative(u64 ev, unsigned long flags) +{ + u64 alt[MAX_EVENT_ALTERNATIVES]; + int n; + + flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); + n = ppmu->get_alternatives(ev, flags, alt); + if (!n) + return 0; + return alt[0]; +} + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + unsigned long flags; + struct perf_event *ctrs[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int cflags[MAX_HWEVENTS]; + int n; + int err; + struct cpu_hw_events *cpuhw; + + if (!ppmu) + return ERR_PTR(-ENXIO); + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + default: + return ERR_PTR(-EINVAL); + } + event->hw.config_base = ev; + event->hw.idx = 0; + + /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + event->attr.exclude_hv = 0; + + /* + * If this is a per-task event, then we can use + * PM_RUN_* events interchangeably with their non RUN_* + * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. + * XXX we should check if the task is an idle task. + */ + flags = 0; + if (event->ctx->task) + flags |= PPMU_ONLY_COUNT_RUN; + + /* + * If this machine has limited events, check whether this + * event_id could go on a limited event. + */ + if (ppmu->flags & PPMU_LIMITED_PMC5_6) { + if (can_go_on_limited_pmc(event, ev, flags)) { + flags |= PPMU_LIMITED_PMC_OK; + } else if (ppmu->limited_pmc_event(ev)) { + /* + * The requested event_id is on a limited PMC, + * but we can't use a limited PMC; see if any + * alternative goes on a normal PMC. 
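hw_perf_event_destroy() above, together with the atomic_inc_not_zero() path in hw_perf_event_init() just below, is the classic refcount-plus-mutex idiom: only the 0-to-1 and 1-to-0 transitions take pmc_reserve_mutex and touch the hardware, while every other get/put is a lock-free count. A rough standalone model follows; a plain flag stands in for the mutex-protected hardware handoff, so the slow path here is not actually safe against real concurrency.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic int num_events;
/* Stand-in for the mutex-protected reserve/release_pmc_hardware(). */
static bool pmu_reserved;

/* Mirrors the get side: the first user takes the slow path (the kernel
 * holds pmc_reserve_mutex there) and reserves the hardware; later
 * users just bump the refcount, emulating atomic_inc_not_zero(). */
static int get_pmu(void)
{
    int cur = atomic_load(&num_events);

    while (cur != 0)
        if (atomic_compare_exchange_weak(&num_events, &cur, cur + 1))
            return 0;           /* fast path: already reserved */

    if (atomic_load(&num_events) == 0)
        pmu_reserved = true;    /* reserve_pmc_hardware() */
    atomic_fetch_add(&num_events, 1);
    return 0;
}

/* Mirrors hw_perf_event_destroy(): only the last put releases. */
static void put_pmu(void)
{
    if (atomic_fetch_sub(&num_events, 1) == 1)
        pmu_reserved = false;   /* release_pmc_hardware() */
}

int main(void)
{
    get_pmu(); get_pmu(); put_pmu(); put_pmu();
    printf("reserved=%d refs=%d\n", pmu_reserved, atomic_load(&num_events));
    return 0;
}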
+ */ + ev = normal_pmc_alternative(ev, flags); + if (!ev) + return ERR_PTR(-EINVAL); + } + } + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, ppmu->n_event - 1, + ctrs, events, cflags); + if (n < 0) + return ERR_PTR(-EINVAL); + } + events[n] = ev; + ctrs[n] = event; + cflags[n] = flags; + if (check_excludes(ctrs, cflags, n, 1)) + return ERR_PTR(-EINVAL); + + cpuhw = &get_cpu_var(cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); + put_cpu_var(cpu_hw_events); + if (err) + return ERR_PTR(-EINVAL); + + event->hw.config = events[n]; + event->hw.event_base = cflags[n]; + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &power_pmu; +} + +/* + * An event has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .addr = 0, + .period = event->hw.last_period, + }; + + if (event->attr.sample_type & PERF_SAMPLE_ADDR) + perf_get_data_addr(regs, &data.addr); + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +/* + * Called from generic code to get the misc flags (i.e. processor mode) + * for an event_id. + */ +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + u32 flags = perf_get_misc_flags(regs); + + if (flags) + return flags; + return user_mode(regs) ?
PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Called from generic code to get the instruction pointer + * for an event_id. + */ +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + + if (TRAP(regs) != 0xf00) + return regs->nip; /* not a PMU interrupt */ + + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return ip; +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + if (cpuhw->n_limited) + freeze_limited_events(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + + perf_read_regs(regs); + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (!event->hw.idx || is_limited_pmc(event->hw.idx)) + continue; + val = read_pmc(event->hw.idx); + if ((int)val < 0) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } + } + + /* + * In case we didn't find and reset the event that caused + * the interrupt, scan all events and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_event; ++i) { + if (is_limited_pmc(i + 1)) + continue; + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze events) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the events frozen until + * we get back out of this interrupt. + */ + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + if (!ppmu) + return; + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + +int register_power_pmu(struct power_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + +#ifdef MSR_HV + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_events_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ + + return 0; +} diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 3c90a3d9173..2a361cdda63 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 31918af3e35..0f4c1c73a6a 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 867f6f66396..c351b3a57fb 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. 
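
A note on the `(int)val < 0` tests in perf_event_interrupt() above: the powerpc PMCs are 32 bits wide and are reloaded to `0x80000000 - left`, so the sign bit of the 32-bit value becomes set exactly when the programmed period has elapsed. A standalone sketch of that convention, with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t left = 1000;                /* events until the next sample */
            uint32_t pmc = 0x80000000u - left;  /* reload value written to the PMC */

            pmc += left;                        /* simulate 'left' counted events */

            /* the handler's overflow test: top bit set => period elapsed */
            printf("overflowed: %d\n", (int32_t)pmc < 0);
            return 0;
    }
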
*/ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fa21890531d..ca399ba5034 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 018d094d92f..28a4daacdc0 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 75dccb71a04..479574413a9 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 465e498bcb3..df45a7449a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -527,25 +527,25 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_counter_pending); +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_event_pending); -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { - get_cpu_var(perf_counter_pending) = 1; + get_cpu_var(perf_event_pending) = 1; set_dec(1); - put_cpu_var(perf_counter_pending); + put_cpu_var(perf_event_pending); } -#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) -#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ -#define test_perf_counter_pending() 0 -#define clear_perf_counter_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ /* * For iSeries shared processors, we have to let the hypervisor @@ -573,9 +573,9 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 830bef0a113..e7dae82c128 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -312,7 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -323,7 +323,7 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9efc8bda01b..e382cae678b 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -280,9 +280,9 @@ config PPC_HAVE_PMU_SUPPORT config PPC_PERF_CTRS def_bool y - depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT help - This enables the powerpc-specific perf_counter back-end. + This enables the powerpc-specific perf_event back-end. config SMP depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c866efd217..43c0acad716 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -94,7 +94,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/perf_counter.h b/arch/s390/include/asm/perf_counter.h deleted file mode 100644 index 7015188c2cc..00000000000 --- a/arch/s390/include/asm/perf_counter.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Performance counter support - s390 specific definitions. - * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. - */ - -static inline void set_perf_counter_pending(void) {} -static inline void clear_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h new file mode 100644 index 00000000000..3840cbe7763 --- /dev/null +++ b/arch/s390/include/asm/perf_event.h @@ -0,0 +1,10 @@ +/* + * Performance event support - s390 specific definitions. + * + * Copyright 2009 Martin Schwidefsky, IBM Corporation. 
+ */ + +static inline void set_perf_event_pending(void) {} +static inline void clear_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index c80602d7c88..cb5232df151 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -268,7 +268,7 @@ #define __NR_preadv 328 #define __NR_pwritev 329 #define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_counter_open 331 +#define __NR_perf_event_open 331 #define NR_syscalls 332 /* diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 88a83366819..624790042d4 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1832,11 +1832,11 @@ compat_sys_rt_tgsigqueueinfo_wrapper: llgtr %r5,%r5 # struct compat_siginfo * jg compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call - .globl sys_perf_counter_open_wrapper -sys_perf_counter_open_wrapper: - llgtr %r2,%r2 # const struct perf_counter_attr * + .globl sys_perf_event_open_wrapper +sys_perf_event_open_wrapper: + llgtr %r2,%r2 # const struct perf_event_attr * lgfr %r3,%r3 # pid_t lgfr %r4,%r4 # int lgfr %r5,%r5 # int llgfr %r6,%r6 # unsigned long - jg sys_perf_counter_open # branch to system call + jg sys_perf_event_open # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index ad1acd20038..0b5083681e7 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -339,4 +339,4 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ -SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper) +SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1abbadd497e..6d507462967 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,7 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include @@ -306,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) * interrupts again and then search the VMAs */ local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); si_code = SEGV_MAPERR; @@ -366,11 +366,11 @@ good_area: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4df3570fe51..b940424f8cc 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,7 +16,7 @@ config SUPERH select HAVE_IOREMAP_PROT if MMU select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h deleted file mode 100644 index d8e6bb9c0cc..00000000000 --- a/arch/sh/include/asm/perf_counter.h +++ /dev/null @@ -1,9 +0,0 @@ 
-#ifndef __ASM_SH_PERF_COUNTER_H -#define __ASM_SH_PERF_COUNTER_H - -/* SH only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* __ASM_SH_PERF_COUNTER_H */ diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h new file mode 100644 index 00000000000..11a302297ab --- /dev/null +++ b/arch/sh/include/asm/perf_event.h @@ -0,0 +1,9 @@ +#ifndef __ASM_SH_PERF_EVENT_H +#define __ASM_SH_PERF_EVENT_H + +/* SH only supports software events through this interface. */ +static inline void set_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index 925dd40d9d5..f3fd1b9eb6b 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -344,7 +344,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #define NR_syscalls 337 diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 2b84bc916bc..343ce8f073e 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -384,7 +384,7 @@ #define __NR_preadv 361 #define __NR_pwritev 362 #define __NR_rt_tgsigqueueinfo 363 -#define __NR_perf_counter_open 364 +#define __NR_perf_event_open 364 #ifdef __KERNEL__ diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 16ba225ede8..19fd11dd987 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -352,4 +352,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index af6fb7410c2..5bfde6c7749 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -390,4 +390,4 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index 781b413ff82..47530104e0a 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,7 +157,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if ((regs->sr & SR_IMASK) != SR_IMASK) local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -208,11 +208,11 @@ survive: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index 2dcc48528f7..de0b0e88182 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, /* Not an IO address, so reenable interrupts */ 
local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt or have no user @@ -201,11 +201,11 @@ survive: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 86b82348b97..97fca4695e0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,7 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG @@ -47,7 +47,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h deleted file mode 100644 index 5d7a8ca0e49..00000000000 --- a/arch/sparc/include/asm/perf_counter.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __ASM_SPARC_PERF_COUNTER_H -#define __ASM_SPARC_PERF_COUNTER_H - -extern void set_perf_counter_pending(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -#else -static inline void init_hw_perf_counters(void) { } -#endif - -#endif diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h new file mode 100644 index 00000000000..7e2669894ce --- /dev/null +++ b/arch/sparc/include/asm/perf_event.h @@ -0,0 +1,14 @@ +#ifndef __ASM_SPARC_PERF_EVENT_H +#define __ASM_SPARC_PERF_EVENT_H + +extern void set_perf_event_pending(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +#else +static inline void init_hw_perf_events(void) { } +#endif + +#endif diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 706df669f3b..42f2316c3ea 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -395,7 +395,7 @@ #define __NR_preadv 324 #define __NR_pwritev 325 #define __NR_rt_tgsigqueueinfo 326 -#define __NR_perf_counter_open 327 +#define __NR_perf_event_open 327 #define NR_SYSCALLS 328 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 247cc620cee..3a048fad7ee 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -104,5 +104,5 @@ obj-$(CONFIG_AUDIT) += audit.o audit--$(CONFIG_AUDIT) := compat_audit.o obj-$(CONFIG_COMPAT) += $(audit--y) -pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o +pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 378eb53e077..b129611590a 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -265,7 +265,7 @@ int __init nmi_init(void) } } if (!err) - init_hw_perf_counters(); + init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 68ff0010707..2d94e7a03af 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ 
-15,7 +15,7 @@ /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the - * perf_counter support layer. + * perf_event support layer. */ #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) @@ -42,14 +42,14 @@ void deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_PERF_COUNTERS - perf_counter_do_pending(); +#ifdef CONFIG_PERF_EVENTS + perf_event_do_pending(); #endif irq_exit(); set_irq_regs(old_regs); } -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c deleted file mode 100644 index b1265ce8a05..00000000000 --- a/arch/sparc/kernel/perf_counter.c +++ /dev/null @@ -1,556 +0,0 @@ -/* Performance counter support for sparc64. - * - * Copyright (C) 2009 David S. Miller - * - * This code is based almost entirely upon the x86 perf counter - * code, which is: - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. - * - * Both counters are controlled using a single control register. The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits. But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. - * - * The control register has two event fields, one for each of the two - * counters. It's thus nearly impossible to have one counter going - * while keeping the other one stopped. Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. - * - * So we use a hack, in that we program inactive counters with the - * "sw_count0" and "sw_count1" events. These count how many times - * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an - * unusual way to encode a NOP and therefore will not trigger in - * normal code. 
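
The read_pmc()/write_pmc() pair below realizes the layout this comment describes: both 32-bit counters live in one 64-bit PIC register, the upper counter in bits 63..32 and the lower in bits 31..0. A userspace model of that split, with the %pic register replaced by a plain variable:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pic;                /* stands in for the %pic register */

    static uint32_t read_pmc(int upper)
    {
            uint64_t val = pic;

            if (upper)
                    val >>= 32;
            return (uint32_t)val;
    }

    static void write_pmc(int upper, uint32_t val)
    {
            int shift = upper ? 32 : 0;
            uint64_t mask = (uint64_t)0xffffffff << shift;

            pic = (pic & ~mask) | ((uint64_t)val << shift);
    }

    int main(void)
    {
            write_pmc(1, 0xdeadbeef);
            write_pmc(0, 0x12345678);
            printf("upper=%08x lower=%08x\n", read_pmc(1), read_pmc(0));
            return 0;
    }
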
- */ - -#define MAX_HWCOUNTERS 2 -#define MAX_PERIOD ((1UL << 32) - 1) - -#define PIC_UPPER_INDEX 0 -#define PIC_LOWER_INDEX 1 - -struct cpu_hw_counters { - struct perf_counter *counters[MAX_HWCOUNTERS]; - unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - int enabled; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; - -struct perf_event_map { - u16 encoding; - u8 pic_mask; -#define PIC_NONE 0x00 -#define PIC_UPPER 0x01 -#define PIC_LOWER 0x02 -}; - -struct sparc_pmu { - const struct perf_event_map *(*event_map)(int); - int max_events; - int upper_shift; - int lower_shift; - int event_mask; - int hv_bit; - int irq_bit; - int upper_nop; - int lower_nop; -}; - -static const struct perf_event_map ultra3i_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, -}; - -static const struct perf_event_map *ultra3i_event_map(int event) -{ - return &ultra3i_perfmon_event_map[event]; -} - -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), - .upper_shift = 11, - .lower_shift = 4, - .event_mask = 0x3f, - .upper_nop = 0x1c, - .lower_nop = 0x14, -}; - -static const struct perf_event_map niagara2_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, -}; - -static const struct perf_event_map *niagara2_event_map(int event) -{ - return &niagara2_perfmon_event_map[event]; -} - -static const struct sparc_pmu niagara2_pmu = { - .event_map = niagara2_event_map, - .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), - .upper_shift = 19, - .lower_shift = 6, - .event_mask = 0xfff, - .hv_bit = 0x8, - .irq_bit = 0x03, - .upper_nop = 0x220, - .lower_nop = 0x220, -}; - -static const struct sparc_pmu *sparc_pmu __read_mostly; - -static u64 event_encoding(u64 event, int idx) -{ - if (idx == PIC_UPPER_INDEX) - event <<= sparc_pmu->upper_shift; - else - event <<= sparc_pmu->lower_shift; - return event; -} - -static u64 mask_for_index(int idx) -{ - return event_encoding(sparc_pmu->event_mask, idx); -} - -static u64 nop_for_index(int idx) -{ - return event_encoding(idx == PIC_UPPER_INDEX ? 
- sparc_pmu->upper_nop : - sparc_pmu->lower_nop, idx); -} - -static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 val, mask = mask_for_index(idx); - - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); -} - -static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 mask = mask_for_index(idx); - u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); - - pcr_ops->write((val & ~mask) | nop); -} - -void hw_perf_enable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - int i; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - val = pcr_ops->read(); - - for (i = 0; i < MAX_HWCOUNTERS; i++) { - struct perf_counter *cp = cpuc->counters[i]; - struct hw_perf_counter *hwc; - - if (!cp) - continue; - hwc = &cp->hw; - val |= hwc->config_base; - } - - pcr_ops->write(val); -} - -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - - val = pcr_ops->read(); - val &= ~(PCR_UTRACE | PCR_STRACE | - sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); -} - -static u32 read_pmc(int idx) -{ - u64 val; - - read_pic(val); - if (idx == PIC_UPPER_INDEX) - val >>= 32; - - return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ - u64 shift, mask, pic; - - shift = 0; - if (idx == PIC_UPPER_INDEX) - shift = 32; - - mask = ((u64) 0xffffffff) << shift; - val <<= shift; - - read_pic(pic); - pic &= ~mask; - pic |= val; - write_pic(pic); -} - -static int sparc_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - if (left > MAX_PERIOD) - left = MAX_PERIOD; - - atomic64_set(&hwc->prev_count, (u64)-left); - - write_pmc(idx, (u64)(-left) & 0xffffffff); - - perf_counter_update_userpage(counter); - - return ret; -} - -static int sparc_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - sparc_pmu_disable_counter(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - sparc_perf_counter_set_period(counter, hwc, idx); - sparc_pmu_enable_counter(hwc, idx); - perf_counter_update_userpage(counter); - return 0; -} - -static u64 sparc_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - 32; - u64 prev_raw_count, new_raw_count; - s64 delta; - -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - new_raw_count = read_pmc(idx); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void sparc_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = 
&__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_counter(hwc, idx); - - barrier(); - - sparc_perf_counter_update(counter, hwc, idx); - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -static void sparc_pmu_read(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_perf_counter_update(counter, hwc, hwc->idx); -} - -static void sparc_pmu_unthrottle(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_pmu_enable_counter(hwc, hwc->idx); -} - -static atomic_t active_counters = ATOMIC_INIT(0); -static DEFINE_MUTEX(pmc_grab_mutex); - -void perf_counter_grab_pmc(void) -{ - if (atomic_inc_not_zero(&active_counters)) - return; - - mutex_lock(&pmc_grab_mutex); - if (atomic_read(&active_counters) == 0) { - if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); - BUG_ON(atomic_read(&nmi_active) != 0); - } - atomic_inc(&active_counters); - } - mutex_unlock(&pmc_grab_mutex); -} - -void perf_counter_release_pmc(void) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) { - if (atomic_read(&nmi_active) == 0) - on_each_cpu(start_nmi_watchdog, NULL, 1); - mutex_unlock(&pmc_grab_mutex); - } -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - perf_counter_release_pmc(); -} - -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - const struct perf_event_map *pmap; - u64 enc; - - if (atomic_read(&nmi_active) < 0) - return -ENODEV; - - if (attr->type != PERF_TYPE_HARDWARE) - return -EOPNOTSUPP; - - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - - perf_counter_grab_pmc(); - counter->destroy = hw_perf_counter_destroy; - - /* We save the enable bits in the config_base. So to - * turn off sampling just write 'config', and to enable - * things write 'config | config_base'. 
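
sparc_perf_counter_update() above is worth a closer look: prev_count is advanced with a compare-and-swap retry loop, and the 32-bit hardware delta is pushed to the top of a 64-bit word and arithmetically shifted back, so a counter that wrapped still yields a small positive delta. A single-threaded model of both tricks (cmpxchg64() stands in for atomic64_cmpxchg()):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t prev_count;

    /* single-threaded stand-in for atomic64_cmpxchg() */
    static uint64_t cmpxchg64(uint64_t *p, uint64_t old, uint64_t val)
    {
            uint64_t cur = *p;

            if (cur == old)
                    *p = val;
            return cur;
    }

    static int64_t update(uint32_t new_raw)
    {
            const int shift = 64 - 32;
            uint64_t prev_raw, diff;

    again:
            prev_raw = prev_count;
            if (cmpxchg64(&prev_count, prev_raw, new_raw) != prev_raw)
                    goto again;

            /* sign-extend the 32-bit difference through a 64-bit shift */
            diff = ((uint64_t)new_raw << shift) - (prev_raw << shift);
            return (int64_t)diff >> shift;
    }

    int main(void)
    {
            prev_count = 0xfffffff0u;                  /* counter about to wrap */
            printf("delta = %lld\n", (long long)update(0x10));  /* prints 32 */
            return 0;
    }
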
- */ - hwc->config_base = sparc_pmu->irq_bit; - if (!attr->exclude_user) - hwc->config_base |= PCR_UTRACE; - if (!attr->exclude_kernel) - hwc->config_base |= PCR_STRACE; - if (!attr->exclude_hv) - hwc->config_base |= sparc_pmu->hv_bit; - - if (!hwc->sample_period) { - hwc->sample_period = MAX_PERIOD; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } - - pmap = sparc_pmu->event_map(attr->config); - - enc = pmap->encoding; - if (pmap->pic_mask & PIC_UPPER) { - hwc->idx = PIC_UPPER_INDEX; - enc <<= sparc_pmu->upper_shift; - } else { - hwc->idx = PIC_LOWER_INDEX; - enc <<= sparc_pmu->lower_shift; - } - - hwc->config |= enc; - return 0; -} - -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, - .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err = __hw_perf_counter_init(counter); - - if (err) - return ERR_PTR(err); - return &pmu; -} - -void perf_counter_print_debug(void) -{ - unsigned long flags; - u64 pcr, pic; - int cpu; - - if (!sparc_pmu) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - - pcr = pcr_ops->read(); - read_pic(pic); - - pr_info("\n"); - pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", - cpu, pcr, pic); - - local_irq_restore(flags); -} - -static int __kprobes perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct pt_regs *regs; - int idx; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - for (idx = 0; idx < MAX_HWCOUNTERS; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - struct hw_perf_counter *hwc; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - hwc = &counter->hw; - val = sparc_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << 31)) - continue; - - data.period = counter->hw.last_period; - if (!sparc_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - sparc_pmu_disable_counter(hwc, idx); - } - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, -}; - -static bool __init supported_pmu(void) -{ - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; - return true; - } - if (!strcmp(sparc_pmu_type, "niagara2")) { - sparc_pmu = &niagara2_pmu; - return true; - } - return false; -} - -void __init init_hw_perf_counters(void) -{ - pr_info("Performance counters: "); - - if (!supported_pmu()) { - pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; - } - - pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - - /* All sparc64 PMUs currently have 2 counters. But this simple - * driver only supports one active counter at a time. - */ - perf_max_counters = 1; - - register_die_notifier(&perf_counter_nmi_notifier); -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c new file mode 100644 index 00000000000..2d6a1b10c81 --- /dev/null +++ b/arch/sparc/kernel/perf_event.c @@ -0,0 +1,556 @@ +/* Performance event support for sparc64. + * + * Copyright (C) 2009 David S. 
Miller + * + * This code is based almost entirely upon the x86 perf event + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Sparc64 chips have two performance counters, 32-bits each, with + * overflow interrupts generated on transition from 0xffffffff to 0. + * The counters are accessed in one go using a 64-bit register. + * + * Both counters are controlled using a single control register. The + * only way to stop all sampling is to clear all of the context (user, + * supervisor, hypervisor) sampling enable bits. But these bits apply + * to both counters, thus the two counters can't be enabled/disabled + * individually. + * + * The control register has two event fields, one for each of the two + * counters. It's thus nearly impossible to have one counter going + * while keeping the other one stopped. Therefore it is possible to + * get overflow interrupts for counters not currently "in use" and + * that condition must be checked in the overflow interrupt handler. + * + * So we use a hack, in that we program inactive counters with the + * "sw_count0" and "sw_count1" events. These count how many times + * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an + * unusual way to encode a NOP and therefore will not trigger in + * normal code. + */ + +#define MAX_HWEVENTS 2 +#define MAX_PERIOD ((1UL << 32) - 1) + +#define PIC_UPPER_INDEX 0 +#define PIC_LOWER_INDEX 1 + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + int enabled; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; + +struct perf_event_map { + u16 encoding; + u8 pic_mask; +#define PIC_NONE 0x00 +#define PIC_UPPER 0x01 +#define PIC_LOWER 0x02 +}; + +struct sparc_pmu { + const struct perf_event_map *(*event_map)(int); + int max_events; + int upper_shift; + int lower_shift; + int event_mask; + int hv_bit; + int irq_bit; + int upper_nop; + int lower_nop; +}; + +static const struct perf_event_map ultra3i_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, +}; + +static const struct perf_event_map *ultra3i_event_map(int event_id) +{ + return &ultra3i_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu ultra3i_pmu = { + .event_map = ultra3i_event_map, + .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), + .upper_shift = 11, + .lower_shift = 4, + .event_mask = 0x3f, + .upper_nop = 0x1c, + .lower_nop = 0x14, +}; + +static const struct perf_event_map niagara2_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, +}; + 
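
The event_map tables above only supply the raw encoding; event_encoding() below shifts it into position for whichever counter the event lands on. A sketch using the niagara2 field layout from the table (upper_shift 19, lower_shift 6) with its CPU-cycles encoding:

    #include <stdint.h>
    #include <stdio.h>

    #define UPPER_SHIFT     19
    #define LOWER_SHIFT     6

    static uint64_t event_encoding(uint64_t enc, int upper)
    {
            return enc << (upper ? UPPER_SHIFT : LOWER_SHIFT);
    }

    int main(void)
    {
            uint64_t cycles = 0x02ff;   /* PERF_COUNT_HW_CPU_CYCLES encoding */

            printf("upper PCR bits: 0x%llx\n",
                   (unsigned long long)event_encoding(cycles, 1));
            printf("lower PCR bits: 0x%llx\n",
                   (unsigned long long)event_encoding(cycles, 0));
            return 0;
    }
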
+static const struct perf_event_map *niagara2_event_map(int event_id) +{ + return &niagara2_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu niagara2_pmu = { + .event_map = niagara2_event_map, + .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .upper_shift = 19, + .lower_shift = 6, + .event_mask = 0xfff, + .hv_bit = 0x8, + .irq_bit = 0x03, + .upper_nop = 0x220, + .lower_nop = 0x220, +}; + +static const struct sparc_pmu *sparc_pmu __read_mostly; + +static u64 event_encoding(u64 event_id, int idx) +{ + if (idx == PIC_UPPER_INDEX) + event_id <<= sparc_pmu->upper_shift; + else + event_id <<= sparc_pmu->lower_shift; + return event_id; +} + +static u64 mask_for_index(int idx) +{ + return event_encoding(sparc_pmu->event_mask, idx); +} + +static u64 nop_for_index(int idx) +{ + return event_encoding(idx == PIC_UPPER_INDEX ? + sparc_pmu->upper_nop : + sparc_pmu->lower_nop, idx); +} + +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 val, mask = mask_for_index(idx); + + val = pcr_ops->read(); + pcr_ops->write((val & ~mask) | hwc->config); +} + +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 mask = mask_for_index(idx); + u64 nop = nop_for_index(idx); + u64 val = pcr_ops->read(); + + pcr_ops->write((val & ~mask) | nop); +} + +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + int i; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + val = pcr_ops->read(); + + for (i = 0; i < MAX_HWEVENTS; i++) { + struct perf_event *cp = cpuc->events[i]; + struct hw_perf_event *hwc; + + if (!cp) + continue; + hwc = &cp->hw; + val |= hwc->config_base; + } + + pcr_ops->write(val); +} + +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + + val = pcr_ops->read(); + val &= ~(PCR_UTRACE | PCR_STRACE | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + pcr_ops->write(val); +} + +static u32 read_pmc(int idx) +{ + u64 val; + + read_pic(val); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + read_pic(pic); + pic &= ~mask; + pic |= val; + write_pic(pic); +} + +static int sparc_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > MAX_PERIOD) + left = MAX_PERIOD; + + atomic64_set(&hwc->prev_count, (u64)-left); + + write_pmc(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +static int sparc_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + sparc_pmu_disable_event(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sparc_perf_event_set_period(event, hwc, idx); + 
sparc_pmu_enable_event(hwc, idx); + perf_event_update_userpage(event); + return 0; +} + +static u64 sparc_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - 32; + u64 prev_raw_count, new_raw_count; + s64 delta; + +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = read_pmc(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void sparc_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sparc_pmu_disable_event(hwc, idx); + + barrier(); + + sparc_perf_event_update(event, hwc, idx); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void sparc_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); +} + +static void sparc_pmu_unthrottle(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_pmu_enable_event(hwc, hwc->idx); +} + +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmc_grab_mutex); + +void perf_event_grab_pmc(void) +{ + if (atomic_inc_not_zero(&active_events)) + return; + + mutex_lock(&pmc_grab_mutex); + if (atomic_read(&active_events) == 0) { + if (atomic_read(&nmi_active) > 0) { + on_each_cpu(stop_nmi_watchdog, NULL, 1); + BUG_ON(atomic_read(&nmi_active) != 0); + } + atomic_inc(&active_events); + } + mutex_unlock(&pmc_grab_mutex); +} + +void perf_event_release_pmc(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) { + if (atomic_read(&nmi_active) == 0) + on_each_cpu(start_nmi_watchdog, NULL, 1); + mutex_unlock(&pmc_grab_mutex); + } +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + perf_event_release_pmc(); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct perf_event_map *pmap; + u64 enc; + + if (atomic_read(&nmi_active) < 0) + return -ENODEV; + + if (attr->type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + + /* We save the enable bits in the config_base. So to + * turn off sampling just write 'config', and to enable + * things write 'config | config_base'. 
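
A sketch of the 'config' vs 'config | config_base' convention just described: config carries the event selection, config_base the trace-enable bits, so sampling can be frozen and resumed without reprogramming the event. The bit values here are illustrative, not real PCR bits:

    #include <stdint.h>
    #include <stdio.h>

    #define PCR_UTRACE      0x01        /* illustrative */
    #define PCR_STRACE      0x02        /* illustrative */

    int main(void)
    {
            uint64_t config = 0x0201 << 6;              /* event encoding */
            uint64_t config_base = PCR_UTRACE | PCR_STRACE;

            uint64_t pcr_on  = config | config_base;    /* counting */
            uint64_t pcr_off = config;                  /* selected but frozen */

            printf("on=0x%llx off=0x%llx\n",
                   (unsigned long long)pcr_on, (unsigned long long)pcr_off);
            return 0;
    }
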
+ */ + hwc->config_base = sparc_pmu->irq_bit; + if (!attr->exclude_user) + hwc->config_base |= PCR_UTRACE; + if (!attr->exclude_kernel) + hwc->config_base |= PCR_STRACE; + if (!attr->exclude_hv) + hwc->config_base |= sparc_pmu->hv_bit; + + if (!hwc->sample_period) { + hwc->sample_period = MAX_PERIOD; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } + + pmap = sparc_pmu->event_map(attr->config); + + enc = pmap->encoding; + if (pmap->pic_mask & PIC_UPPER) { + hwc->idx = PIC_UPPER_INDEX; + enc <<= sparc_pmu->upper_shift; + } else { + hwc->idx = PIC_LOWER_INDEX; + enc <<= sparc_pmu->lower_shift; + } + + hwc->config |= enc; + return 0; +} + +static const struct pmu pmu = { + .enable = sparc_pmu_enable, + .disable = sparc_pmu_disable, + .read = sparc_pmu_read, + .unthrottle = sparc_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + + if (err) + return ERR_PTR(err); + return &pmu; +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + u64 pcr, pic; + int cpu; + + if (!sparc_pmu) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = pcr_ops->read(); + read_pic(pic); + + pr_info("\n"); + pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", + cpu, pcr, pic); + + local_irq_restore(flags); +} + +static int __kprobes perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx < MAX_HWEVENTS; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + hwc = &event->hw; + val = sparc_perf_event_update(event, hwc, idx); + if (val & (1ULL << 31)) + continue; + + data.period = event->hw.last_period; + if (!sparc_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + sparc_pmu_disable_event(hwc, idx); + } + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, +}; + +static bool __init supported_pmu(void) +{ + if (!strcmp(sparc_pmu_type, "ultra3i")) { + sparc_pmu = &ultra3i_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara2")) { + sparc_pmu = &niagara2_pmu; + return true; + } + return false; +} + +void __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return; + } + + pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); + + /* All sparc64 PMUs currently have 2 events. But this simple + * driver only supports one active event at a time. 
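
The PMU backend itself is chosen in supported_pmu() above by string-matching the CPU's reported PMU type. A trimmed model of that dispatch, keeping the two type strings handled in this file:

    #include <stdio.h>
    #include <string.h>

    static const char *pick_pmu(const char *sparc_pmu_type)
    {
            if (!strcmp(sparc_pmu_type, "ultra3i"))
                    return "ultra3i_pmu";
            if (!strcmp(sparc_pmu_type, "niagara2"))
                    return "niagara2_pmu";
            return NULL;
    }

    int main(void)
    {
            const char *pmu = pick_pmu("niagara2");

            printf("%s\n", pmu ? pmu : "No support for PMU type");
            return 0;
    }
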
+ */ + perf_max_events = 1; + + register_die_notifier(&perf_event_nmi_notifier); +} diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 04181577cb6..0f1658d3749 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 91b06b7f7ac..009825f6e73 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51c59015b28..e4ff5d1280c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE - select HAVE_PERF_COUNTERS if (!M386 && !M486) + select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ba331bfd111..74619c4f9fd 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -831,5 +831,5 @@ ia32_sys_call_table: .quad compat_sys_preadv .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ - .quad sys_perf_counter_open + .quad sys_perf_event_open ia32_syscall_end: diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 5e3f2044f0d..f5693c81a1d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h deleted file mode 100644 index e7b7c938ae2..00000000000 --- a/arch/x86/include/asm/perf_counter.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef _ASM_X86_PERF_COUNTER_H -#define _ASM_X86_PERF_COUNTER_H - -/* - * Performance counter hw details: - */ - -#define X86_PMC_MAX_GENERIC 8 -#define 
X86_PMC_MAX_FIXED 3 - -#define X86_PMC_IDX_GENERIC 0 -#define X86_PMC_IDX_FIXED 32 -#define X86_PMC_IDX_MAX 64 - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ -#define ARCH_PERFMON_EVENT_MASK 0xffff - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -/* - * Intel "Architectural Performance Monitoring" CPUID - * detection/enumeration details: - */ -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_edx { - struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; - } split; - unsigned int full; -}; - - -/* - * Fixed-purpose performance counters: - */ - -/* - * All 3 fixed-mode PMCs are configured via this single MSR: - */ -#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d - -/* - * The counts are available in three separate MSRs: - */ - -/* Instr_Retired.Any: */ -#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) - -/* CPU_CLK_Unhalted.Core: */ -#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) - -/* CPU_CLK_Unhalted.Ref: */ -#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) - -/* - * We model BTS tracing as another fixed-mode PMC. - * - * We choose a value in the middle of the fixed counter range, since lower - * values are used by actual fixed counters and higher values are used - * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. 
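
The cpuid10 unions in this header exist to decode CPUID leaf 0xA, which enumerates the architectural PMU. A sketch of the decoding; the EAX value is illustrative (version 3, 4 counters, 48-bit width, mask length 7) rather than read from hardware, and the bitfield layout assumes the usual little-endian ABI the kernel relies on:

    #include <stdio.h>

    union cpuid10_eax {
            struct {
                    unsigned int version_id:8;
                    unsigned int num_counters:8;
                    unsigned int bit_width:8;
                    unsigned int mask_length:8;
            } split;
            unsigned int full;
    };

    int main(void)
    {
            union cpuid10_eax eax;

            eax.full = 0x07300403;      /* illustrative EAX value */
            printf("version=%u counters=%u width=%u mask_len=%u\n",
                   eax.split.version_id, eax.split.num_counters,
                   eax.split.bit_width, eax.split.mask_length);
            return 0;
    }
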
- */ -#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) - - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(void) { } -#endif - -#endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h new file mode 100644 index 00000000000..ad7ce3fd506 --- /dev/null +++ b/arch/x86/include/asm/perf_event.h @@ -0,0 +1,108 @@ +#ifndef _ASM_X86_PERF_EVENT_H +#define _ASM_X86_PERF_EVENT_H + +/* + * Performance event hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_events:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_edx { + struct { + unsigned int num_events_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance events: + */ + +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) + +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed event range, since lower + * values are used by actual fixed events and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. 
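
Those index constants carve the x86 counter index space into disjoint ranges, which is what lets BTS be modelled as just another counter index. A sketch printing the ranges implied by the definitions above (8 generic, 3 fixed, BTS at fixed + 16):

    #include <stdio.h>

    #define X86_PMC_IDX_GENERIC     0
    #define X86_PMC_IDX_FIXED       32
    #define X86_PMC_IDX_FIXED_BTS   (X86_PMC_IDX_FIXED + 16)

    int main(void)
    {
            printf("generic: %d..%d\n", X86_PMC_IDX_GENERIC, X86_PMC_IDX_GENERIC + 7);
            printf("fixed:   %d..%d\n", X86_PMC_IDX_FIXED, X86_PMC_IDX_FIXED + 2);
            printf("BTS:     %d\n", X86_PMC_IDX_FIXED_BTS);
            return 0;
    }
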
+ */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +extern void perf_events_lapic_init(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#else +static inline void init_hw_perf_events(void) { } +static inline void perf_events_lapic_init(void) { } +#endif + +#endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 8deaada61bc..6fb3c209a7e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -341,7 +341,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index b9f3c60de5f..8d3ad0adbc6 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) __SYSCALL(__NR_pwritev, sys_pwritev) #define __NR_rt_tgsigqueueinfo 297 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 298 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) +#define __NR_perf_event_open 298 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a34601f5298..754174d09de 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -14,7 +14,7 @@ * Mikael Pettersson : PM converted to driver model. */ -#include +#include #include #include #include @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include @@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_counters_lapic_init(); + perf_events_lapic_init(); preempt_disable(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8dd30638fe4..68537e957a9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2fea97eccf7..cc25c2b4a56 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -869,7 +869,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_counters(); + init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c deleted file mode 100644 index b1f115696c8..00000000000 --- a/arch/x86/kernel/cpu/perf_counter.c +++ /dev/null @@ -1,2298 +0,0 @@ -/* - * Performance counter x86 architecture code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u64 perf_counter_mask __read_mostly; - -/* The maximal number of PEBS counters: */ -#define MAX_PEBS_COUNTERS 4 - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) - -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) - - -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; -}; - -struct cpu_hw_counters { - struct perf_counter *counters[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; - int enabled; - struct debug_store *ds; -}; - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(void); - void (*enable)(struct hw_perf_counter *, int); - void (*disable)(struct hw_perf_counter *, int); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - u64 (*raw_event)(u64); - int max_events; - int num_counters; - int num_counters_fixed; - int counter_bits; - u64 counter_mask; - int apic; - u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); -}; - -static struct x86_pmu x86_pmu __read_mostly; - -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { - .enabled = 1, -}; - -/* - * Not sure about some of these - */ -static const u64 p6_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -}; - -static u64 p6_pmu_event_map(int hw_event) -{ - return p6_perfmon_event_map[hw_event]; -} - -/* - * Counter setting that is specified not to count anything. - * We use this to effectively disable a counter. - * - * L2_RQSTS with 0 MESI unit mask. - */ -#define P6_NOP_COUNTER 0x0000002EULL - -static u64 p6_pmu_raw_event(u64 hw_event) -{ -#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL -#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL -#define P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define P6_EVNTSEL_MASK \ - (P6_EVNTSEL_EVENT_MASK | \ - P6_EVNTSEL_UNIT_MASK | \ - P6_EVNTSEL_EDGE_MASK | \ - P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_COUNTER_MASK) - - return hw_event & P6_EVNTSEL_MASK; -} - - -/* - * Intel PerfMon v3. Used on Core2 and later. 
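A brief illustration, with hypothetical values that are not part of the patch, of what the raw-event masks above preserve: a PERF_TYPE_RAW config packs the event select into bits 7:0 and the unit mask into bits 15:8, so the LLC-miss encoding that also appears in the event tables below comes out as:

	/* event select 0x2E (L2 requests), unit mask 0x41 (I-state): */
	u64 raw_llc_miss = 0x2e | (0x41 << 8);		/* == 0x412e */

Control bits outside the mask, such as the enable bit (bit 22) and the interrupt bit (bit 20), are stripped, so a user-supplied raw config cannot toggle them; the PMU code sets those itself.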
- */ -static const u64 intel_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, -}; - -static u64 intel_pmu_event_map(int hw_event) -{ - return intel_perfmon_event_map[hw_event]; -} - -/* - * Generalized hw caching related hw_event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'hw_event makes no sense on - * this CPU', any other value means the raw hw_event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -static u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -static const u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) 
] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ 
C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static u64 intel_pmu_raw_event(u64 hw_event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_COUNTER_MASK) - - return hw_event & CORE_EVNTSEL_MASK; -} - -static const u64 amd_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ - [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ - [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. 
*/ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. - */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, -}; - -static u64 amd_pmu_event_map(int hw_event) -{ - return amd_perfmon_event_map[hw_event]; -} - -static u64 amd_pmu_raw_event(u64 hw_event) -{ -#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_COUNTER_MASK) - - return hw_event & K7_EVNTSEL_MASK; -} - -/* - * Propagate counter elapsed time into the generic counter. - * Can only be executed on the CPU where the counter is active. - * Returns the delta events processed. - */ -static u64 -x86_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - x86_pmu.counter_bits; - u64 prev_raw_count, new_raw_count; - s64 delta; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * Careful: an NMI might modify the previous counter value. - * - * Our tactic to handle this is to first atomically read and - * exchange a new raw count - then add that new-prev delta - * count to the generic counter atomically: - */ -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - rdmsrl(hwc->counter_base + idx, new_raw_count); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (counter-)time and add that to the generic counter. - * - * Careful, not all hw sign-extends above the physical width - * of the count. 
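A standalone restatement of the width-normalizing trick used just below, as a hypothetical helper (it assumes arithmetic right shift on signed types, as the kernel does):

	#include <stdint.h>
	typedef uint64_t u64;
	typedef int64_t s64;

	/*
	 * Left-align both raw values so bits above the hardware counter
	 * width fall off, subtract, then shift back: with a 40-bit
	 * counter, shift = 24, and a wrapped delta is sign-extended
	 * correctly even though the CPU never filled bits 63:40.
	 */
	static s64 hw_counter_delta(u64 prev, u64 now, int counter_bits)
	{
		int shift = 64 - counter_bits;
		s64 delta = (now << shift) - (prev << shift);
		return delta >> shift;
	}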
- */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static atomic_t active_counters; -static DEFINE_MUTEX(pmc_reserve_mutex); - -static bool reserve_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) - goto perfctr_fail; - } - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) - goto eventsel_fail; - } -#endif - - return true; - -#ifdef CONFIG_X86_LOCAL_APIC -eventsel_fail: - for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); - - i = x86_pmu.num_counters; - -perfctr_fail: - for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - - return false; -#endif -} - -static void release_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); - } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); -#endif -} - -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; -} - -static inline void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static inline void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_counters, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_counters, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; - - per_cpu(cpu_hw_counters, cpu).ds = ds; - err = 0; - } - - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); - - return err; -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_bts_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -static inline int x86_pmu_initialized(void) -{ - return x86_pmu.handle_irq != NULL; -} - -static inline int -set_ext_hw_attr(struct hw_perf_counter *hwc, struct 
perf_counter_attr *attr) -{ - unsigned int cache_type, cache_op, cache_result; - u64 config, val; - - config = attr->config; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - - if (val == 0) - return -ENOENT; - - if (val == -1) - return -EINVAL; - - hwc->config |= val; - - return 0; -} - -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - -/* - * Setup the hardware configuration for a given attr_type - */ -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - u64 config; - int err; - - if (!x86_pmu_initialized()) - return -ENODEV; - - err = 0; - if (!atomic_inc_not_zero(&active_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - err = reserve_bts_hardware(); - } - if (!err) - atomic_inc(&active_counters); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; - - counter->destroy = hw_perf_counter_destroy; - - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - - if (!hwc->sample_period) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * counters (user-space has to fall back and - * sample via a hrtimer based software counter): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; - } - - /* - * Raw hw_event type provide the config in the hw_event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - return 0; - } - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - - /* - * The generic map: - */ - config = x86_pmu.event_map(attr->config); - - if (config == 0) - return -ENOENT; - - if (config == -1LL) - return -EINVAL; - - /* - * Branch tracing: - */ - if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1)) { - /* BTS is not supported by this architecture. 
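To make the branch-tracing special case above concrete, here is a hypothetical attribute, not taken from the patch, that would be routed onto the BTS pseudo-counter: the branch-instructions event with a sample period of exactly 1, restricted to user mode (BTS is rejected when ring-0 counting is requested):

	struct perf_counter_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.sample_period	= 1,		/* trace every branch */
		.exclude_kernel	= 1,		/* keeps EVENTSEL_OS clear */
	};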
*/ - if (!bts_available()) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - return -EOPNOTSUPP; - } - - hwc->config |= config; - - return 0; -} - -static void p6_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) - intel_pmu_disable_bts(); -} - -static void amd_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * counters proper, so that amd_pmu_enable_counter() does the - * right thing. - */ - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) - continue; - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_disable(void) -{ - if (!x86_pmu_initialized()) - return; - return x86_pmu.disable_all(); -} - -static void p6_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long val; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_counter *counter = - cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - - if (WARN_ON_ONCE(!counter)) - return; - - intel_pmu_enable_bts(counter->hw.config); - } -} - -static void amd_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - val = counter->hw.config; - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_enable(void) -{ - if (!x86_pmu_initialized()) - return; - x86_pmu.enable_all(); -} - -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - -static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - (void)checking_wrmsrl(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); -} - -static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - 
(void)checking_wrmsrl(hwc->config_base + idx, hwc->config); -} - -static inline void -intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static inline void -p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val = P6_NOP_COUNTER; - - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - -static inline void -intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - intel_pmu_disable_bts(); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); - return; - } - - x86_pmu_disable_counter(hwc, idx); -} - -static inline void -amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - x86_pmu_disable_counter(hwc, idx); -} - -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the counter disabled in hw: - */ -static int -x86_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int err, ret = 0; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * If we are way outside a reasoable range then just skip forward: - */ - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - /* - * Quirk: certain CPUs dont like it if just 1 hw_event is left: - */ - if (unlikely(left < 2)) - left = 2; - - if (left > x86_pmu.max_period) - left = x86_pmu.max_period; - - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - - /* - * The hw counter starts counting from this counter offset, - * mark it to be able to extra future deltas: - */ - atomic64_set(&hwc->prev_count, (u64)-left); - - err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & x86_pmu.counter_mask); - - perf_counter_update_userpage(counter); - - return ret; -} - -static inline void -intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - int err; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; - if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - - -static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 
- if (!__get_cpu_var(cpu_hw_counters).enabled) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); - return; - } - - x86_pmu_enable_counter(hwc, idx); -} - -static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); -} - -static int -fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_counters_fixed) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in counter: - */ -static int x86_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx; - - idx = fixed_mode_idx(counter, hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. */ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->counter_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that counter_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: - */ - hwc->counter_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic counter again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_counters); - if (idx == x86_pmu.num_counters) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->counter_base = x86_pmu.perfctr; - } - - perf_counters_lapic_init(); - - x86_pmu.disable(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - x86_perf_counter_set_period(counter, hwc, idx); - x86_pmu.enable(hwc, idx); - - perf_counter_update_userpage(counter); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - - if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->counters[hwc->idx] != counter)) - return; - - x86_pmu.enable(hwc, hwc->idx); -} - -void perf_counter_print_debug(void) -{ - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - struct cpu_hw_counters *cpuc; - unsigned long flags; - int cpu, idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - - pr_info("\n"); - pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); - pr_info("CPU#%d: status: %016llx\n", cpu, status); - pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); - pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); - - prev_left = per_cpu(pmc_prev_left[idx], cpu); - - pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", - cpu, idx, pmc_ctrl); - pr_info("CPU#%d: gen-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - pr_info("CPU#%d: gen-PMC%d left: %016llx\n", - cpu, idx, prev_left); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - - pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - } - local_irq_restore(flags); -} - -static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc) -{ - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!counter) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - - data.period = counter->hw.last_period; - data.addr = 0; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. 
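As an aside, the pointer arithmetic the drain loop below depends on can be restated as a hypothetical helper (the three-u64 record layout matches the BTS_RECORD_SIZE of 24 bytes defined near the top of the file):

	struct bts_record { u64 from, to, flags; };	/* 24 bytes */

	static unsigned long bts_pending_records(struct debug_store *ds)
	{
		return (unsigned long)(ds->bts_index - ds->bts_buffer_base)
			/ sizeof(struct bts_record);
	}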
- */ - perf_prepare_sample(&header, &data, counter, ®s); - - if (perf_output_begin(&handle, counter, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, counter); - } - - perf_output_end(&handle); - - /* There's new data available. */ - counter->hw.interrupts++; - counter->pending_kill = POLL_IN; -} - -static void x86_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - /* - * Must be done before we disable, otherwise the nmi handler - * could reenable again: - */ - clear_bit(idx, cpuc->active_mask); - x86_pmu.disable(hwc, idx); - - /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the counter: - */ - barrier(); - - /* - * Drain the remaining delta count out of a counter - * that we are disabling: - */ - x86_perf_counter_update(counter, hwc, idx); - - /* Drain the remaining BTS records. */ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) - intel_pmu_drain_bts_buffer(cpuc); - - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -/* - * Save and restart an expired counter. Called by NMI contexts, - * so it has to be careful about preempting normal counter ops: - */ -static int intel_pmu_save_and_restart(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - int ret; - - x86_perf_counter_update(counter, hwc, idx); - ret = x86_perf_counter_set_period(counter, hwc, idx); - - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - intel_pmu_enable_counter(hwc, idx); - - return ret; -} - -static void intel_pmu_reset(void) -{ - struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; - unsigned long flags; - int idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } - if (ds) - ds->bts_index = ds->bts_buffer_base; - - local_irq_restore(flags); -} - -static int p6_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - p6_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - int bit, loops; - u64 ack, 
status; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - perf_disable(); - intel_pmu_drain_bts_buffer(cpuc); - status = intel_pmu_get_status(); - if (!status) { - perf_enable(); - return 0; - } - - loops = 0; -again: - if (++loops > 100) { - WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); - perf_counter_print_debug(); - intel_pmu_reset(); - perf_enable(); - return 1; - } - - inc_irq_stat(apic_perf_irqs); - ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(counter)) - continue; - - data.period = counter->hw.last_period; - - if (perf_counter_overflow(counter, 1, &data, regs)) - intel_pmu_disable_counter(&counter->hw, bit); - } - - intel_pmu_ack_status(ack); - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - - perf_enable(); - - return 1; -} - -static int amd_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - amd_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_counter_do_pending(); - irq_exit(); -} - -void set_perf_counter_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - -void perf_counters_lapic_init(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - /* - * Always use NMI for PMU - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif -} - -static int __kprobes -perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct pt_regs *regs; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - case DIE_NMI_IPI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - -#ifdef CONFIG_X86_LOCAL_APIC - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif - /* - * Can't rely on the handled return value to say it was our NMI, two - * counters could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. 
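One detail worth spelling out from the p6/amd handlers above: counters are programmed to count up from -left toward overflow, so a counter that has not yet overflowed still has its top implemented bit set. The skip test can be read as the following hypothetical restatement:

	/* val is the value just read back via x86_perf_counter_update(): */
	int overflowed = !(val & (1ULL << (x86_pmu.counter_bits - 1)));

which is why those loops continue past any counter whose sign bit is still set.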
- */ - x86_pmu.handle_irq(regs); - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, - .next = NULL, - .priority = 1 -}; - -static struct x86_pmu p6_pmu = { - .name = "p6", - .handle_irq = p6_pmu_handle_irq, - .disable_all = p6_pmu_disable_all, - .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_counter, - .disable = p6_pmu_disable_counter, - .eventsel = MSR_P6_EVNTSEL0, - .perfctr = MSR_P6_PERFCTR0, - .event_map = p6_pmu_event_map, - .raw_event = p6_pmu_raw_event, - .max_events = ARRAY_SIZE(p6_perfmon_event_map), - .apic = 1, - .max_period = (1ULL << 31) - 1, - .version = 0, - .num_counters = 2, - /* - * Counters have 40 bits implemented. However they are designed such - * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a counter for P6-like PMU is 32 bits only. - * - * See IA-32 Intel Architecture Software developer manual Vol 3B - */ - .counter_bits = 32, - .counter_mask = (1ULL << 32) - 1, -}; - -static struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_counter, - .disable = intel_pmu_disable_counter, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic counter period: - */ - .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, -}; - -static struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_counter, - .disable = amd_pmu_disable_counter, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, - .counter_bits = 48, - .counter_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, -}; - -static int p6_pmu_init(void) -{ - switch (boot_cpu_data.x86_model) { - case 1: - case 3: /* Pentium Pro */ - case 5: - case 6: /* Pentium II */ - case 7: - case 8: - case 11: /* Pentium III */ - break; - case 9: - case 13: - /* Pentium M */ - break; - default: - pr_cont("unsupported p6 CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - x86_pmu = p6_pmu; - - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - - return 0; -} - -static int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int ebx; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { - return -ENODEV; - } - } - - /* - * Check whether the Architectural PerfMon supports - * Branch Misses Retired hw_event or not. 
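For context on this check: CPUID leaf 0xA reports per-event availability in EBX, where a set bit means the event is not available, and eax.split.mask_length (bits 31:24 of EAX) says how many EBX bits are defined. A fuller, hypothetical probe would therefore look like:

	unsigned int ax, bx, cx, dx;

	cpuid(10, &ax, &bx, &cx, &dx);
	/* defined at all, and not flagged as unavailable: */
	int present = ((ax >> 24) & 0xff) > ARCH_PERFMON_BRANCH_MISSES_RETIRED
		&& !(bx & (1 << ARCH_PERFMON_BRANCH_MISSES_RETIRED));

The code below only checks mask_length, treating a short mask as "no architectural perfmon worth using".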
- */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - return -ENODEV; - - x86_pmu = intel_pmu; - x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.counter_bits = eax.split.bit_width; - x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; - - /* - * Quirk: v2 perfmon does not report fixed-purpose counters, so - * assume at least 3 counters: - */ - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Core2 events, "); - break; - default: - case 26: - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Nehalem/Corei7 events, "); - break; - case 28: - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Atom events, "); - break; - } - return 0; -} - -static int amd_pmu_init(void) -{ - /* Performance-monitoring supported from K7 and later: */ - if (boot_cpu_data.x86 < 6) - return -ENODEV; - - x86_pmu = amd_pmu; - - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - return 0; -} - -void __init init_hw_perf_counters(void) -{ - int err; - - pr_info("Performance Counters: "); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = intel_pmu_init(); - break; - case X86_VENDOR_AMD: - err = amd_pmu_init(); - break; - default: - return; - } - if (err != 0) { - pr_cont("no PMU driver, software counters only.\n"); - return; - } - - pr_cont("%s PMU driver.\n", x86_pmu.name); - - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; - } - perf_counter_mask = (1 << x86_pmu.num_counters) - 1; - perf_max_counters = x86_pmu.num_counters; - - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; - } - - perf_counter_mask |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_counter_mask; - - perf_counters_lapic_init(); - register_die_notifier(&perf_counter_nmi_notifier); - - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.counter_bits); - pr_info("... generic counters: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); - pr_info("... 
counter mask: %016Lx\n", perf_counter_mask); -} - -static inline void x86_pmu_read(struct perf_counter *counter) -{ - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); -} - -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err; - - err = __hw_perf_counter_init(counter); - if (err) { - if (counter->destroy) - counter->destroy(counter); - return ERR_PTR(err); - } - - return &pmu; -} - -/* - * callchain support - */ - -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); - - -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - -static int backtrace_stack(void *data, char *name) -{ - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); - - return 0; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry *entry = data; - - if (per_cpu(in_nmi_frame, smp_processor_id())) - return; - - if (reliable) - callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, -}; - -#include "../dumpstack.h" - -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); - - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); -} - -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? 
KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - unsigned long bytes; - - bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); - - return bytes == sizeof(*frame); -} - -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - struct stack_frame frame; - const void __user *fp; - - if (!user_mode(regs)) - regs = task_pt_regs(current); - - fp = (void __user *)regs->bp; - - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); - - while (entry->nr < PERF_MAX_STACK_DEPTH) { - frame.next_frame = NULL; - frame.return_address = 0; - - if (!copy_stack_frame(fp, &frame)) - break; - - if ((unsigned long)fp < regs->sp) - break; - - callchain_store(entry, frame.return_address); - fp = frame.next_frame; - } -} - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || current->pid == 0) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - -void hw_perf_counter_setup_online(int cpu) -{ - init_debug_store_on_cpu(cpu); -} diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c new file mode 100644 index 00000000000..0d03629fb1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event.c @@ -0,0 +1,2298 @@ +/* + * Performance events x86 architecture code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static u64 perf_event_mask __read_mostly; + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. 
+ * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +struct cpu_hw_events { + struct perf_event *events[X86_PMC_IDX_MAX]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long interrupts; + int enabled; + struct debug_store *ds; +}; + +/* + * struct x86_pmu - generic x86 pmu + */ +struct x86_pmu { + const char *name; + int version; + int (*handle_irq)(struct pt_regs *); + void (*disable_all)(void); + void (*enable_all)(void); + void (*enable)(struct hw_perf_event *, int); + void (*disable)(struct hw_perf_event *, int); + unsigned eventsel; + unsigned perfctr; + u64 (*event_map)(int); + u64 (*raw_event)(u64); + int max_events; + int num_events; + int num_events_fixed; + int event_bits; + u64 event_mask; + int apic; + u64 max_period; + u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); +}; + +static struct x86_pmu x86_pmu __read_mostly; + +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .enabled = 1, +}; + +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int hw_event) +{ + return p6_perfmon_event_map[hw_event]; +} + +/* + * Event setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_EVENT 0x0000002EULL + +static u64 p6_pmu_raw_event(u64 hw_event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_REG_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_REG_MASK) + + return hw_event & P6_EVNTSEL_MASK; +} + + +/* + * Intel PerfMon v3. Used on Core2 and later. + */ +static const u64 intel_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, +}; + +static u64 intel_pmu_event_map(int hw_event) +{ + return intel_perfmon_event_map[hw_event]; +} + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event + * ID. 
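A short illustration, with hypothetical values, of how these table entries get used: set_ext_hw_attr(), shown earlier in the removed perf_counter.c and carried over into this file, decodes a PERF_TYPE_HW_CACHE config as three packed bytes and then applies exactly the 0 / -1 convention described above:

	/* byte 0: cache, byte 1: operation, byte 2: result */
	u64 config = PERF_COUNT_HW_CACHE_L1D |
		     (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		     (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	u64 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
	if (val == 0)
		return -ENOENT;		/* not supported on this model */
	if (val == -1)
		return -EINVAL;		/* combination makes no sense */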
+ */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +static const u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 
0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, 
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 hw_event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(CORE_EVNTSEL_EVENT_MASK |	\
+	 CORE_EVNTSEL_UNIT_MASK  |	\
+	 CORE_EVNTSEL_EDGE_MASK  |	\
+	 CORE_EVNTSEL_INV_MASK   |	\
+	 CORE_EVNTSEL_REG_MASK)
+
+	return hw_event & CORE_EVNTSEL_MASK;
+}
+
+static const u64 amd_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+		[ C(RESULT_MISS) ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
+		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
+		[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS) ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+		[ C(RESULT_MISS) ] = 0,
+	},
+ },
+ [ C(LL ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
+		[ C(RESULT_MISS) ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS) ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+		[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS) ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS) ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
+		[ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS) ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS) ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
+		[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS) ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS) ] = -1,
+	},
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
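+ *
+ * For example, a generic PERF_COUNT_HW_CPU_CYCLES event on these CPUs
+ * resolves through the map below to event select 0x76 ("CPU clocks not
+ * halted"); the lookup itself is just:
+ *
+ *	config = amd_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES);	// 0x0076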
+ */ +static const u64 amd_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, +}; + +static u64 amd_pmu_event_map(int hw_event) +{ + return amd_perfmon_event_map[hw_event]; +} + +static u64 amd_pmu_raw_event(u64 hw_event) +{ +#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL +#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL +#define K7_EVNTSEL_INV_MASK 0x000800000ULL +#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL + +#define K7_EVNTSEL_MASK \ + (K7_EVNTSEL_EVENT_MASK | \ + K7_EVNTSEL_UNIT_MASK | \ + K7_EVNTSEL_EDGE_MASK | \ + K7_EVNTSEL_INV_MASK | \ + K7_EVNTSEL_REG_MASK) + + return hw_event & K7_EVNTSEL_MASK; +} + +/* + * Propagate event elapsed time into the generic event. + * Can only be executed on the CPU where the event is active. + * Returns the delta events processed. + */ +static u64 +x86_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - x86_pmu.event_bits; + u64 prev_raw_count, new_raw_count; + s64 delta; + + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + rdmsrl(hwc->event_base + idx, new_raw_count); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. 
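+ *
+ * Worked example of the shift trick below, assuming 32-bit counters
+ * (event_bits = 32, so shift = 32): if prev_raw_count was 0xfffffff0
+ * and the counter wrapped to new_raw_count = 0x10, then
+ *
+ *	delta = (0x10 << 32) - (0xfffffff0 << 32);	// == 0x20 << 32
+ *	delta >>= 32;					// s64 shift -> 0x20
+ *
+ * the shifts sign-extend both values from bit 31, so the subtraction
+ * correctly yields 32 counted events despite the wrap.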
+ */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static atomic_t active_events; +static DEFINE_MUTEX(pmc_reserve_mutex); + +static bool reserve_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + goto perfctr_fail; + } + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + goto eventsel_fail; + } +#endif + + return true; + +#ifdef CONFIG_X86_LOCAL_APIC +eventsel_fail: + for (i--; i >= 0; i--) + release_evntsel_nmi(x86_pmu.eventsel + i); + + i = x86_pmu.num_events; + +perfctr_fail: + for (i--; i >= 0; i--) + release_perfctr_nmi(x86_pmu.perfctr + i); + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + + return false; +#endif +} + +static void release_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + for (i = 0; i < x86_pmu.num_events; i++) { + release_perfctr_nmi(x86_pmu.perfctr + i); + release_evntsel_nmi(x86_pmu.eventsel + i); + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); +#endif +} + +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_events, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_bts_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static inline int x86_pmu_initialized(void) +{ + return x86_pmu.handle_irq != NULL; +} + +static inline int +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +{ + unsigned int 
cache_type, cache_op, cache_result; + u64 config, val; + + config = attr->config; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + val = hw_cache_event_ids[cache_type][cache_op][cache_result]; + + if (val == 0) + return -ENOENT; + + if (val == -1) + return -EINVAL; + + hwc->config |= val; + + return 0; +} + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +/* + * Setup the hardware configuration for a given attr_type + */ +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + int err; + + if (!x86_pmu_initialized()) + return -ENODEV; + + err = 0; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + err = reserve_bts_hardware(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + if (err) + return err; + + event->destroy = hw_perf_event_destroy; + + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to. + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + if (!hwc->sample_period) { + hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; + } + + /* + * Raw hw_event type provide the config in the hw_event structure + */ + if (attr->type == PERF_TYPE_RAW) { + hwc->config |= x86_pmu.raw_event(attr->config); + return 0; + } + + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, attr); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + + /* + * The generic map: + */ + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. 
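+		 *
+		 * An attr that lands on this BTS path would look roughly
+		 * like the following (a sketch; field values illustrative):
+		 *
+		 *	struct perf_event_attr attr = {
+		 *		.type		= PERF_TYPE_HARDWARE,
+		 *		.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+		 *		.sample_period	= 1,	// every branch
+		 *		.exclude_kernel	= 1,	// user-mode only, see below
+		 *	};
+		 *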
*/ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } + + hwc->config |= config; + + return 0; +} + +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); +} + +static void amd_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + /* + * ensure we write the disable before we start disabling the + * events proper, so that amd_pmu_enable_event() does the + * right thing. + */ + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) + continue; + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_disable(void) +{ + if (!x86_pmu_initialized()) + return; + return x86_pmu.disable_all(); +} + +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_event *event = + cpuc->events[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!event)) + return; + + intel_pmu_enable_bts(event->hw.config); + } +} + +static void amd_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + val = event->hw.config; + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_enable(void) +{ + if (!x86_pmu_initialized()) + return; + x86_pmu.enable_all(); +} + +static inline u64 intel_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void intel_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); +} + +static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); +} + +static 
inline void
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val = P6_NOP_EVENT;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+static inline void
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		intel_pmu_disable_bts();
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+static int
+x86_perf_event_set_period(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx)
+{
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int err, ret = 0;
+
+	if (idx == X86_PMC_IDX_FIXED_BTS)
+		return 0;
+
+	/*
+	 * If we are way outside a reasonable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+	/*
+	 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
+	 */
+	if (unlikely(left < 2))
+		left = 2;
+
+	if (left > x86_pmu.max_period)
+		left = x86_pmu.max_period;
+
+	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw event starts counting from this event offset,
+	 * mark it to be able to extract future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)-left);
+
+	err = checking_wrmsrl(hwc->event_base + idx,
+			     (u64)(-left) & x86_pmu.event_mask);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		if (!__get_cpu_var(cpu_hw_events).enabled)
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+		return;
+	}
+
+	if
(unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_enable_fixed(hwc, idx); + return; + } + + x86_pmu_enable_event(hwc, idx); +} + +static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + x86_pmu_enable_event(hwc, idx); +} + +static int +fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +{ + unsigned int hw_event; + + hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + + if (!x86_pmu.num_events_fixed) + return -1; + + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) + return X86_PMC_IDX_FIXED_INSTRUCTIONS; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) + return X86_PMC_IDX_FIXED_CPU_CYCLES; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) + return X86_PMC_IDX_FIXED_BUS_CYCLES; + + return -1; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = fixed_mode_idx(event, hwc); + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* BTS is already occupied. */ + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + hwc->config_base = 0; + hwc->event_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { + /* + * Try to get the fixed event, if that is already taken + * then try to get a generic event: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->idx = idx; + } else { + idx = hwc->idx; + /* Try to get the previous generic event again */ + if (test_and_set_bit(idx, cpuc->used_mask)) { +try_generic: + idx = find_first_zero_bit(cpuc->used_mask, + x86_pmu.num_events); + if (idx == x86_pmu.num_events) + return -EAGAIN; + + set_bit(idx, cpuc->used_mask); + hwc->idx = idx; + } + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } + + perf_events_lapic_init(); + + x86_pmu.disable(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + x86_perf_event_set_period(event, hwc, idx); + x86_pmu.enable(hwc, idx); + + perf_event_update_userpage(event); + + return 0; +} + +static void x86_pmu_unthrottle(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || + cpuc->events[hwc->idx] != event)) + return; + + x86_pmu.enable(hwc, hwc->idx); +} + +void perf_event_print_debug(void) +{ + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + struct cpu_hw_events *cpuc; + unsigned long flags; + int cpu, idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + + if (x86_pmu.version >= 2) { + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + + pr_info("\n"); + pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); + pr_info("CPU#%d: status: %016llx\n", cpu, status); + pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); + pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + } + pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); + rdmsrl(x86_pmu.perfctr + idx, pmc_count); + + prev_left = per_cpu(pmc_prev_left[idx], cpu); + + pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", + cpu, idx, pmc_ctrl); + pr_info("CPU#%d: gen-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + pr_info("CPU#%d: gen-PMC%d left: %016llx\n", + cpu, idx, prev_left); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } + local_irq_restore(flags); +} + +static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + + data.period = event->hw.last_period; + data.addr = 0; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. 
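+	 *
+	 * Sizing note (editorial): the hardware wrote (top - at) records
+	 * of BTS_RECORD_SIZE bytes, and each one is emitted as a sample
+	 * of header.size bytes -- hence the single
+	 * perf_output_begin(..., header.size * (top - at), ...)
+	 * reservation below, followed by one perf_output_sample() per
+	 * record.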
+ */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, + header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. */ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +static void x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Must be done before we disable, otherwise the nmi handler + * could reenable again: + */ + clear_bit(idx, cpuc->active_mask); + x86_pmu.disable(hwc, idx); + + /* + * Make sure the cleared pointer becomes visible before we + * (potentially) free the event: + */ + barrier(); + + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) + intel_pmu_drain_bts_buffer(cpuc); + + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +/* + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: + */ +static int intel_pmu_save_and_restart(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + int ret; + + x86_perf_event_update(event, hwc, idx); + ret = x86_perf_event_set_period(event, hwc, idx); + + if (event->state == PERF_EVENT_STATE_ACTIVE) + intel_pmu_enable_event(hwc, idx); + + return ret; +} + +static void intel_pmu_reset(void) +{ + struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; + unsigned long flags; + int idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); + checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } + if (ds) + ds->bts_index = ds->bts_buffer_base; + + local_irq_restore(flags); +} + +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + p6_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int intel_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int bit, loops; + u64 ack, status; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + perf_disable(); + 
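+	/*
+	 * Editorial aside on the overflow test used by the p6/amd
+	 * handlers above: counters are programmed to count up from
+	 * -left, so after x86_perf_event_update() a raw value that
+	 * still has the top implemented bit set, i.e.
+	 *
+	 *	val & (1ULL << (x86_pmu.event_bits - 1))
+	 *
+	 * means the counter has not crossed zero yet and did not
+	 * overflow; those events are skipped.
+	 */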
intel_pmu_drain_bts_buffer(cpuc); + status = intel_pmu_get_status(); + if (!status) { + perf_enable(); + return 0; + } + + loops = 0; +again: + if (++loops > 100) { + WARN_ONCE(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + intel_pmu_reset(); + perf_enable(); + return 1; + } + + inc_irq_stat(apic_perf_irqs); + ack = status; + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + clear_bit(bit, (unsigned long *) &status); + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + data.period = event->hw.last_period; + + if (perf_event_overflow(event, 1, &data, regs)) + intel_pmu_disable_event(&event->hw, bit); + } + + intel_pmu_ack_status(ack); + + /* + * Repeat if there is more work to be done: + */ + status = intel_pmu_get_status(); + if (status) + goto again; + + perf_enable(); + + return 1; +} + +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + amd_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_event_do_pending(); + irq_exit(); +} + +void set_perf_event_pending(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif +} + +void perf_events_lapic_init(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + + /* + * Always use NMI for PMU + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif +} + +static int __kprobes +perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct pt_regs *regs; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + case DIE_NMI_IPI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + +#ifdef CONFIG_X86_LOCAL_APIC + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif + /* + * Can't rely on the handled return value to say it was our NMI, two + * events could trigger 'simultaneously' raising two back-to-back NMIs. + * + * If the first NMI handles both, the latter will be empty and daze + * the CPU. 
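+	 *
+	 * Illustrative timeline with two active counters A and B:
+	 *
+	 *	A overflows        -> NMI #1 latched
+	 *	B overflows        -> NMI #2 latched back-to-back
+	 *	NMI #1 handler     -> status has A and B set, handles both
+	 *	NMI #2 handler     -> status is empty; returning NOTIFY_DONE
+	 *	                      here would make it an "unknown" NMI
+	 *
+	 * which is why we return NOTIFY_STOP unconditionally below.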
+ */ + x86_pmu.handle_irq(regs); + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = 1 +}; + +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_events = 2, + /* + * Events have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a event for P6-like PMU is 32 bits only. + * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .event_bits = 32, + .event_mask = (1ULL << 32) - 1, +}; + +static struct x86_pmu intel_pmu = { + .name = "Intel", + .handle_irq = intel_pmu_handle_irq, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, +}; + +static struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = amd_pmu_handle_irq, + .disable_all = amd_pmu_disable_all, + .enable_all = amd_pmu_enable_all, + .enable = amd_pmu_enable_event, + .disable = amd_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, +}; + +static int p6_pmu_init(void) +{ + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* Pentium M */ + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + if (!cpu_has_apic) { + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; + } + + return 0; +} + +static int intel_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int ebx; + int version; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { + return -ENODEV; + } + } + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired hw_event or not. 
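+	 *
+	 * Shape of the CPUID leaf 0xA data consumed below, as sliced by
+	 * union cpuid10_eax (a sketch of the architectural layout):
+	 *
+	 *	eax[ 7: 0]	version_id
+	 *	eax[15: 8]	num_events  (general-purpose counters)
+	 *	eax[23:16]	bit_width   (counter width)
+	 *	eax[31:24]	mask_length (valid bits in the ebx event vector)
+	 *
+	 * If mask_length does not even cover the "branch misses retired"
+	 * bit, the CPU predates usable architectural perfmon.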
+ */ + cpuid(10, &eax.full, &ebx, &unused, &edx.full); + if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + return -ENODEV; + + version = eax.split.version_id; + if (version < 2) + return -ENODEV; + + x86_pmu = intel_pmu; + x86_pmu.version = version; + x86_pmu.num_events = eax.split.num_events; + x86_pmu.event_bits = eax.split.bit_width; + x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; + + /* + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events: + */ + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + + /* + * Install the hw-cache-events table: + */ + switch (boot_cpu_data.x86_model) { + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 29: /* six-core 45 nm xeon "Dunnington" */ + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Core2 events, "); + break; + default: + case 26: + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Nehalem/Corei7 events, "); + break; + case 28: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Atom events, "); + break; + } + return 0; +} + +static int amd_pmu_init(void) +{ + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + + x86_pmu = amd_pmu; + + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} + +void __init init_hw_perf_events(void) +{ + int err; + + pr_info("Performance Events: "); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + err = intel_pmu_init(); + break; + case X86_VENDOR_AMD: + err = amd_pmu_init(); + break; + default: + return; + } + if (err != 0) { + pr_cont("no PMU driver, software events only.\n"); + return; + } + + pr_cont("%s PMU driver.\n", x86_pmu.name); + + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + x86_pmu.num_events, X86_PMC_MAX_GENERIC); + x86_pmu.num_events = X86_PMC_MAX_GENERIC; + } + perf_event_mask = (1 << x86_pmu.num_events) - 1; + perf_max_events = x86_pmu.num_events; + + if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; + } + + perf_event_mask |= + ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = perf_event_mask; + + perf_events_lapic_init(); + register_die_notifier(&perf_event_nmi_notifier); + + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.event_bits); + pr_info("... generic events: %d\n", x86_pmu.num_events); + pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... 
event mask: %016Lx\n", perf_event_mask); +} + +static inline void x86_pmu_read(struct perf_event *event) +{ + x86_perf_event_update(event, &event->hw, event->hw.idx); +} + +static const struct pmu pmu = { + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err; + + err = __hw_perf_event_init(event); + if (err) { + if (event->destroy) + event->destroy(event); + return ERR_PTR(err); + } + + return &pmu; +} + +/* + * callchain support + */ + +static inline +void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(int, in_nmi_frame); + + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + per_cpu(in_nmi_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name); + + return 0; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct perf_callchain_entry *entry = data; + + if (per_cpu(in_nmi_frame, smp_processor_id())) + return; + + if (reliable) + callchain_store(entry, addr); +} + +static const struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +#include "../dumpstack.h" + +static void +perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->ip); + + dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); +} + +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? 
KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; + + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + unsigned long bytes; + + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + + return bytes == sizeof(*frame); +} + +static void +perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + struct stack_frame frame; + const void __user *fp; + + if (!user_mode(regs)) + regs = task_pt_regs(current); + + fp = (void __user *)regs->bp; + + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, regs->ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + frame.next_frame = NULL; + frame.return_address = 0; + + if (!copy_stack_frame(fp, &frame)) + break; + + if ((unsigned long)fp < regs->sp) + break; + + callchain_store(entry, frame.return_address); + fp = frame.next_frame; + } +} + +static void +perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + int is_user; + + if (!regs) + return; + + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + if (!is_user) + perf_callchain_kernel(regs, entry); + + if (current->mm) + perf_callchain_user(regs, entry); +} + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + if (in_nmi()) + entry = &__get_cpu_var(pmc_nmi_entry); + else + entry = &__get_cpu_var(pmc_irq_entry); + + entry->nr = 0; + + perf_do_callchain(regs, entry); + + return entry; +} + +void hw_perf_event_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 392bea43b89..fab786f60ed 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -20,7 +20,7 @@ #include #include -#include +#include struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d59fe323807..681c3fda739 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1021,7 +1021,7 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS apicinterrupt LOCAL_PENDING_VECTOR \ perf_pending_interrupt smp_perf_pending_interrupt #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 300883112e3..40f30773fb2 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -208,7 +208,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* Performance monitoring interrupts: */ -# ifdef CONFIG_PERF_COUNTERS +# ifdef CONFIG_PERF_EVENTS alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d51321ddafd..0157cd26d7c 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -335,4 +335,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev 
.long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 775a020990a..82728f2c6d5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,7 +10,7 @@ #include /* max_low_pfn */ #include /* __kprobes, ... */ #include /* kmmio_handler, ... */ -#include /* perf_swcounter_event */ +#include /* perf_sw_event */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ @@ -1017,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1114,11 +1114,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4899215999d..8eb05878554 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void) if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { eax.split.version_id = 2; - eax.split.num_counters = 2; + eax.split.num_events = 2; eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = eax.split.num_events; op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index b83776180c7..7b8e75d1608 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -13,7 +13,7 @@ #define OP_X86_MODEL_H #include -#include +#include struct op_msr { unsigned long addr; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 50eecfe1d72..44203ff599d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -252,7 +252,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); - perf_counter_print_debug(); + perf_event_print_debug(); } static struct sysrq_key_op sysrq_showregs_op = { .handler = sysrq_handle_showregs, diff --git a/fs/exec.c b/fs/exec.c index 172ceb6edde..434dba778cc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -923,7 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_counter_comm(tsk); + perf_event_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) @@ -997,7 +997,7 @@ int flush_old_exec(struct linux_binprm * bprm) * security domain: */ if (!get_dumpable(current->mm)) - perf_counter_exit_task(current); + perf_event_exit_task(current); /* An exec changes our domain. 
We are no longer part of the thread group */ diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 1125e5a1ee5..d76b66acea9 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -620,8 +620,8 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_rt_tgsigqueueinfo 240 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 241 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) +#define __NR_perf_event_open 241 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) #undef __NR_syscalls #define __NR_syscalls 242 diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e7f2e8fc66..21a6f5d9af2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -106,13 +106,13 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_mutex = \ - __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ - .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#ifdef CONFIG_PERF_EVENTS +# define INIT_PERF_EVENTS(tsk) \ + .perf_event_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ + .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), #else -# define INIT_PERF_COUNTERS(tsk) +# define INIT_PERF_EVENTS(tsk) #endif /* @@ -178,7 +178,7 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ + INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h deleted file mode 100644 index f6486273267..00000000000 --- a/include/linux/perf_counter.h +++ /dev/null @@ -1,858 +0,0 @@ -/* - * Performance counters: - * - * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra - * - * Data type definitions, declarations, prototypes. 
- * - * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING - */ -#ifndef _LINUX_PERF_COUNTER_H -#define _LINUX_PERF_COUNTER_H - -#include -#include -#include - -/* - * User-space ABI bits: - */ - -/* - * attr.type - */ -enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, - - PERF_TYPE_MAX, /* non-ABI */ -}; - -/* - * Generalized performance counter event types, used by the - * attr.event_id parameter of the sys_perf_counter_open() - * syscall: - */ -enum perf_hw_id { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, - - PERF_COUNT_HW_MAX, /* non-ABI */ -}; - -/* - * Generalized hardware cache counters: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - - PERF_COUNT_SW_MAX, /* non-ABI */ -}; - -/* - * Bits that can be set in attr.sample_type to request information - * in the overflow packets. 
- */ -enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_READ = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, - PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_RAW = 1U << 10, - - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ -}; - -/* - * The format of the data returned by read() on a perf counter fd, - * as specified by attr.read_format: - * - * struct read_format { - * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 id; } && PERF_FORMAT_ID - * } && !PERF_FORMAT_GROUP - * - * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 value; - * { u64 id; } && PERF_FORMAT_ID - * } cntr[nr]; - * } && PERF_FORMAT_GROUP - * }; - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, - PERF_FORMAT_GROUP = 1U << 3, - - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ -}; - -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_attr { - - /* - * Major type: hardware/software/tracepoint/etc. - */ - __u32 type; - - /* - * Size of the attr structure, for fwd/bwd compat. - */ - __u32 size; - - /* - * Type specific configuration information. - */ - __u64 config; - - union { - __u64 sample_period; - __u64 sample_freq; - }; - - __u64 sample_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - inherit_stat : 1, /* per task counts */ - enable_on_exec : 1, /* next exec enables */ - task : 1, /* trace fork/exit */ - watermark : 1, /* wakeup_watermark */ - - __reserved_1 : 49; - - union { - __u32 wakeup_events; /* wakeup every n events */ - __u32 wakeup_watermark; /* bytes before wakeup */ - }; - __u32 __reserved_2; - - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) -#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) -#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) -#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) - -enum perf_counter_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, -}; - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. - * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. 
- */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - __u64 time_enabled; /* time counter active */ - __u64 time_running; /* time counter on cpu */ - - /* - * Hole for extension of the self monitor capabilities - */ - - __u64 __reserved[123]; /* align to 1k */ - - /* - * Control data for the mmap() data buffer. - * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_counter_wakeup(). - * - * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. - */ - __u64 data_head; /* head in the data section */ - __u64 data_tail; /* user-space written tail */ -}; - -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - - /* - * struct { - * struct perf_event_header header; - * u64 id; - * u64 lost; - * }; - */ - PERF_EVENT_LOST = 2, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * u64 time; - * }; - */ - PERF_EVENT_EXIT = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * u64 stream_id; - * }; - */ - PERF_EVENT_THROTTLE = 5, - PERF_EVENT_UNTHROTTLE = 6, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME - * }; - */ - PERF_EVENT_FORK = 7, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, tid; - * - * struct read_format values; - * }; - */ - PERF_EVENT_READ = 8, - - /* - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_SAMPLE_IP - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU - * { u64 period; } && PERF_SAMPLE_PERIOD - * - * { struct read_format values; } && PERF_SAMPLE_READ - * - * { u64 nr, - * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN - * - * # - * # The RAW record below is opaque data wrt the ABI - * # - * # That is, the ABI doesn't make any promises wrt to - * # the stability of its content, it may vary depending - * # on event, hardware, kernel version and phase of - * # the moon. - * # - * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
- * # - * - * { u32 size; - * char data[size];}&& PERF_SAMPLE_RAW - * }; - */ - PERF_EVENT_SAMPLE = 9, - - PERF_EVENT_MAX, /* non-ABI */ -}; - -enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, - - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, - - PERF_CONTEXT_MAX = (__u64)-4095, -}; - -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) - -#ifdef __KERNEL__ -/* - * Kernel-internal data types and definitions: - */ - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PERF_MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; -}; - -struct perf_raw_record { - u32 size; - void *data; -}; - -struct task_struct; - -/** - * struct hw_perf_counter - performance counter hardware details: - */ -struct hw_perf_counter { -#ifdef CONFIG_PERF_COUNTERS - union { - struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; - }; - union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; - }; - }; - atomic64_t prev_count; - u64 sample_period; - u64 last_period; - atomic64_t period_left; - u64 interrupts; - - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; -#endif -}; - -struct perf_counter; - -/** - * struct pmu - generic performance monitoring unit - */ -struct pmu { - int (*enable) (struct perf_counter *counter); - void (*disable) (struct perf_counter *counter); - void (*read) (struct perf_counter *counter); - void (*unthrottle) (struct perf_counter *counter); -}; - -/** - * enum perf_counter_active_state - the states of a counter - */ -enum perf_counter_active_state { - PERF_COUNTER_STATE_ERROR = -2, - PERF_COUNTER_STATE_OFF = -1, - PERF_COUNTER_STATE_INACTIVE = 0, - PERF_COUNTER_STATE_ACTIVE = 1, -}; - -struct file; - -struct perf_mmap_data { - struct rcu_head rcu_head; - int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ - int nr_locked; /* nr pages mlocked */ - - atomic_t poll; /* POLL_ for wakeups */ - atomic_t events; /* event limit */ - - atomic_long_t head; /* write position */ - atomic_long_t done_head; /* completed head */ - - atomic_t lock; /* concurrent writes */ - atomic_t wakeup; /* needs a wakeup */ - atomic_t lost; /* nr records lost */ - - long watermark; /* wakeup watermark */ - - struct perf_counter_mmap_page *user_page; - void *data_pages[0]; -}; - -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - -/** - * struct perf_counter - performance counter kernel representation: - */ -struct perf_counter { -#ifdef CONFIG_PERF_COUNTERS - struct list_head group_entry; - struct list_head event_entry; - struct list_head sibling_list; - int nr_siblings; - struct perf_counter *group_leader; - struct perf_counter *output; - const struct pmu *pmu; - - enum perf_counter_active_state state; - atomic64_t count; - - /* - * These are the total time in nanoseconds that the counter - * has been enabled (i.e. eligible to run, and the task has - * been scheduled in, if this is a per-task counter) - * and running (scheduled onto the CPU), respectively. - * - * They are computed from tstamp_enabled, tstamp_running and - * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. 
- */ - u64 total_time_enabled; - u64 total_time_running; - - /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the counter is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the counter was enabled - * tstamp_running: the notional time when the counter was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * counter was scheduled off. - */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; - - struct perf_counter_attr attr; - struct hw_perf_counter hw; - - struct perf_counter_context *ctx; - struct file *filp; - - /* - * These accumulate total time (in nanoseconds) that children - * counters have been enabled and running, respectively. - */ - atomic64_t child_total_time_enabled; - atomic64_t child_total_time_running; - - /* - * Protect attach/detach and child_list: - */ - struct mutex child_mutex; - struct list_head child_list; - struct perf_counter *parent; - - int oncpu; - int cpu; - - struct list_head owner_entry; - struct task_struct *owner; - - /* mmap bits */ - struct mutex mmap_mutex; - atomic_t mmap_count; - struct perf_mmap_data *data; - - /* poll related */ - wait_queue_head_t waitq; - struct fasync_struct *fasync; - - /* delayed work for NMIs and such */ - int pending_wakeup; - int pending_kill; - int pending_disable; - struct perf_pending_entry pending; - - atomic_t event_limit; - - void (*destroy)(struct perf_counter *); - struct rcu_head rcu_head; - - struct pid_namespace *ns; - u64 id; -#endif -}; - -/** - * struct perf_counter_context - counter context structure - * - * Used as a container for task counters and CPU counters as well: - */ -struct perf_counter_context { - /* - * Protect the states of the counters in the list, - * nr_active, and the list: - */ - spinlock_t lock; - /* - * Protect the list of counters. Locking either mutex or lock - * is sufficient to ensure the list doesn't change; to change - * the list you need to lock both the mutex and the spinlock. - */ - struct mutex mutex; - - struct list_head group_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - int nr_stat; - atomic_t refcount; - struct task_struct *task; - - /* - * Context clock, runs when context enabled. - */ - u64 time; - u64 timestamp; - - /* - * These fields let us detect when two contexts have both - * been cloned (inherited) from a common ancestor. 
- */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; -}; - -/** - * struct perf_counter_cpu_context - per cpu counter context structure - */ -struct perf_cpu_context { - struct perf_counter_context ctx; - struct perf_counter_context *task_ctx; - int active_oncpu; - int max_pertask; - int exclusive; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; -}; - -struct perf_output_handle { - struct perf_counter *counter; - struct perf_mmap_data *data; - unsigned long head; - unsigned long offset; - int nmi; - int sample; - int locked; - unsigned long flags; -}; - -#ifdef CONFIG_PERF_COUNTERS - -/* - * Set by architecture code: - */ -extern int perf_max_counters; - -extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); - -extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern int perf_counter_init_task(struct task_struct *child); -extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_free_task(struct task_struct *task); -extern void set_perf_counter_pending(void); -extern void perf_counter_do_pending(void); -extern void perf_counter_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); -extern int perf_counter_task_disable(void); -extern int perf_counter_task_enable(void); -extern int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu); -extern void perf_counter_update_userpage(struct perf_counter *counter); - -struct perf_sample_data { - u64 type; - - u64 ip; - struct { - u32 pid; - u32 tid; - } tid_entry; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - struct { - u32 cpu; - u32 reserved; - } cpu_entry; - u64 period; - struct perf_callchain_entry *callchain; - struct perf_raw_record *raw; -}; - -extern void perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter); -extern void perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs); - -extern int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs); - -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return (counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE) && - (counter->attr.type != PERF_TYPE_HW_CACHE); -} - -extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; - -extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) -{ - if (atomic_read(&perf_swcounter_enabled[event])) - __perf_swcounter_event(event, nr, nmi, regs, addr); -} - -extern void __perf_counter_mmap(struct vm_area_struct *vma); - -static inline void perf_counter_mmap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_EXEC) - __perf_counter_mmap(vma); -} - -extern void perf_counter_comm(struct task_struct *tsk); 
-extern void perf_counter_fork(struct task_struct *tsk); - -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); - -extern int sysctl_perf_counter_paranoid; -extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_sample_rate; - -extern void perf_counter_init(void); -extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); - -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ - PERF_EVENT_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif - -extern int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int sample); -extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); -#else -static inline void -perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -static inline void -perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } -static inline void -perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { return 0; } -static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_free_task(struct task_struct *task) { } -static inline void perf_counter_do_pending(void) { } -static inline void perf_counter_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } -static inline int perf_counter_task_disable(void) { return -EINVAL; } -static inline int perf_counter_task_enable(void) { return -EINVAL; } - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) { } - -static inline void perf_counter_mmap(struct vm_area_struct *vma) { } -static inline void perf_counter_comm(struct task_struct *tsk) { } -static inline void perf_counter_fork(struct task_struct *tsk) { } -static inline void perf_counter_init(void) { } - -#endif - -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) - -#endif /* __KERNEL__ */ -#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h new file mode 100644 index 00000000000..ae9d9ed6df2 --- /dev/null +++ b/include/linux/perf_event.h @@ -0,0 +1,858 @@ +/* + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. 
+ * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_EVENT_H +#define _LINUX_PERF_EVENT_H + +#include +#include +#include + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance event event_id types, used by the + * attr.event_id parameter of the sys_perf_event_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache events: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" events provided by the kernel, even if the hardware + * does not support performance events. These events measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. 
+ */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ +}; + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; + + /* + * Type specific configuration information. + */ + __u64 config; + + union { + __u64 sample_period; + __u64 sample_freq; + }; + + __u64 sample_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + + __reserved_1 : 49; + + union { + __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_watermark; /* bytes before wakeup */ + }; + __u32 __reserved_2; + + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. + * + * u32 seq; + * s64 count; + * + * do { + * seq = pc->lock; + * + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. 
+ */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware event identifier */ + __s64 offset; /* add to hardware event value */ + __u64 time_enabled; /* time event active */ + __u64 time_running; /* time event on cpu */ + + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[123]; /* align to 1k */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_event_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. + */ + __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) + +struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; +}; + +enum perf_event_type { + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * { u64 time; } && PERF_SAMPLE_TIME + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event_id, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
+ * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW + * }; + */ + PERF_RECORD_SAMPLE = 9, + + PERF_RECORD_MAX, /* non-ABI */ +}; + +enum perf_callchain_context { + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, + + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, + + PERF_CONTEXT_MAX = (__u64)-4095, +}; + +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + +#ifdef __KERNEL__ +/* + * Kernel-internal data types and definitions: + */ + +#ifdef CONFIG_PERF_EVENTS +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PERF_MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u64 nr; + __u64 ip[PERF_MAX_STACK_DEPTH]; +}; + +struct perf_raw_record { + u32 size; + void *data; +}; + +struct task_struct; + +/** + * struct hw_perf_event - performance event hardware details: + */ +struct hw_perf_event { +#ifdef CONFIG_PERF_EVENTS + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long event_base; + int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; + atomic64_t prev_count; + u64 sample_period; + u64 last_period; + atomic64_t period_left; + u64 interrupts; + + u64 freq_count; + u64 freq_interrupts; + u64 freq_stamp; +#endif +}; + +struct perf_event; + +/** + * struct pmu - generic performance monitoring unit + */ +struct pmu { + int (*enable) (struct perf_event *event); + void (*disable) (struct perf_event *event); + void (*read) (struct perf_event *event); + void (*unthrottle) (struct perf_event *event); +}; + +/** + * enum perf_event_active_state - the states of a event + */ +enum perf_event_active_state { + PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_OFF = -1, + PERF_EVENT_STATE_INACTIVE = 0, + PERF_EVENT_STATE_ACTIVE = 1, +}; + +struct file; + +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; /* nr of data pages */ + int writable; /* are we writable */ + int nr_locked; /* nr pages mlocked */ + + atomic_t poll; /* POLL_ for wakeups */ + atomic_t events; /* event_id limit */ + + atomic_long_t head; /* write position */ + atomic_long_t done_head; /* completed head */ + + atomic_t lock; /* concurrent writes */ + atomic_t wakeup; /* needs a wakeup */ + atomic_t lost; /* nr records lost */ + + long watermark; /* wakeup watermark */ + + struct perf_event_mmap_page *user_page; + void *data_pages[0]; +}; + +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); +}; + +/** + * struct perf_event - performance event kernel representation: + */ +struct perf_event { +#ifdef CONFIG_PERF_EVENTS + struct list_head group_entry; + struct list_head event_entry; + struct list_head sibling_list; + int nr_siblings; + struct perf_event *group_leader; + struct perf_event *output; + const struct pmu *pmu; + + enum perf_event_active_state state; + atomic64_t count; + + /* + * These are the total time in nanoseconds that the event + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task event) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the event is in INACTIVE or ACTIVE state. 
+ */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the event is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the event was enabled + * tstamp_running: the notional time when the event was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * event was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + + struct perf_event_attr attr; + struct hw_perf_event hw; + + struct perf_event_context *ctx; + struct file *filp; + + /* + * These accumulate total time (in nanoseconds) that children + * events have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* + * Protect attach/detach and child_list: + */ + struct mutex child_mutex; + struct list_head child_list; + struct perf_event *parent; + + int oncpu; + int cpu; + + struct list_head owner_entry; + struct task_struct *owner; + + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; + + /* poll related */ + wait_queue_head_t waitq; + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_kill; + int pending_disable; + struct perf_pending_entry pending; + + atomic_t event_limit; + + void (*destroy)(struct perf_event *); + struct rcu_head rcu_head; + + struct pid_namespace *ns; + u64 id; +#endif +}; + +/** + * struct perf_event_context - event context structure + * + * Used as a container for task events and CPU events as well: + */ +struct perf_event_context { + /* + * Protect the states of the events in the list, + * nr_active, and the list: + */ + spinlock_t lock; + /* + * Protect the list of events. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; + + struct list_head group_list; + struct list_head event_list; + int nr_events; + int nr_active; + int is_active; + int nr_stat; + atomic_t refcount; + struct task_struct *task; + + /* + * Context clock, runs when context enabled. + */ + u64 time; + u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. 
+ */ + struct perf_event_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; +}; + +/** + * struct perf_event_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int active_oncpu; + int max_pertask; + int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; +}; + +struct perf_output_handle { + struct perf_event *event; + struct perf_mmap_data *data; + unsigned long head; + unsigned long offset; + int nmi; + int sample; + int locked; + unsigned long flags; +}; + +#ifdef CONFIG_PERF_EVENTS + +/* + * Set by architecture code: + */ +extern int perf_max_events; + +extern const struct pmu *hw_perf_event_init(struct perf_event *event); + +extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); +extern void perf_event_task_tick(struct task_struct *task, int cpu); +extern int perf_event_init_task(struct task_struct *child); +extern void perf_event_exit_task(struct task_struct *child); +extern void perf_event_free_task(struct task_struct *task); +extern void set_perf_event_pending(void); +extern void perf_event_do_pending(void); +extern void perf_event_print_debug(void); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); +extern int perf_event_task_disable(void); +extern int perf_event_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu); +extern void perf_event_update_userpage(struct perf_event *event); + +struct perf_sample_data { + u64 type; + + u64 ip; + struct { + u32 pid; + u32 tid; + } tid_entry; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + struct { + u32 cpu; + u32 reserved; + } cpu_entry; + u64 period; + struct perf_callchain_entry *callchain; + struct perf_raw_record *raw; +}; + +extern void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs); + +extern int perf_event_overflow(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs); + +/* + * Return 1 for a software event, 0 for a hardware event + */ +static inline int is_software_event(struct perf_event *event) +{ + return (event->attr.type != PERF_TYPE_RAW) && + (event->attr.type != PERF_TYPE_HARDWARE) && + (event->attr.type != PERF_TYPE_HW_CACHE); +} + +extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, nmi, regs, addr); +} + +extern void __perf_event_mmap(struct vm_area_struct *vma); + +static inline void perf_event_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_event_mmap(vma); +} + +extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_fork(struct task_struct *tsk); + +extern struct perf_callchain_entry 
*perf_callchain(struct pt_regs *regs); + +extern int sysctl_perf_event_paranoid; +extern int sysctl_perf_event_mlock; +extern int sysctl_perf_event_sample_rate; + +extern void perf_event_init(void); +extern void perf_tp_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); + +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ + PERF_RECORD_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + +extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample); +extern void perf_output_end(struct perf_output_handle *handle); +extern void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len); +#else +static inline void +perf_event_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } +static inline void +perf_event_task_tick(struct task_struct *task, int cpu) { } +static inline int perf_event_init_task(struct task_struct *child) { return 0; } +static inline void perf_event_exit_task(struct task_struct *child) { } +static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_do_pending(void) { } +static inline void perf_event_print_debug(void) { } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } +static inline int perf_event_task_disable(void) { return -EINVAL; } +static inline int perf_event_task_enable(void) { return -EINVAL; } + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } + +static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_fork(struct task_struct *tsk) { } +static inline void perf_event_init(void) { } + +#endif + +#define perf_output_put(handle, x) \ + perf_output_copy((handle), &(x), sizeof(x)) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index b00df4c79c6..07bff666e65 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,7 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8af3d249170..8b265a8986d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,7 +100,7 @@ struct robust_list_head; struct bio; struct fs_struct; struct bts_context; -struct perf_counter_context; +struct perf_event_context; /* * List of flags we want to share for kernel threads, @@ -701,7 +701,7 @@ struct user_struct { #endif #endif -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS atomic_long_t locked_vm; #endif }; @@ -1449,10 +1449,10 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif -#ifdef CONFIG_PERF_COUNTERS - struct perf_counter_context *perf_counter_ctxp; - struct mutex perf_counter_mutex; - struct list_head perf_counter_list; +#ifdef CONFIG_PERF_EVENTS + struct perf_event_context *perf_event_ctxp; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* 
Protected by alloc_lock */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a8e37821cc6..02f19f9a76c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,7 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_attr; +struct perf_event_attr; #include #include @@ -885,7 +885,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage long sys_perf_counter_open( - struct perf_counter_attr __user *attr_uptr, +asmlinkage long sys_perf_event_open( + struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b437b82..ec91e78244f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -378,7 +378,7 @@ static inline int ftrace_get_offsets_##call( \ #ifdef CONFIG_EVENT_PROFILE /* - * Generate the functions needed for tracepoint perf_counter support. + * Generate the functions needed for tracepoint perf_event support. * * NOTE: The insertion profile callback (ftrace_profile_) is defined later * @@ -656,7 +656,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * { * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ftrace_event_call *event_call = &event_; - * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; @@ -691,7 +691,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * * <- affect our values * - * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * perf_tp_event(event_call->id, __addr, __count, entry, * __entry_size); <- submit them to perf counter * } while (0); * @@ -712,7 +712,7 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ @@ -742,7 +742,7 @@ static void ftrace_profile_##call(proto) \ \ { assign; } \ \ - perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + perf_tp_event(event_call->id, __addr, __count, entry,\ __entry_size); \ } while (0); \ \ diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a27..cfdf5c32280 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -915,17 +915,17 @@ config AIO by some high performance threaded applications. Disabling this option saves about 7k. -config HAVE_PERF_COUNTERS +config HAVE_PERF_EVENTS bool help See tools/perf/design.txt for details. menu "Performance Counters" -config PERF_COUNTERS +config PERF_EVENTS bool "Kernel Performance Counters" default y if PROFILING - depends on HAVE_PERF_COUNTERS + depends on HAVE_PERF_EVENTS select ANON_INODES help Enable kernel support for performance counter hardware. @@ -947,7 +947,7 @@ config PERF_COUNTERS config EVENT_PROFILE bool "Tracepoint profiling sources" - depends on PERF_COUNTERS && EVENT_TRACING + depends on PERF_EVENTS && EVENT_TRACING default y help Allow the use of tracepoints as software performance counters. 
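A quick orientation for the user-space side of the renamed ABI (a minimal sketch, not part of this patch): the attr fields, the ioctls and the syscall come straight from the header and syscall table above, while the wrapper function and the workload placeholder are illustrative. perf_event_open() has no glibc wrapper, so it is invoked via syscall(2).

/* Sketch: count instructions retired across a region of code using the
 * renamed perf_event ABI. Not part of this patch. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);	/* fwd/bwd compat, see PERF_ATTR_SIZE_VER0 */
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;		/* enabled explicitly via ioctl below */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */,
			     -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	/* No read_format bits set, so read() returns a single u64 value. */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}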
diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f..e26a546eac4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -96,7 +96,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/exit.c b/kernel/exit.c index ae5d8660ddf..e47ee8a0613 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include @@ -154,8 +154,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(tsk->perf_counter_ctxp); +#ifdef CONFIG_PERF_EVENTS + WARN_ON_ONCE(tsk->perf_event_ctxp); #endif trace_sched_process_free(tsk); put_task_struct(tsk); @@ -981,7 +981,7 @@ NORET_TYPE void do_exit(long code) * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ - perf_counter_exit_task(tsk); + perf_event_exit_task(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/fork.c b/kernel/fork.c index bfee931ee3f..2cebfb23b0b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include @@ -1078,7 +1078,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - retval = perf_counter_init_task(p); + retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; @@ -1253,7 +1253,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); - perf_counter_fork(p); + perf_event_fork(p); return p; bad_fork_free_pid: @@ -1280,7 +1280,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_free_task(p); + perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c deleted file mode 100644 index 62de0db8092..00000000000 --- a/kernel/perf_counter.c +++ /dev/null @@ -1,5000 +0,0 @@ -/* - * Performance counter core code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright © 2009 Paul Mackerras, IBM Corp. 
- * - * For licensing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Each CPU has a list of per CPU counters: - */ -DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - -int perf_max_counters __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - -static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_counters __read_mostly; -static atomic_t nr_comm_counters __read_mostly; -static atomic_t nr_task_counters __read_mostly; - -/* - * perf counter paranoia level: - * -1 - not paranoid at all - * 0 - disallow raw tracepoint access for unpriv - * 1 - disallow cpu counters for unpriv - * 2 - disallow kernel profiling for unpriv - */ -int sysctl_perf_counter_paranoid __read_mostly = 1; - -static inline bool perf_paranoid_tracepoint_raw(void) -{ - return sysctl_perf_counter_paranoid > -1; -} - -static inline bool perf_paranoid_cpu(void) -{ - return sysctl_perf_counter_paranoid > 0; -} - -static inline bool perf_paranoid_kernel(void) -{ - return sysctl_perf_counter_paranoid > 1; -} - -int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ - -/* - * max perf counter sample rate - */ -int sysctl_perf_counter_sample_rate __read_mostly = 100000; - -static atomic64_t perf_counter_id; - -/* - * Lock for (sysadmin-configurable) counter reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} - -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - -void __weak hw_perf_counter_setup(int cpu) { barrier(); } -void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } - -int __weak -hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - return 0; -} - -void __weak perf_counter_print_debug(void) { } - -static DEFINE_PER_CPU(int, perf_disable_count); - -void __perf_disable(void) -{ - __get_cpu_var(perf_disable_count)++; -} - -bool __perf_enable(void) -{ - return !--__get_cpu_var(perf_disable_count); -} - -void perf_disable(void) -{ - __perf_disable(); - hw_perf_disable(); -} - -void perf_enable(void) -{ - if (__perf_enable()) - hw_perf_enable(); -} - -static void get_ctx(struct perf_counter_context *ctx) -{ - WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); -} - -static void free_ctx(struct rcu_head *head) -{ - struct perf_counter_context *ctx; - - ctx = container_of(head, struct perf_counter_context, rcu_head); - kfree(ctx); -} - -static void put_ctx(struct perf_counter_context *ctx) -{ - if (atomic_dec_and_test(&ctx->refcount)) { - if (ctx->parent_ctx) - put_ctx(ctx->parent_ctx); - if (ctx->task) - put_task_struct(ctx->task); - call_rcu(&ctx->rcu_head, free_ctx); - } -} - -static void unclone_ctx(struct perf_counter_context *ctx) -{ - if (ctx->parent_ctx) { - put_ctx(ctx->parent_ctx); - ctx->parent_ctx = NULL; - } -} - -/* - * If we inherit counters we want to return the parent counter id - * to userspace. 
- */ -static u64 primary_counter_id(struct perf_counter *counter) -{ - u64 id = counter->id; - - if (counter->parent) - id = counter->parent->id; - - return id; -} - -/* - * Get the perf_counter_context for a task and lock it. - * This has to cope with with the fact that until it is locked, - * the context could get moved to another task. - */ -static struct perf_counter_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) -{ - struct perf_counter_context *ctx; - - rcu_read_lock(); - retry: - ctx = rcu_dereference(task->perf_counter_ctxp); - if (ctx) { - /* - * If this context is a clone of another, it might - * get swapped for another underneath us by - * perf_counter_task_sched_out, though the - * rcu_read_lock() protects us from any context - * getting freed. Lock the context and check if it - * got swapped before we could get the lock, and retry - * if so. If we locked the right context, then it - * can't get swapped on us any more. - */ - spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_counter_ctxp)) { - spin_unlock_irqrestore(&ctx->lock, *flags); - goto retry; - } - - if (!atomic_inc_not_zero(&ctx->refcount)) { - spin_unlock_irqrestore(&ctx->lock, *flags); - ctx = NULL; - } - } - rcu_read_unlock(); - return ctx; -} - -/* - * Get the context for a task and increment its pin_count so it - * can't get swapped to another task. This also increments its - * reference count so that the context can't get freed. - */ -static struct perf_counter_context *perf_pin_task_context(struct task_struct *task) -{ - struct perf_counter_context *ctx; - unsigned long flags; - - ctx = perf_lock_task_context(task, &flags); - if (ctx) { - ++ctx->pin_count; - spin_unlock_irqrestore(&ctx->lock, flags); - } - return ctx; -} - -static void perf_unpin_context(struct perf_counter_context *ctx) -{ - unsigned long flags; - - spin_lock_irqsave(&ctx->lock, flags); - --ctx->pin_count; - spin_unlock_irqrestore(&ctx->lock, flags); - put_ctx(ctx); -} - -/* - * Add a counter from the lists for its context. - * Must be called with ctx->mutex and ctx->lock held. - */ -static void -list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -{ - struct perf_counter *group_leader = counter->group_leader; - - /* - * Depending on whether it is a standalone or sibling counter, - * add it straight to the context's counter list, or to the group - * leader's sibling list: - */ - if (group_leader == counter) - list_add_tail(&counter->group_entry, &ctx->group_list); - else { - list_add_tail(&counter->group_entry, &group_leader->sibling_list); - group_leader->nr_siblings++; - } - - list_add_rcu(&counter->event_entry, &ctx->event_list); - ctx->nr_counters++; - if (counter->attr.inherit_stat) - ctx->nr_stat++; -} - -/* - * Remove a counter from the lists for its context. - * Must be called with ctx->mutex and ctx->lock held. 
- */ -static void -list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -{ - struct perf_counter *sibling, *tmp; - - if (list_empty(&counter->group_entry)) - return; - ctx->nr_counters--; - if (counter->attr.inherit_stat) - ctx->nr_stat--; - - list_del_init(&counter->group_entry); - list_del_rcu(&counter->event_entry); - - if (counter->group_leader != counter) - counter->group_leader->nr_siblings--; - - /* - * If this was a group counter with sibling counters then - * upgrade the siblings to singleton counters by adding them - * to the context list directly: - */ - list_for_each_entry_safe(sibling, tmp, &counter->sibling_list, group_entry) { - - list_move_tail(&sibling->group_entry, &ctx->group_list); - sibling->group_leader = sibling; - } -} - -static void -counter_sched_out(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - if (counter->pending_disable) { - counter->pending_disable = 0; - counter->state = PERF_COUNTER_STATE_OFF; - } - counter->tstamp_stopped = ctx->time; - counter->pmu->disable(counter); - counter->oncpu = -1; - - if (!is_software_counter(counter)) - cpuctx->active_oncpu--; - ctx->nr_active--; - if (counter->attr.exclusive || !cpuctx->active_oncpu) - cpuctx->exclusive = 0; -} - -static void -group_sched_out(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter_sched_out(group_counter, cpuctx, ctx); - - /* - * Schedule out siblings (if any): - */ - list_for_each_entry(counter, &group_counter->sibling_list, group_entry) - counter_sched_out(counter, cpuctx, ctx); - - if (group_counter->attr.exclusive) - cpuctx->exclusive = 0; -} - -/* - * Cross CPU call to remove a performance counter - * - * We disable the counter on the hardware level first. After that we - * remove it from the context list. - */ -static void __perf_counter_remove_from_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. - */ - perf_disable(); - - counter_sched_out(counter, cpuctx, ctx); - - list_del_counter(counter, ctx); - - if (!ctx->task) { - /* - * Allow more per task counters with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_counters - ctx->nr_counters, - perf_max_counters - perf_reserved_percpu); - } - - perf_enable(); - spin_unlock(&ctx->lock); -} - - -/* - * Remove the counter from a task's (or a CPU's) list of counters. - * - * Must be called with ctx->mutex held. - * - * CPU counters are removed with a smp call. For task counters we only - * call when the task is on a CPU. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. 
This is OK when called from perf_release since
- * that only calls us on the top-level context, which can't be a clone.
- * When called from perf_counter_exit_task, it's OK because the
- * context has been detached from its task.
- */
-static void perf_counter_remove_from_context(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Per cpu counters are removed via an smp call and
- * the removal is always successful.
- */
- smp_call_function_single(counter->cpu,
- __perf_counter_remove_from_context,
- counter, 1);
- return;
- }
-
-retry:
- task_oncpu_function_call(task, __perf_counter_remove_from_context,
- counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * If the context is active we need to retry the smp call.
- */
- if (ctx->nr_active && !list_empty(&counter->group_entry)) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * The lock prevents this context from being scheduled in, so we
- * can remove the counter safely if the call above did not
- * succeed.
- */
- if (!list_empty(&counter->group_entry)) {
- list_del_counter(counter, ctx);
- }
- spin_unlock_irq(&ctx->lock);
-}
-
-static inline u64 perf_clock(void)
-{
- return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_counter_context *ctx)
-{
- u64 now = perf_clock();
-
- ctx->time += now - ctx->timestamp;
- ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a counter.
- */
-static void update_counter_times(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- u64 run_end;
-
- if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
- counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
- return;
-
- counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
-
- if (counter->state == PERF_COUNTER_STATE_INACTIVE)
- run_end = counter->tstamp_stopped;
- else
- run_end = ctx->time;
-
- counter->total_time_running = run_end - counter->tstamp_running;
-}
-
-/*
- * Update total_time_enabled and total_time_running for all counters in a group.
- */
-static void update_group_times(struct perf_counter *leader)
-{
- struct perf_counter *counter;
-
- update_counter_times(leader);
- list_for_each_entry(counter, &leader->sibling_list, group_entry)
- update_counter_times(counter);
-}
-
-/*
- * Cross CPU call to disable a performance counter
- */
-static void __perf_counter_disable(void *info)
-{
- struct perf_counter *counter = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_counter_context *ctx = counter->ctx;
-
- /*
- * If this is a per-task counter, need to check whether this
- * counter's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx)
- return;
-
- spin_lock(&ctx->lock);
-
- /*
- * If the counter is on, turn it off.
- * If it is in error state, leave it in error state.
- */
- if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
- update_context_time(ctx);
- update_group_times(counter);
- if (counter == counter->group_leader)
- group_sched_out(counter, cpuctx, ctx);
- else
- counter_sched_out(counter, cpuctx, ctx);
- counter->state = PERF_COUNTER_STATE_OFF;
- }
-
- spin_unlock(&ctx->lock);
-}
-
-/*
- * Disable a counter.
- *
- * If counter->ctx is a cloned context, callers must make sure that
- * every task struct that counter->ctx->task could possibly point to
- * remains valid.
- * This condition is satisfied when called through
- * perf_counter_for_each_child or perf_counter_for_each because they
- * hold the top-level counter's child_mutex, so any descendant that
- * goes to exit will block in sync_child_counter.
- * When called from perf_pending_counter it's OK because counter->ctx
- * is the current context on this CPU and preemption is disabled,
- * hence we can't get into perf_counter_task_sched_out for this context.
- */
-static void perf_counter_disable(struct perf_counter *counter)
-{
- struct perf_counter_context *ctx = counter->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Disable the counter on the cpu that it's on
- */
- smp_call_function_single(counter->cpu, __perf_counter_disable,
- counter, 1);
- return;
- }
-
- retry:
- task_oncpu_function_call(task, __perf_counter_disable, counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * If the counter is still active, we need to retry the cross-call.
- */
- if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
- update_group_times(counter);
- counter->state = PERF_COUNTER_STATE_OFF;
- }
-
- spin_unlock_irq(&ctx->lock);
-}
-
-static int
-counter_sched_in(struct perf_counter *counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx,
- int cpu)
-{
- if (counter->state <= PERF_COUNTER_STATE_OFF)
- return 0;
-
- counter->state = PERF_COUNTER_STATE_ACTIVE;
- counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
- /*
- * The new state must be visible before we turn it on in the hardware:
- */
- smp_wmb();
-
- if (counter->pmu->enable(counter)) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->oncpu = -1;
- return -EAGAIN;
- }
-
- counter->tstamp_running += ctx->time - counter->tstamp_stopped;
-
- if (!is_software_counter(counter))
- cpuctx->active_oncpu++;
- ctx->nr_active++;
-
- if (counter->attr.exclusive)
- cpuctx->exclusive = 1;
-
- return 0;
-}
-
-static int
-group_sched_in(struct perf_counter *group_counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx,
- int cpu)
-{
- struct perf_counter *counter, *partial_group;
- int ret;
-
- if (group_counter->state == PERF_COUNTER_STATE_OFF)
- return 0;
-
- ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
- if (ret)
- return ret < 0 ? ret : 0;
-
- if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
- return -EAGAIN;
-
- /*
- * Schedule in siblings as one group (if any):
- */
- list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
- if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
- partial_group = counter;
- goto group_error;
- }
- }
-
- return 0;
-
-group_error:
- /*
- * Groups can be scheduled in as one unit only, so undo any
- * partial group before returning:
- */
- list_for_each_entry(counter, &group_counter->sibling_list, group_entry) {
- if (counter == partial_group)
- break;
- counter_sched_out(counter, cpuctx, ctx);
- }
- counter_sched_out(group_counter, cpuctx, ctx);
-
- return -EAGAIN;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
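The group_error unwind in group_sched_in() above is a textbook all-or-nothing rollback: every sibling that made it on is walked again and scheduled back out before the leader itself is undone. A stand-alone sketch of the pattern; the member struct and its enable/disable hooks are invented for illustration, not the patch's API:

#include <stdio.h>

/* Invented stand-in for a group member with enable/disable hooks. */
struct member {
	const char *name;
	int fail;			/* nonzero: enable will fail */
	int on;
};

static int member_enable(struct member *m)
{
	if (m->fail)
		return -1;
	m->on = 1;
	return 0;
}

static void member_disable(struct member *m)
{
	m->on = 0;
}

/* Enable the whole group or none of it: on the first failure, walk
 * back over the members enabled so far, like group_error above. */
static int group_enable(struct member *m, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (member_enable(&m[i])) {
			while (i-- > 0)
				member_disable(&m[i]);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct member g[] = { { "a", 0, 0 }, { "b", 1, 0 }, { "c", 0, 0 } };
	int ret = group_enable(g, 3);

	printf("group_enable %d, a rolled back to off: %d\n", ret, g[0].on);
	return 0;
}

Rolling back only the prefix that succeeded keeps enable and disable calls strictly paired, which is what lets counter_sched_out() stay oblivious to how far the group got.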
- */ -static int is_software_only_group(struct perf_counter *leader) -{ - struct perf_counter *counter; - - if (!is_software_counter(leader)) - return 0; - - list_for_each_entry(counter, &leader->sibling_list, group_entry) - if (!is_software_counter(counter)) - return 0; - - return 1; -} - -/* - * Work out whether we can put this counter group on the CPU now. - */ -static int group_can_go_on(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - int can_add_hw) -{ - /* - * Groups consisting entirely of software counters can always go on. - */ - if (is_software_only_group(counter)) - return 1; - /* - * If an exclusive group is already on, no other hardware - * counters can go on. - */ - if (cpuctx->exclusive) - return 0; - /* - * If this group is exclusive and there are already - * counters on the CPU, it can't go on. - */ - if (counter->attr.exclusive && cpuctx->active_oncpu) - return 0; - /* - * Otherwise, try to add it if all previous groups were able - * to go on. - */ - return can_add_hw; -} - -static void add_counter_to_ctx(struct perf_counter *counter, - struct perf_counter_context *ctx) -{ - list_add_counter(counter, ctx); - counter->tstamp_enabled = ctx->time; - counter->tstamp_running = ctx->time; - counter->tstamp_stopped = ctx->time; -} - -/* - * Cross CPU call to install and enable a performance counter - * - * Must be called with ctx->mutex held - */ -static void __perf_install_in_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; - int cpu = smp_processor_id(); - int err; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - * Or possibly this is the right context but it isn't - * on this cpu because it had no counters. - */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } - - spin_lock(&ctx->lock); - ctx->is_active = 1; - update_context_time(ctx); - - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. NOP for non NMI based counters. - */ - perf_disable(); - - add_counter_to_ctx(counter, ctx); - - /* - * Don't put the counter on if it is disabled or if - * it is in a group and the group isn't on. - */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE || - (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) - goto unlock; - - /* - * An exclusive counter can't go on if there are already active - * hardware counters, and no hardware counter can go on if there - * is already an exclusive counter on. - */ - if (!group_can_go_on(counter, cpuctx, 1)) - err = -EEXIST; - else - err = counter_sched_in(counter, cpuctx, ctx, cpu); - - if (err) { - /* - * This counter couldn't go on. If it is in a group - * then we have to pull the whole group off. - * If the counter group is pinned then put it in error state. 
- */
- if (leader != counter)
- group_sched_out(leader, cpuctx, ctx);
- if (leader->attr.pinned) {
- update_group_times(leader);
- leader->state = PERF_COUNTER_STATE_ERROR;
- }
- }
-
- if (!err && !ctx->task && cpuctx->max_pertask)
- cpuctx->max_pertask--;
-
- unlock:
- perf_enable();
-
- spin_unlock(&ctx->lock);
-}
-
-/*
- * Attach a performance counter to a context
- *
- * First we add the counter to the list with the hardware enable bit
- * in counter->hw_config cleared.
- *
- * If the counter is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
- */
-static void
-perf_install_in_context(struct perf_counter_context *ctx,
- struct perf_counter *counter,
- int cpu)
-{
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Per cpu counters are installed via an smp call and
- * the install is always successful.
- */
- smp_call_function_single(cpu, __perf_install_in_context,
- counter, 1);
- return;
- }
-
-retry:
- task_oncpu_function_call(task, __perf_install_in_context,
- counter);
-
- spin_lock_irq(&ctx->lock);
- /*
- * If the context is active we need to retry the smp call.
- */
- if (ctx->is_active && list_empty(&counter->group_entry)) {
- spin_unlock_irq(&ctx->lock);
- goto retry;
- }
-
- /*
- * The lock prevents this context from being scheduled in, so we
- * can add the counter safely if the call above did not
- * succeed.
- */
- if (list_empty(&counter->group_entry))
- add_counter_to_ctx(counter, ctx);
- spin_unlock_irq(&ctx->lock);
-}
-
-/*
- * Put a counter into inactive state and update time fields.
- * Enabling the leader of a group effectively enables all
- * the group members that aren't explicitly disabled, so we
- * have to update their ->tstamp_enabled also.
- * Note: this works for group members as well as group leaders
- * since the non-leader members' sibling_lists will be empty.
- */
-static void __perf_counter_mark_enabled(struct perf_counter *counter,
- struct perf_counter_context *ctx)
-{
- struct perf_counter *sub;
-
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
- list_for_each_entry(sub, &counter->sibling_list, group_entry)
- if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
- sub->tstamp_enabled =
- ctx->time - sub->total_time_enabled;
-}
-
-/*
- * Cross CPU call to enable a performance counter
- */
-static void __perf_counter_enable(void *info)
-{
- struct perf_counter *counter = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_counter_context *ctx = counter->ctx;
- struct perf_counter *leader = counter->group_leader;
- int err;
-
- /*
- * If this is a per-task counter, need to check whether this
- * counter's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
-
- spin_lock(&ctx->lock);
- ctx->is_active = 1;
- update_context_time(ctx);
-
- if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
- goto unlock;
- __perf_counter_mark_enabled(counter, ctx);
-
- /*
- * If the counter is in a group and isn't the group leader,
- * then don't put it on unless the group is on.
- */ - if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) - goto unlock; - - if (!group_can_go_on(counter, cpuctx, 1)) { - err = -EEXIST; - } else { - perf_disable(); - if (counter == leader) - err = group_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - else - err = counter_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - perf_enable(); - } - - if (err) { - /* - * If this counter can't go on and it's part of a - * group, then the whole group has to come off. - */ - if (leader != counter) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; - } - } - - unlock: - spin_unlock(&ctx->lock); -} - -/* - * Enable a counter. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. This condition is satisfied when called through - * perf_counter_for_each_child or perf_counter_for_each as described - * for perf_counter_disable. - */ -static void perf_counter_enable(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Enable the counter on the cpu that it's on - */ - smp_call_function_single(counter->cpu, __perf_counter_enable, - counter, 1); - return; - } - - spin_lock_irq(&ctx->lock); - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - goto out; - - /* - * If the counter is in error state, clear that first. - * That way, if we see the counter in error state below, we - * know that it has gone back into error state, as distinct - * from the task having been scheduled away before the - * cross-call arrived. - */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - counter->state = PERF_COUNTER_STATE_OFF; - - retry: - spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_counter_enable, counter); - - spin_lock_irq(&ctx->lock); - - /* - * If the context is active and the counter is still off, - * we need to retry the cross-call. - */ - if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) - goto retry; - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. - */ - if (counter->state == PERF_COUNTER_STATE_OFF) - __perf_counter_mark_enabled(counter, ctx); - - out: - spin_unlock_irq(&ctx->lock); -} - -static int perf_counter_refresh(struct perf_counter *counter, int refresh) -{ - /* - * not supported on inherited counters - */ - if (counter->attr.inherit) - return -EINVAL; - - atomic_add(refresh, &counter->event_limit); - perf_counter_enable(counter); - - return 0; -} - -void __perf_counter_sched_out(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx) -{ - struct perf_counter *counter; - - spin_lock(&ctx->lock); - ctx->is_active = 0; - if (likely(!ctx->nr_counters)) - goto out; - update_context_time(ctx); - - perf_disable(); - if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter != counter->group_leader) - counter_sched_out(counter, cpuctx, ctx); - else - group_sched_out(counter, cpuctx, ctx); - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Test whether two contexts are equivalent, i.e. whether they - * have both been cloned from the same version of the same context - * and they both have the same number of enabled counters. 
- * If the number of enabled counters is the same, then the set - * of enabled counters should be the same, because these are both - * inherited contexts, therefore we can't access individual counters - * in them directly with an fd; we can only enable/disable all - * counters via prctl, or enable/disable all counters in a family - * via ioctl, which will have the same effect on both contexts. - */ -static int context_equiv(struct perf_counter_context *ctx1, - struct perf_counter_context *ctx2) -{ - return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx - && ctx1->parent_gen == ctx2->parent_gen - && !ctx1->pin_count && !ctx2->pin_count; -} - -static void __perf_counter_read(void *counter); - -static void __perf_counter_sync_stat(struct perf_counter *counter, - struct perf_counter *next_counter) -{ - u64 value; - - if (!counter->attr.inherit_stat) - return; - - /* - * Update the counter value, we cannot use perf_counter_read() - * because we're in the middle of a context switch and have IRQs - * disabled, which upsets smp_call_function_single(), however - * we know the counter must be on the current CPU, therefore we - * don't need to use it. - */ - switch (counter->state) { - case PERF_COUNTER_STATE_ACTIVE: - __perf_counter_read(counter); - break; - - case PERF_COUNTER_STATE_INACTIVE: - update_counter_times(counter); - break; - - default: - break; - } - - /* - * In order to keep per-task stats reliable we need to flip the counter - * values when we flip the contexts. - */ - value = atomic64_read(&next_counter->count); - value = atomic64_xchg(&counter->count, value); - atomic64_set(&next_counter->count, value); - - swap(counter->total_time_enabled, next_counter->total_time_enabled); - swap(counter->total_time_running, next_counter->total_time_running); - - /* - * Since we swizzled the values, update the user visible data too. - */ - perf_counter_update_userpage(counter); - perf_counter_update_userpage(next_counter); -} - -#define list_next_entry(pos, member) \ - list_entry(pos->member.next, typeof(*pos), member) - -static void perf_counter_sync_stat(struct perf_counter_context *ctx, - struct perf_counter_context *next_ctx) -{ - struct perf_counter *counter, *next_counter; - - if (!ctx->nr_stat) - return; - - counter = list_first_entry(&ctx->event_list, - struct perf_counter, event_entry); - - next_counter = list_first_entry(&next_ctx->event_list, - struct perf_counter, event_entry); - - while (&counter->event_entry != &ctx->event_list && - &next_counter->event_entry != &next_ctx->event_list) { - - __perf_counter_sync_stat(counter, next_counter); - - counter = list_next_entry(counter, event_entry); - next_counter = list_next_entry(next_counter, event_entry); - } -} - -/* - * Called from scheduler to remove the counters of the current task, - * with interrupts disabled. - * - * We stop each counter and update the counter value in counter->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * not restart the counter. 
- */ -void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter_context *next_ctx; - struct perf_counter_context *parent; - struct pt_regs *regs; - int do_switch = 1; - - regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); - - if (likely(!ctx || !cpuctx->task_ctx)) - return; - - update_context_time(ctx); - - rcu_read_lock(); - parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_counter_ctxp; - if (parent && next_ctx && - rcu_dereference(next_ctx->parent_ctx) == parent) { - /* - * Looks like the two contexts are clones, so we might be - * able to optimize the context switch. We lock both - * contexts and check that they are clones under the - * lock (including re-checking that neither has been - * uncloned in the meantime). It doesn't matter which - * order we take the locks because no other cpu could - * be trying to lock both of these tasks. - */ - spin_lock(&ctx->lock); - spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); - if (context_equiv(ctx, next_ctx)) { - /* - * XXX do we need a memory barrier of sorts - * wrt to rcu_dereference() of perf_counter_ctxp - */ - task->perf_counter_ctxp = next_ctx; - next->perf_counter_ctxp = ctx; - ctx->task = next; - next_ctx->task = task; - do_switch = 0; - - perf_counter_sync_stat(ctx, next_ctx); - } - spin_unlock(&next_ctx->lock); - spin_unlock(&ctx->lock); - } - rcu_read_unlock(); - - if (do_switch) { - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; - } -} - -/* - * Called with IRQs disabled - */ -static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - - if (!cpuctx->task_ctx) - return; - - if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) - return; - - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; -} - -/* - * Called with IRQs disabled - */ -static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) -{ - __perf_counter_sched_out(&cpuctx->ctx, cpuctx); -} - -static void -__perf_counter_sched_in(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter *counter; - int can_add_hw = 1; - - spin_lock(&ctx->lock); - ctx->is_active = 1; - if (likely(!ctx->nr_counters)) - goto out; - - ctx->timestamp = perf_clock(); - - perf_disable(); - - /* - * First go through the list and put on any pinned groups - * in order to give them the best chance of going on. - */ - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter->state <= PERF_COUNTER_STATE_OFF || - !counter->attr.pinned) - continue; - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) - counter_sched_in(counter, cpuctx, ctx, cpu); - else { - if (group_can_go_on(counter, cpuctx, 1)) - group_sched_in(counter, cpuctx, ctx, cpu); - } - - /* - * If this pinned group hasn't been scheduled, - * put it in error state. - */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_ERROR; - } - } - - list_for_each_entry(counter, &ctx->group_list, group_entry) { - /* - * Ignore counters in OFF or ERROR state, and - * ignore pinned counters since we did them already. 
- */ - if (counter->state <= PERF_COUNTER_STATE_OFF || - counter->attr.pinned) - continue; - - /* - * Listen to the 'cpu' scheduling filter constraint - * of counters: - */ - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } else { - if (group_can_go_on(counter, cpuctx, can_add_hw)) { - if (group_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Called from scheduler to add the counters of the current task - * with interrupts disabled. - * - * We restore the counter value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. - */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - - if (likely(!ctx)) - return; - if (cpuctx->task_ctx == ctx) - return; - __perf_counter_sched_in(ctx, cpuctx, cpu); - cpuctx->task_ctx = ctx; -} - -static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter_context *ctx = &cpuctx->ctx; - - __perf_counter_sched_in(ctx, cpuctx, cpu); -} - -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_counter *counter, int enable); - -static void perf_adjust_period(struct perf_counter *counter, u64 events) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 period, sample_period; - s64 delta; - - events *= hwc->sample_period; - period = div64_u64(events, counter->attr.sample_freq); - - delta = (s64)(period - hwc->sample_period); - delta = (delta + 7) / 8; /* low pass filter */ - - sample_period = hwc->sample_period + delta; - - if (!sample_period) - sample_period = 1; - - hwc->sample_period = sample_period; -} - -static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - struct hw_perf_counter *hwc; - u64 interrupts, freq; - - spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - continue; - - hwc = &counter->hw; - - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle counters on the tick - */ - if (interrupts == MAX_INTERRUPTS) { - perf_log_throttle(counter, 1); - counter->pmu->unthrottle(counter); - interrupts = 2*sysctl_perf_counter_sample_rate/HZ; - } - - if (!counter->attr.freq || !counter->attr.sample_freq) - continue; - - /* - * if the specified freq < HZ then we need to skip ticks - */ - if (counter->attr.sample_freq < HZ) { - freq = counter->attr.sample_freq; - - hwc->freq_count += freq; - hwc->freq_interrupts += interrupts; - - if (hwc->freq_count < HZ) - continue; - - interrupts = hwc->freq_interrupts; - hwc->freq_interrupts = 0; - hwc->freq_count -= HZ; - } else - freq = HZ; - - perf_adjust_period(counter, freq * interrupts); - - /* - * In order to avoid being stalled by an (accidental) huge - * sample period, force reset the sample period if we didn't - * get any events in this freq period. 
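The period adjustment in perf_adjust_period() above is a simple low-pass filter: compute the period that would have produced the requested frequency, then move only an eighth of the way there each tick. The same arithmetic in isolation; the numbers in main() are made up:

#include <stdio.h>
#include <stdint.h>

/* One filter step: `events` is the observed interrupt rate per second,
 * `sample_freq` the requested rate; only delta/8 is applied per tick. */
static uint64_t adjust_period(uint64_t sample_period, uint64_t events,
			      uint64_t sample_freq)
{
	uint64_t period = events * sample_period / sample_freq;
	int64_t delta = (int64_t)(period - sample_period);
	uint64_t next;

	delta = (delta + 7) / 8;		/* low pass filter */
	next = sample_period + delta;

	return next ? next : 1;
}

int main(void)
{
	uint64_t period = 10000;
	int i;

	/* Overshooting at 4000 events/s against a 1000 Hz target:
	 * the period ramps up gradually instead of jumping 4x. */
	for (i = 0; i < 4; i++) {
		period = adjust_period(period, 4000, 1000);
		printf("tick %d: period %llu\n", i,
		       (unsigned long long)period);
	}
	return 0;
}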
- */ - if (!interrupts) { - perf_disable(); - counter->pmu->disable(counter); - atomic64_set(&hwc->period_left, 0); - counter->pmu->enable(counter); - perf_enable(); - } - } - spin_unlock(&ctx->lock); -} - -/* - * Round-robin a context's counters: - */ -static void rotate_ctx(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (!ctx->nr_counters) - return; - - spin_lock(&ctx->lock); - /* - * Rotate the first entry last (works just fine for group counters too): - */ - perf_disable(); - list_for_each_entry(counter, &ctx->group_list, group_entry) { - list_move_tail(&counter->group_entry, &ctx->group_list); - break; - } - perf_enable(); - - spin_unlock(&ctx->lock); -} - -void perf_counter_task_tick(struct task_struct *curr, int cpu) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - - if (!atomic_read(&nr_counters)) - return; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = curr->perf_counter_ctxp; - - perf_ctx_adjust_freq(&cpuctx->ctx); - if (ctx) - perf_ctx_adjust_freq(ctx); - - perf_counter_cpu_sched_out(cpuctx); - if (ctx) - __perf_counter_task_sched_out(ctx); - - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); - - perf_counter_cpu_sched_in(cpuctx, cpu); - if (ctx) - perf_counter_task_sched_in(curr, cpu); -} - -/* - * Enable all of a task's counters that have been marked enable-on-exec. - * This expects task == current. - */ -static void perf_counter_enable_on_exec(struct task_struct *task) -{ - struct perf_counter_context *ctx; - struct perf_counter *counter; - unsigned long flags; - int enabled = 0; - - local_irq_save(flags); - ctx = task->perf_counter_ctxp; - if (!ctx || !ctx->nr_counters) - goto out; - - __perf_counter_task_sched_out(ctx); - - spin_lock(&ctx->lock); - - list_for_each_entry(counter, &ctx->group_list, group_entry) { - if (!counter->attr.enable_on_exec) - continue; - counter->attr.enable_on_exec = 0; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - continue; - __perf_counter_mark_enabled(counter, ctx); - enabled = 1; - } - - /* - * Unclone this context if we enabled any counter. - */ - if (enabled) - unclone_ctx(ctx); - - spin_unlock(&ctx->lock); - - perf_counter_task_sched_in(task, smp_processor_id()); - out: - local_irq_restore(flags); -} - -/* - * Cross CPU call to read the hardware counter - */ -static void __perf_counter_read(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - unsigned long flags; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. In that case - * counter->count would have been updated to a recent sample - * when the counter was scheduled out. 
- */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - local_irq_save(flags); - if (ctx->is_active) - update_context_time(ctx); - counter->pmu->read(counter); - update_counter_times(counter); - local_irq_restore(flags); -} - -static u64 perf_counter_read(struct perf_counter *counter) -{ - /* - * If counter is enabled and currently active on a CPU, update the - * value in the counter structure: - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - smp_call_function_single(counter->oncpu, - __perf_counter_read, counter, 1); - } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); - } - - return atomic64_read(&counter->count); -} - -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->group_list); - INIT_LIST_HEAD(&ctx->event_list); - atomic_set(&ctx->refcount, 1); - ctx->task = task; -} - -static struct perf_counter_context *find_get_context(pid_t pid, int cpu) -{ - struct perf_counter_context *ctx; - struct perf_cpu_context *cpuctx; - struct task_struct *task; - unsigned long flags; - int err; - - /* - * If cpu is not a wildcard then this is a percpu counter: - */ - if (cpu != -1) { - /* Must be root to operate on a CPU counter: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); - - if (cpu < 0 || cpu > num_possible_cpus()) - return ERR_PTR(-EINVAL); - - /* - * We could be clever and allow to attach a counter to an - * offline CPU and activate it when the CPU comes up, but - * that's for later. - */ - if (!cpu_isset(cpu, cpu_online_map)) - return ERR_PTR(-ENODEV); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &cpuctx->ctx; - get_ctx(ctx); - - return ctx; - } - - rcu_read_lock(); - if (!pid) - task = current; - else - task = find_task_by_vpid(pid); - if (task) - get_task_struct(task); - rcu_read_unlock(); - - if (!task) - return ERR_PTR(-ESRCH); - - /* - * Can't attach counters to a dying task. - */ - err = -ESRCH; - if (task->flags & PF_EXITING) - goto errout; - - /* Reuse ptrace permission checks for now. */ - err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto errout; - - retry: - ctx = perf_lock_task_context(task, &flags); - if (ctx) { - unclone_ctx(ctx); - spin_unlock_irqrestore(&ctx->lock, flags); - } - - if (!ctx) { - ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - err = -ENOMEM; - if (!ctx) - goto errout; - __perf_counter_init_context(ctx, task); - get_ctx(ctx); - if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { - /* - * We raced with some other task; use - * the context they set. 
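The kmalloc/cmpxchg/kfree dance above is the standard lockless install-once idiom: allocate, try to publish, and defer to whoever won the race. A user-space rendering with C11 atomics; the names here are invented:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { int id; };

static _Atomic(struct ctx *) task_ctx;	/* NULL until first use */

static struct ctx *get_ctx(void)
{
	struct ctx *expected = NULL;
	struct ctx *ctx = atomic_load(&task_ctx);

	if (ctx)
		return ctx;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	/* Publish, unless somebody beat us to it; then use theirs. */
	if (!atomic_compare_exchange_strong(&task_ctx, &expected, ctx)) {
		free(ctx);
		ctx = expected;		/* the context they set */
	}
	return ctx;
}

int main(void)
{
	printf("same context twice: %d\n", get_ctx() == get_ctx());
	return 0;
}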
- */ - kfree(ctx); - goto retry; - } - get_task_struct(task); - } - - put_task_struct(task); - return ctx; - - errout: - put_task_struct(task); - return ERR_PTR(err); -} - -static void free_counter_rcu(struct rcu_head *head) -{ - struct perf_counter *counter; - - counter = container_of(head, struct perf_counter, rcu_head); - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); -} - -static void perf_pending_sync(struct perf_counter *counter); - -static void free_counter(struct perf_counter *counter) -{ - perf_pending_sync(counter); - - if (!counter->parent) { - atomic_dec(&nr_counters); - if (counter->attr.mmap) - atomic_dec(&nr_mmap_counters); - if (counter->attr.comm) - atomic_dec(&nr_comm_counters); - if (counter->attr.task) - atomic_dec(&nr_task_counters); - } - - if (counter->output) { - fput(counter->output->filp); - counter->output = NULL; - } - - if (counter->destroy) - counter->destroy(counter); - - put_ctx(counter->ctx); - call_rcu(&counter->rcu_head, free_counter_rcu); -} - -/* - * Called when the last reference to the file is gone. - */ -static int perf_release(struct inode *inode, struct file *file) -{ - struct perf_counter *counter = file->private_data; - struct perf_counter_context *ctx = counter->ctx; - - file->private_data = NULL; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_counter_remove_from_context(counter); - mutex_unlock(&ctx->mutex); - - mutex_lock(&counter->owner->perf_counter_mutex); - list_del_init(&counter->owner_entry); - mutex_unlock(&counter->owner->perf_counter_mutex); - put_task_struct(counter->owner); - - free_counter(counter); - - return 0; -} - -static int perf_counter_read_size(struct perf_counter *counter) -{ - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (counter->attr.read_format & PERF_FORMAT_GROUP) { - nr += counter->group_leader->nr_siblings; - size += sizeof(u64); - } - - size += entry * nr; - - return size; -} - -static u64 perf_counter_read_value(struct perf_counter *counter) -{ - struct perf_counter *child; - u64 total = 0; - - total += perf_counter_read(counter); - list_for_each_entry(child, &counter->child_list, child_list) - total += perf_counter_read(child); - - return total; -} - -static int perf_counter_read_entry(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - int n = 0, count = 0; - u64 values[2]; - - values[n++] = perf_counter_read_value(counter); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; -} - -static int perf_counter_read_group(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - struct perf_counter *leader = counter->group_leader, *sub; - int n = 0, size = 0, err = -EFAULT; - u64 values[3]; - - values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = leader->total_time_enabled + - atomic64_read(&leader->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = leader->total_time_running + - atomic64_read(&leader->child_total_time_running); - } - - size = n * sizeof(u64); - - if (copy_to_user(buf, values, size)) - return -EFAULT; - - err = 
perf_counter_read_entry(leader, read_format, buf + size); - if (err < 0) - return err; - - size += err; - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - err = perf_counter_read_entry(sub, read_format, - buf + size); - if (err < 0) - return err; - - size += err; - } - - return size; -} - -static int perf_counter_read_one(struct perf_counter *counter, - u64 read_format, char __user *buf) -{ - u64 values[4]; - int n = 0; - - values[n++] = perf_counter_read_value(counter); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - } - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - if (copy_to_user(buf, values, n * sizeof(u64))) - return -EFAULT; - - return n * sizeof(u64); -} - -/* - * Read the performance counter - simple non blocking version for now - */ -static ssize_t -perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) -{ - u64 read_format = counter->attr.read_format; - int ret; - - /* - * Return end-of-file for a read on a counter that is in - * error state (i.e. because it was pinned but it couldn't be - * scheduled on to the CPU at some point). - */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - return 0; - - if (count < perf_counter_read_size(counter)) - return -ENOSPC; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - if (read_format & PERF_FORMAT_GROUP) - ret = perf_counter_read_group(counter, read_format, buf); - else - ret = perf_counter_read_one(counter, read_format, buf); - mutex_unlock(&counter->child_mutex); - - return ret; -} - -static ssize_t -perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - struct perf_counter *counter = file->private_data; - - return perf_read_hw(counter, buf, count); -} - -static unsigned int perf_poll(struct file *file, poll_table *wait) -{ - struct perf_counter *counter = file->private_data; - struct perf_mmap_data *data; - unsigned int events = POLL_HUP; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) - events = atomic_xchg(&data->poll, 0); - rcu_read_unlock(); - - poll_wait(file, &counter->waitq, wait); - - return events; -} - -static void perf_counter_reset(struct perf_counter *counter) -{ - (void)perf_counter_read(counter); - atomic64_set(&counter->count, 0); - perf_counter_update_userpage(counter); -} - -/* - * Holding the top-level counter's child_mutex means that any - * descendant process that has inherited this counter will block - * in sync_child_counter if it goes to exit, thus satisfying the - * task existence requirements of perf_counter_enable/disable. 
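perf_counter_read_size() above sizes the read() buffer purely from the read_format bits: a value (plus optional id) per counter, plus the optional header words. A user-space mirror of that calculation; the FMT_* constants are a hand-rolled illustrative subset, not the uapi definitions:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define FMT_TOTAL_TIME_ENABLED	(1U << 0)
#define FMT_TOTAL_TIME_RUNNING	(1U << 1)
#define FMT_ID			(1U << 2)
#define FMT_GROUP		(1U << 3)

/* Worst-case byte count of a read(): one u64 value (plus optional id)
 * per counter, plus the optional time fields and the group nr word. */
static size_t read_size(unsigned int read_format, unsigned int nr_siblings)
{
	size_t entry = sizeof(uint64_t);	/* value */
	size_t size = 0;
	size_t nr = 1;

	if (read_format & FMT_TOTAL_TIME_ENABLED)
		size += sizeof(uint64_t);
	if (read_format & FMT_TOTAL_TIME_RUNNING)
		size += sizeof(uint64_t);
	if (read_format & FMT_ID)
		entry += sizeof(uint64_t);
	if (read_format & FMT_GROUP) {
		nr += nr_siblings;
		size += sizeof(uint64_t);	/* the nr field itself */
	}
	return size + entry * nr;
}

int main(void)
{
	/* Leader + 2 siblings, with times and ids: 72 bytes. */
	printf("%zu\n", read_size(FMT_TOTAL_TIME_ENABLED |
				  FMT_TOTAL_TIME_RUNNING |
				  FMT_ID | FMT_GROUP, 2));
	return 0;
}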
- */ -static void perf_counter_for_each_child(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter *child; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - func(counter); - list_for_each_entry(child, &counter->child_list, child_list) - func(child); - mutex_unlock(&counter->child_mutex); -} - -static void perf_counter_for_each(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *sibling; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - counter = counter->group_leader; - - perf_counter_for_each_child(counter, func); - func(counter); - list_for_each_entry(sibling, &counter->sibling_list, group_entry) - perf_counter_for_each_child(counter, func); - mutex_unlock(&ctx->mutex); -} - -static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) -{ - struct perf_counter_context *ctx = counter->ctx; - unsigned long size; - int ret = 0; - u64 value; - - if (!counter->attr.sample_period) - return -EINVAL; - - size = copy_from_user(&value, arg, sizeof(value)); - if (size != sizeof(value)) - return -EFAULT; - - if (!value) - return -EINVAL; - - spin_lock_irq(&ctx->lock); - if (counter->attr.freq) { - if (value > sysctl_perf_counter_sample_rate) { - ret = -EINVAL; - goto unlock; - } - - counter->attr.sample_freq = value; - } else { - counter->attr.sample_period = value; - counter->hw.sample_period = value; - } -unlock: - spin_unlock_irq(&ctx->lock); - - return ret; -} - -int perf_counter_set_output(struct perf_counter *counter, int output_fd); - -static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct perf_counter *counter = file->private_data; - void (*func)(struct perf_counter *); - u32 flags = arg; - - switch (cmd) { - case PERF_COUNTER_IOC_ENABLE: - func = perf_counter_enable; - break; - case PERF_COUNTER_IOC_DISABLE: - func = perf_counter_disable; - break; - case PERF_COUNTER_IOC_RESET: - func = perf_counter_reset; - break; - - case PERF_COUNTER_IOC_REFRESH: - return perf_counter_refresh(counter, arg); - - case PERF_COUNTER_IOC_PERIOD: - return perf_counter_period(counter, (u64 __user *)arg); - - case PERF_COUNTER_IOC_SET_OUTPUT: - return perf_counter_set_output(counter, arg); - - default: - return -ENOTTY; - } - - if (flags & PERF_IOC_FLAG_GROUP) - perf_counter_for_each(counter, func); - else - perf_counter_for_each_child(counter, func); - - return 0; -} - -int perf_counter_task_enable(void) -{ - struct perf_counter *counter; - - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_enable); - mutex_unlock(¤t->perf_counter_mutex); - - return 0; -} - -int perf_counter_task_disable(void) -{ - struct perf_counter *counter; - - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_disable); - mutex_unlock(¤t->perf_counter_mutex); - - return 0; -} - -#ifndef PERF_COUNTER_INDEX_OFFSET -# define PERF_COUNTER_INDEX_OFFSET 0 -#endif - -static int perf_counter_index(struct perf_counter *counter) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return 0; - - return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET; -} - -/* - * Callers need to ensure there can be no nesting of this function, otherwise - * the seqlock logic goes bad. 
We can not serialize this because the arch - * code calls this from NMI context. - */ -void perf_counter_update_userpage(struct perf_counter *counter) -{ - struct perf_counter_mmap_page *userpg; - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - userpg = data->user_page; - - /* - * Disable preemption so as to not let the corresponding user-space - * spin too long if we get preempted. - */ - preempt_disable(); - ++userpg->lock; - barrier(); - userpg->index = perf_counter_index(counter); - userpg->offset = atomic64_read(&counter->count); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - userpg->offset -= atomic64_read(&counter->hw.prev_count); - - userpg->time_enabled = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - - userpg->time_running = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - - barrier(); - ++userpg->lock; - preempt_enable(); -unlock: - rcu_read_unlock(); -} - -static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct perf_counter *counter = vma->vm_file->private_data; - struct perf_mmap_data *data; - int ret = VM_FAULT_SIGBUS; - - if (vmf->flags & FAULT_FLAG_MKWRITE) { - if (vmf->pgoff == 0) - ret = 0; - return ret; - } - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - if (vmf->pgoff == 0) { - vmf->page = virt_to_page(data->user_page); - } else { - int nr = vmf->pgoff - 1; - - if ((unsigned)nr > data->nr_pages) - goto unlock; - - if (vmf->flags & FAULT_FLAG_WRITE) - goto unlock; - - vmf->page = virt_to_page(data->data_pages[nr]); - } - - get_page(vmf->page); - vmf->page->mapping = vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - ret = 0; -unlock: - rcu_read_unlock(); - - return ret; -} - -static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) -{ - struct perf_mmap_data *data; - unsigned long size; - int i; - - WARN_ON(atomic_read(&counter->mmap_count)); - - size = sizeof(struct perf_mmap_data); - size += nr_pages * sizeof(void *); - - data = kzalloc(size, GFP_KERNEL); - if (!data) - goto fail; - - data->user_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->user_page) - goto fail_user_page; - - for (i = 0; i < nr_pages; i++) { - data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->data_pages[i]) - goto fail_data_pages; - } - - data->nr_pages = nr_pages; - atomic_set(&data->lock, -1); - - if (counter->attr.watermark) { - data->watermark = min_t(long, PAGE_SIZE * nr_pages, - counter->attr.wakeup_watermark); - } - if (!data->watermark) - data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); - - rcu_assign_pointer(counter->data, data); - - return 0; - -fail_data_pages: - for (i--; i >= 0; i--) - free_page((unsigned long)data->data_pages[i]); - - free_page((unsigned long)data->user_page); - -fail_user_page: - kfree(data); - -fail: - return -ENOMEM; -} - -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - -static void __perf_mmap_data_free(struct rcu_head *rcu_head) -{ - struct perf_mmap_data *data; - int i; - - data = container_of(rcu_head, struct perf_mmap_data, rcu_head); - - perf_mmap_free_page((unsigned long)data->user_page); - for (i = 0; i < data->nr_pages; i++) - perf_mmap_free_page((unsigned long)data->data_pages[i]); - - kfree(data); -} - -static void perf_mmap_data_free(struct 
perf_counter *counter) -{ - struct perf_mmap_data *data = counter->data; - - WARN_ON(atomic_read(&counter->mmap_count)); - - rcu_assign_pointer(counter->data, NULL); - call_rcu(&data->rcu_head, __perf_mmap_data_free); -} - -static void perf_mmap_open(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - atomic_inc(&counter->mmap_count); -} - -static void perf_mmap_close(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { - struct user_struct *user = current_user(); - - atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); - vma->vm_mm->locked_vm -= counter->data->nr_locked; - perf_mmap_data_free(counter); - mutex_unlock(&counter->mmap_mutex); - } -} - -static struct vm_operations_struct perf_mmap_vmops = { - .open = perf_mmap_open, - .close = perf_mmap_close, - .fault = perf_mmap_fault, - .page_mkwrite = perf_mmap_fault, -}; - -static int perf_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct perf_counter *counter = file->private_data; - unsigned long user_locked, user_lock_limit; - struct user_struct *user = current_user(); - unsigned long locked, lock_limit; - unsigned long vma_size; - unsigned long nr_pages; - long user_extra, extra; - int ret = 0; - - if (!(vma->vm_flags & VM_SHARED)) - return -EINVAL; - - vma_size = vma->vm_end - vma->vm_start; - nr_pages = (vma_size / PAGE_SIZE) - 1; - - /* - * If we have data pages ensure they're a power-of-two number, so we - * can do bitmasks instead of modulo. - */ - if (nr_pages != 0 && !is_power_of_2(nr_pages)) - return -EINVAL; - - if (vma_size != PAGE_SIZE * (1 + nr_pages)) - return -EINVAL; - - if (vma->vm_pgoff != 0) - return -EINVAL; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->mmap_mutex); - if (counter->output) { - ret = -EINVAL; - goto unlock; - } - - if (atomic_inc_not_zero(&counter->mmap_count)) { - if (nr_pages != counter->data->nr_pages) - ret = -EINVAL; - goto unlock; - } - - user_extra = nr_pages + 1; - user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); - - /* - * Increase the limit linearly with more CPUs: - */ - user_lock_limit *= num_online_cpus(); - - user_locked = atomic_long_read(&user->locked_vm) + user_extra; - - extra = 0; - if (user_locked > user_lock_limit) - extra = user_locked - user_lock_limit; - - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; - locked = vma->vm_mm->locked_vm + extra; - - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && - !capable(CAP_IPC_LOCK)) { - ret = -EPERM; - goto unlock; - } - - WARN_ON(counter->data); - ret = perf_mmap_data_alloc(counter, nr_pages); - if (ret) - goto unlock; - - atomic_set(&counter->mmap_count, 1); - atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->locked_vm += extra; - counter->data->nr_locked = extra; - if (vma->vm_flags & VM_WRITE) - counter->data->writable = 1; - -unlock: - mutex_unlock(&counter->mmap_mutex); - - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &perf_mmap_vmops; - - return ret; -} - -static int perf_fasync(int fd, struct file *filp, int on) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct perf_counter *counter = filp->private_data; - int retval; - - mutex_lock(&inode->i_mutex); - retval = fasync_helper(fd, filp, on, &counter->fasync); - mutex_unlock(&inode->i_mutex); - - if (retval < 0) - return retval; - - 
return 0;
-}
-
-static const struct file_operations perf_fops = {
- .release = perf_release,
- .read = perf_read,
- .poll = perf_poll,
- .unlocked_ioctl = perf_ioctl,
- .compat_ioctl = perf_ioctl,
- .mmap = perf_mmap,
- .fasync = perf_fasync,
-};
-
-/*
- * Perf counter wakeup
- *
- * If there's data, ensure we set the poll() state and publish everything
- * to user-space before waking everybody up.
- */
-
-void perf_counter_wakeup(struct perf_counter *counter)
-{
- wake_up_all(&counter->waitq);
-
- if (counter->pending_kill) {
- kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
- counter->pending_kill = 0;
- }
-}
-
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wake up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks. Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_counter(struct perf_pending_entry *entry)
-{
- struct perf_counter *counter = container_of(entry,
- struct perf_counter, pending);
-
- if (counter->pending_disable) {
- counter->pending_disable = 0;
- __perf_counter_disable(counter);
- }
-
- if (counter->pending_wakeup) {
- counter->pending_wakeup = 0;
- perf_counter_wakeup(counter);
- }
-}
-
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
- PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
- void (*func)(struct perf_pending_entry *))
-{
- struct perf_pending_entry **head;
-
- if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
- return;
-
- entry->func = func;
-
- head = &get_cpu_var(perf_pending_head);
-
- do {
- entry->next = *head;
- } while (cmpxchg(head, entry->next, entry) != entry->next);
-
- set_perf_counter_pending();
-
- put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
- struct perf_pending_entry *list;
- int nr = 0;
-
- list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
- while (list != PENDING_TAIL) {
- void (*func)(struct perf_pending_entry *);
- struct perf_pending_entry *entry = list;
-
- list = list->next;
-
- func = entry->func;
- entry->next = NULL;
- /*
- * Ensure we observe the unqueue before we issue the wakeup,
- * so that we won't be waiting forever.
- * -- see perf_not_pending().
- */
- smp_wmb();
-
- func(entry);
- nr++;
- }
-
- return nr;
-}
-
-static inline int perf_not_pending(struct perf_counter *counter)
-{
- /*
- * If we flush on whatever cpu we run, there is a chance we don't
- * need to wait.
- */
- get_cpu();
- __perf_pending_run();
- put_cpu();
-
- /*
- * Ensure we see the proper queue state before going to sleep
- * so that we do not miss the wakeup.
-- see perf_pending_handle() - */ - smp_rmb(); - return counter->pending.next == NULL; -} - -static void perf_pending_sync(struct perf_counter *counter) -{ - wait_event(counter->waitq, perf_not_pending(counter)); -} - -void perf_counter_do_pending(void) -{ - __perf_pending_run(); -} - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - -/* - * Output - */ -static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, - unsigned long offset, unsigned long head) -{ - unsigned long mask; - - if (!data->writable) - return true; - - mask = (data->nr_pages << PAGE_SHIFT) - 1; - - offset = (offset - tail) & mask; - head = (head - tail) & mask; - - if ((int)(head - offset) < 0) - return false; - - return true; -} - -static void perf_output_wakeup(struct perf_output_handle *handle) -{ - atomic_set(&handle->data->poll, POLL_IN); - - if (handle->nmi) { - handle->counter->pending_wakeup = 1; - perf_pending_queue(&handle->counter->pending, - perf_pending_counter); - } else - perf_counter_wakeup(handle->counter); -} - -/* - * Curious locking construct. - * - * We need to ensure a later event doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we - * cannot fully serialize things. - * - * What we do is serialize between CPUs so we only have to deal with NMI - * nesting on a single CPU. - * - * We only publish the head (and generate a wakeup) when the outer-most - * event completes. - */ -static void perf_output_lock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - int cpu; - - handle->locked = 0; - - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; - - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - handle->locked = 1; -} - -static void perf_output_unlock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - unsigned long head; - int cpu; - - data->done_head = data->head; - - if (!handle->locked) - goto out; - -again: - /* - * The xchg implies a full barrier that ensures all writes are done - * before we publish the new head, matched by a rmb() in userspace when - * reading this position. - */ - while ((head = atomic_long_xchg(&data->done_head, 0))) - data->user_page->data_head = head; - - /* - * NMI can happen here, which means we can miss a done_head update. - */ - - cpu = atomic_xchg(&data->lock, -1); - WARN_ON_ONCE(cpu != smp_processor_id()); - - /* - * Therefore we have to validate we did not indeed do so. - */ - if (unlikely(atomic_long_read(&data->done_head))) { - /* - * Since we had it locked, we can lock it again. 
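The pending list built by perf_pending_queue() above is a lock-free LIFO push: point the new entry at the current head, then cmpxchg() the head over to the entry, retrying on contention. A portable C11 sketch of the same push, using a single global head and a NULL empty marker instead of the per-cpu head and the PENDING_TAIL sentinel:

#include <stdatomic.h>
#include <stdio.h>

struct pending {
	struct pending *next;
	int tag;
};

static _Atomic(struct pending *) pending_head;	/* NULL = empty */

/* Lockless push, safe from contexts that cannot take locks. */
static void pending_push(struct pending *entry)
{
	struct pending *head = atomic_load(&pending_head);

	do {
		entry->next = head;
	} while (!atomic_compare_exchange_weak(&pending_head, &head, entry));
}

int main(void)
{
	struct pending a = { 0, 1 }, b = { 0, 2 };

	pending_push(&a);
	pending_push(&b);
	/* LIFO order: prints 2 then 1. */
	for (struct pending *p = atomic_load(&pending_head); p; p = p->next)
		printf("%d\n", p->tag);
	return 0;
}

The kernel version additionally cmpxchg()es entry->next from NULL to the TAIL sentinel first, so an entry that is already queued is never queued twice.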
- */ - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - goto again; - } - - if (atomic_xchg(&data->wakeup, 0)) - perf_output_wakeup(handle); -out: - local_irq_restore(handle->flags); -} - -void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - unsigned int pages_mask; - unsigned int offset; - unsigned int size; - void **pages; - - offset = handle->offset; - pages_mask = handle->data->nr_pages - 1; - pages = handle->data->data_pages; - - do { - unsigned int page_offset; - int nr; - - nr = (offset >> PAGE_SHIFT) & pages_mask; - page_offset = offset & (PAGE_SIZE - 1); - size = min_t(unsigned int, PAGE_SIZE - page_offset, len); - - memcpy(pages[nr] + page_offset, buf, size); - - len -= size; - buf += size; - offset += size; - } while (len); - - handle->offset = offset; - - /* - * Check we didn't copy past our reservation window, taking the - * possible unsigned int wrap into account. - */ - WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); -} - -int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int sample) -{ - struct perf_counter *output_counter; - struct perf_mmap_data *data; - unsigned long tail, offset, head; - int have_lost; - struct { - struct perf_event_header header; - u64 id; - u64 lost; - } lost_event; - - rcu_read_lock(); - /* - * For inherited counters we send all the output towards the parent. - */ - if (counter->parent) - counter = counter->parent; - - output_counter = rcu_dereference(counter->output); - if (output_counter) - counter = output_counter; - - data = rcu_dereference(counter->data); - if (!data) - goto out; - - handle->data = data; - handle->counter = counter; - handle->nmi = nmi; - handle->sample = sample; - - if (!data->nr_pages) - goto fail; - - have_lost = atomic_read(&data->lost); - if (have_lost) - size += sizeof(lost_event); - - perf_output_lock(handle); - - do { - /* - * Userspace could choose to issue a mb() before updating the - * tail pointer. So that all reads will be completed before the - * write is issued. 
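perf_output_copy() above depends on the data area being a power-of-two number of pages, so the wrap-around is a cheap AND with a mask instead of a modulo. The same trick on a flat byte ring, with invented sizes:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define RING_SIZE 16			/* must be a power of two */

static uint8_t ring[RING_SIZE];

/* Copy len bytes at a logical offset, wrapping with a mask. */
static void ring_copy(uint64_t offset, const void *buf, size_t len)
{
	const uint8_t *src = buf;

	while (len) {
		size_t pos = offset & (RING_SIZE - 1);
		size_t chunk = RING_SIZE - pos;

		if (chunk > len)
			chunk = len;
		memcpy(ring + pos, src, chunk);
		src += chunk;
		offset += chunk;
		len -= chunk;
	}
}

int main(void)
{
	ring_copy(12, "wrapped!", 8);		/* crosses the ring's end */
	printf("%.4s%.4s\n", ring + 12, ring);	/* "wrapped!", split 4/4 */
	return 0;
}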
- */ - tail = ACCESS_ONCE(data->user_page->data_tail); - smp_rmb(); - offset = head = atomic_long_read(&data->head); - head += size; - if (unlikely(!perf_output_space(data, tail, offset, head))) - goto fail; - } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); - - handle->offset = offset; - handle->head = head; - - if (head - tail > data->watermark) - atomic_set(&data->wakeup, 1); - - if (have_lost) { - lost_event.header.type = PERF_EVENT_LOST; - lost_event.header.misc = 0; - lost_event.header.size = sizeof(lost_event); - lost_event.id = counter->id; - lost_event.lost = atomic_xchg(&data->lost, 0); - - perf_output_put(handle, lost_event); - } - - return 0; - -fail: - atomic_inc(&data->lost); - perf_output_unlock(handle); -out: - rcu_read_unlock(); - - return -ENOSPC; -} - -void perf_output_end(struct perf_output_handle *handle) -{ - struct perf_counter *counter = handle->counter; - struct perf_mmap_data *data = handle->data; - - int wakeup_events = counter->attr.wakeup_events; - - if (handle->sample && wakeup_events) { - int events = atomic_inc_return(&data->events); - if (events >= wakeup_events) { - atomic_sub(wakeup_events, &data->events); - atomic_set(&data->wakeup, 1); - } - } - - perf_output_unlock(handle); - rcu_read_unlock(); -} - -static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_tgid_nr_ns(p, counter->ns); -} - -static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_pid_nr_ns(p, counter->ns); -} - -static void perf_output_read_one(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - u64 read_format = counter->attr.read_format; - u64 values[4]; - int n = 0; - - values[n++] = atomic64_read(&counter->count); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - } - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); - - perf_output_copy(handle, values, n * sizeof(u64)); -} - -/* - * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. 
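perf_output_read_one() above emits between one and four u64 values depending on read_format. A sketch of the sizing rule; the PERF_FORMAT_* bit values are quoted from the perf ABI of this era and should be treated as an assumption here:

#include <stdint.h>

#define PERF_FORMAT_TOTAL_TIME_ENABLED	(1U << 0)
#define PERF_FORMAT_TOTAL_TIME_RUNNING	(1U << 1)
#define PERF_FORMAT_ID			(1U << 2)

static unsigned int read_one_nr_values(uint64_t read_format)
{
	unsigned int n = 1;		/* the count itself */

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		n++;
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		n++;
	if (read_format & PERF_FORMAT_ID)
		n++;

	return n;	/* payload is n * sizeof(uint64_t) bytes */
}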
- */ -static void perf_output_read_group(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - struct perf_counter *leader = counter->group_leader, *sub; - u64 read_format = counter->attr.read_format; - u64 values[5]; - int n = 0; - - values[n++] = 1 + leader->nr_siblings; - - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = leader->total_time_enabled; - - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = leader->total_time_running; - - if (leader != counter) - leader->pmu->read(leader); - - values[n++] = atomic64_read(&leader->count); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(leader); - - perf_output_copy(handle, values, n * sizeof(u64)); - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - n = 0; - - if (sub != counter) - sub->pmu->read(sub); - - values[n++] = atomic64_read(&sub->count); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(sub); - - perf_output_copy(handle, values, n * sizeof(u64)); - } -} - -static void perf_output_read(struct perf_output_handle *handle, - struct perf_counter *counter) -{ - if (counter->attr.read_format & PERF_FORMAT_GROUP) - perf_output_read_group(handle, counter); - else - perf_output_read_one(handle, counter); -} - -void perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter) -{ - u64 sample_type = data->type; - - perf_output_put(handle, *header); - - if (sample_type & PERF_SAMPLE_IP) - perf_output_put(handle, data->ip); - - if (sample_type & PERF_SAMPLE_TID) - perf_output_put(handle, data->tid_entry); - - if (sample_type & PERF_SAMPLE_TIME) - perf_output_put(handle, data->time); - - if (sample_type & PERF_SAMPLE_ADDR) - perf_output_put(handle, data->addr); - - if (sample_type & PERF_SAMPLE_ID) - perf_output_put(handle, data->id); - - if (sample_type & PERF_SAMPLE_STREAM_ID) - perf_output_put(handle, data->stream_id); - - if (sample_type & PERF_SAMPLE_CPU) - perf_output_put(handle, data->cpu_entry); - - if (sample_type & PERF_SAMPLE_PERIOD) - perf_output_put(handle, data->period); - - if (sample_type & PERF_SAMPLE_READ) - perf_output_read(handle, counter); - - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - if (data->callchain) { - int size = 1; - - if (data->callchain) - size += data->callchain->nr; - - size *= sizeof(u64); - - perf_output_copy(handle, data->callchain, size); - } else { - u64 nr = 0; - perf_output_put(handle, nr); - } - } - - if (sample_type & PERF_SAMPLE_RAW) { - if (data->raw) { - perf_output_put(handle, data->raw->size); - perf_output_copy(handle, data->raw->data, - data->raw->size); - } else { - struct { - u32 size; - u32 data; - } raw = { - .size = sizeof(u32), - .data = 0, - }; - perf_output_put(handle, raw); - } - } -} - -void perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs) -{ - u64 sample_type = counter->attr.sample_type; - - data->type = sample_type; - - header->type = PERF_EVENT_SAMPLE; - header->size = sizeof(*header); - - header->misc = 0; - header->misc |= perf_misc_flags(regs); - - if (sample_type & PERF_SAMPLE_IP) { - data->ip = perf_instruction_pointer(regs); - - header->size += sizeof(data->ip); - } - - if (sample_type & PERF_SAMPLE_TID) { - /* namespace issues */ - data->tid_entry.pid = perf_counter_pid(counter, current); - data->tid_entry.tid = perf_counter_tid(counter, current); - - header->size += 
sizeof(data->tid_entry); - } - - if (sample_type & PERF_SAMPLE_TIME) { - data->time = perf_clock(); - - header->size += sizeof(data->time); - } - - if (sample_type & PERF_SAMPLE_ADDR) - header->size += sizeof(data->addr); - - if (sample_type & PERF_SAMPLE_ID) { - data->id = primary_counter_id(counter); - - header->size += sizeof(data->id); - } - - if (sample_type & PERF_SAMPLE_STREAM_ID) { - data->stream_id = counter->id; - - header->size += sizeof(data->stream_id); - } - - if (sample_type & PERF_SAMPLE_CPU) { - data->cpu_entry.cpu = raw_smp_processor_id(); - data->cpu_entry.reserved = 0; - - header->size += sizeof(data->cpu_entry); - } - - if (sample_type & PERF_SAMPLE_PERIOD) - header->size += sizeof(data->period); - - if (sample_type & PERF_SAMPLE_READ) - header->size += perf_counter_read_size(counter); - - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int size = 1; - - data->callchain = perf_callchain(regs); - - if (data->callchain) - size += data->callchain->nr; - - header->size += size * sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_RAW) { - int size = sizeof(u32); - - if (data->raw) - size += data->raw->size; - else - size += sizeof(u32); - - WARN_ON_ONCE(size & (sizeof(u64)-1)); - header->size += size; - } -} - -static void perf_counter_output(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_output_handle handle; - struct perf_event_header header; - - perf_prepare_sample(&header, data, counter, regs); - - if (perf_output_begin(&handle, counter, header.size, nmi, 1)) - return; - - perf_output_sample(&handle, &header, data, counter); - - perf_output_end(&handle); -} - -/* - * read event - */ - -struct perf_read_event { - struct perf_event_header header; - - u32 pid; - u32 tid; -}; - -static void -perf_counter_read_event(struct perf_counter *counter, - struct task_struct *task) -{ - struct perf_output_handle handle; - struct perf_read_event read_event = { - .header = { - .type = PERF_EVENT_READ, - .misc = 0, - .size = sizeof(read_event) + perf_counter_read_size(counter), - }, - .pid = perf_counter_pid(counter, task), - .tid = perf_counter_tid(counter, task), - }; - int ret; - - ret = perf_output_begin(&handle, counter, read_event.header.size, 0, 0); - if (ret) - return; - - perf_output_put(&handle, read_event); - perf_output_read(&handle, counter); - - perf_output_end(&handle); -} - -/* - * task tracking -- fork/exit - * - * enabled by: attr.comm | attr.mmap | attr.task - */ - -struct perf_task_event { - struct task_struct *task; - struct perf_counter_context *task_ctx; - - struct { - struct perf_event_header header; - - u32 pid; - u32 ppid; - u32 tid; - u32 ptid; - u64 time; - } event; -}; - -static void perf_counter_task_output(struct perf_counter *counter, - struct perf_task_event *task_event) -{ - struct perf_output_handle handle; - int size; - struct task_struct *task = task_event->task; - int ret; - - size = task_event->event.header.size; - ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - task_event->event.pid = perf_counter_pid(counter, task); - task_event->event.ppid = perf_counter_pid(counter, current); - - task_event->event.tid = perf_counter_tid(counter, task); - task_event->event.ptid = perf_counter_tid(counter, current); - - task_event->event.time = perf_clock(); - - perf_output_put(&handle, task_event->event); - - perf_output_end(&handle); -} - -static int perf_counter_task_match(struct perf_counter *counter) -{ - if (counter->attr.comm || counter->attr.mmap || 
counter->attr.task) - return 1; - - return 0; -} - -static void perf_counter_task_ctx(struct perf_counter_context *ctx, - struct perf_task_event *task_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_task_match(counter)) - perf_counter_task_output(counter, task_event); - } - rcu_read_unlock(); -} - -static void perf_counter_task_event(struct perf_task_event *task_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx = task_event->task_ctx; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_task_ctx(&cpuctx->ctx, task_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - if (!ctx) - ctx = rcu_dereference(task_event->task->perf_counter_ctxp); - if (ctx) - perf_counter_task_ctx(ctx, task_event); - rcu_read_unlock(); -} - -static void perf_counter_task(struct task_struct *task, - struct perf_counter_context *task_ctx, - int new) -{ - struct perf_task_event task_event; - - if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters) && - !atomic_read(&nr_task_counters)) - return; - - task_event = (struct perf_task_event){ - .task = task, - .task_ctx = task_ctx, - .event = { - .header = { - .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, - .misc = 0, - .size = sizeof(task_event.event), - }, - /* .pid */ - /* .ppid */ - /* .tid */ - /* .ptid */ - }, - }; - - perf_counter_task_event(&task_event); -} - -void perf_counter_fork(struct task_struct *task) -{ - perf_counter_task(task, NULL, 1); -} - -/* - * comm tracking - */ - -struct perf_comm_event { - struct task_struct *task; - char *comm; - int comm_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - } event; -}; - -static void perf_counter_comm_output(struct perf_counter *counter, - struct perf_comm_event *comm_event) -{ - struct perf_output_handle handle; - int size = comm_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - comm_event->event.pid = perf_counter_pid(counter, comm_event->task); - comm_event->event.tid = perf_counter_tid(counter, comm_event->task); - - perf_output_put(&handle, comm_event->event); - perf_output_copy(&handle, comm_event->comm, - comm_event->comm_size); - perf_output_end(&handle); -} - -static int perf_counter_comm_match(struct perf_counter *counter) -{ - if (counter->attr.comm) - return 1; - - return 0; -} - -static void perf_counter_comm_ctx(struct perf_counter_context *ctx, - struct perf_comm_event *comm_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_comm_match(counter)) - perf_counter_comm_output(counter, comm_event); - } - rcu_read_unlock(); -} - -static void perf_counter_comm_event(struct perf_comm_event *comm_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - unsigned int size; - char comm[TASK_COMM_LEN]; - - memset(comm, 0, sizeof(comm)); - strncpy(comm, comm_event->task->comm, sizeof(comm)); - size = ALIGN(strlen(comm)+1, sizeof(u64)); - - comm_event->comm = comm; - comm_event->comm_size = size; - - comm_event->event.header.size = sizeof(comm_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(&cpuctx->ctx, comm_event); - 
put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * event ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_comm_ctx(ctx, comm_event); - rcu_read_unlock(); -} - -void perf_counter_comm(struct task_struct *task) -{ - struct perf_comm_event comm_event; - - if (task->perf_counter_ctxp) - perf_counter_enable_on_exec(task); - - if (!atomic_read(&nr_comm_counters)) - return; - - comm_event = (struct perf_comm_event){ - .task = task, - /* .comm */ - /* .comm_size */ - .event = { - .header = { - .type = PERF_EVENT_COMM, - .misc = 0, - /* .size */ - }, - /* .pid */ - /* .tid */ - }, - }; - - perf_counter_comm_event(&comm_event); -} - -/* - * mmap tracking - */ - -struct perf_mmap_event { - struct vm_area_struct *vma; - - const char *file_name; - int file_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - u64 start; - u64 len; - u64 pgoff; - } event; -}; - -static void perf_counter_mmap_output(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - struct perf_output_handle handle; - int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - mmap_event->event.pid = perf_counter_pid(counter, current); - mmap_event->event.tid = perf_counter_tid(counter, current); - - perf_output_put(&handle, mmap_event->event); - perf_output_copy(&handle, mmap_event->file_name, - mmap_event->file_size); - perf_output_end(&handle); -} - -static int perf_counter_mmap_match(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - if (counter->attr.mmap) - return 1; - - return 0; -} - -static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, - struct perf_mmap_event *mmap_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_mmap_match(counter, mmap_event)) - perf_counter_mmap_output(counter, mmap_event); - } - rcu_read_unlock(); -} - -static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - struct vm_area_struct *vma = mmap_event->vma; - struct file *file = vma->vm_file; - unsigned int size; - char tmp[16]; - char *buf = NULL; - const char *name; - - memset(tmp, 0, sizeof(tmp)); - - if (file) { - /* - * d_path works from the end of the buffer backwards, so we - * need to add enough zero bytes after the string to handle - * the 64bit alignment we do later.
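The alignment that comment refers to is the ALIGN(strlen(name)+1, sizeof(u64)) at the got_name label below: the string plus its NUL is padded to a multiple of 8 so the next record stays u64-aligned, and the padding bytes must already be zero. A tiny standalone illustration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *name = "/lib/libc.so.6";
	size_t size = (strlen(name) + 1 + 7) & ~(size_t)7;

	/* prints: 16-byte body for a 14-char name */
	printf("%zu-byte body for a %zu-char name\n", size, strlen(name));
	return 0;
}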
- */ - buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); - if (!buf) { - name = strncpy(tmp, "//enomem", sizeof(tmp)); - goto got_name; - } - name = d_path(&file->f_path, buf, PATH_MAX); - if (IS_ERR(name)) { - name = strncpy(tmp, "//toolong", sizeof(tmp)); - goto got_name; - } - } else { - if (arch_vma_name(mmap_event->vma)) { - name = strncpy(tmp, arch_vma_name(mmap_event->vma), - sizeof(tmp)); - goto got_name; - } - - if (!vma->vm_mm) { - name = strncpy(tmp, "[vdso]", sizeof(tmp)); - goto got_name; - } - - name = strncpy(tmp, "//anon", sizeof(tmp)); - goto got_name; - } - -got_name: - size = ALIGN(strlen(name)+1, sizeof(u64)); - - mmap_event->file_name = name; - mmap_event->file_size = size; - - mmap_event->event.header.size = sizeof(mmap_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * event ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_mmap_ctx(ctx, mmap_event); - rcu_read_unlock(); - - kfree(buf); -} - -void __perf_counter_mmap(struct vm_area_struct *vma) -{ - struct perf_mmap_event mmap_event; - - if (!atomic_read(&nr_mmap_counters)) - return; - - mmap_event = (struct perf_mmap_event){ - .vma = vma, - /* .file_name */ - /* .file_size */ - .event = { - .header = { - .type = PERF_EVENT_MMAP, - .misc = 0, - /* .size */ - }, - /* .pid */ - /* .tid */ - .start = vma->vm_start, - .len = vma->vm_end - vma->vm_start, - .pgoff = vma->vm_pgoff, - }, - }; - - perf_counter_mmap_event(&mmap_event); -} - -/* - * IRQ throttle logging - */ - -static void perf_log_throttle(struct perf_counter *counter, int enable) -{ - struct perf_output_handle handle; - int ret; - - struct { - struct perf_event_header header; - u64 time; - u64 id; - u64 stream_id; - } throttle_event = { - .header = { - .type = PERF_EVENT_THROTTLE, - .misc = 0, - .size = sizeof(throttle_event), - }, - .time = perf_clock(), - .id = primary_counter_id(counter), - .stream_id = counter->id, - }; - - if (enable) - throttle_event.header.type = PERF_EVENT_UNTHROTTLE; - - ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, throttle_event); - perf_output_end(&handle); -} - -/* - * Generic counter overflow handling, sampling.
- */ - -static int __perf_counter_overflow(struct perf_counter *counter, int nmi, - int throttle, struct perf_sample_data *data, - struct pt_regs *regs) -{ - int events = atomic_read(&counter->event_limit); - struct hw_perf_counter *hwc = &counter->hw; - int ret = 0; - - throttle = (throttle && counter->pmu->unthrottle != NULL); - - if (!throttle) { - hwc->interrupts++; - } else { - if (hwc->interrupts != MAX_INTERRUPTS) { - hwc->interrupts++; - if (HZ * hwc->interrupts > - (u64)sysctl_perf_counter_sample_rate) { - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(counter, 0); - ret = 1; - } - } else { - /* - * Keep re-disabling counters even though on the previous - * pass we disabled it - just in case we raced with a - * sched-in and the counter got enabled again: - */ - ret = 1; - } - } - - if (counter->attr.freq) { - u64 now = perf_clock(); - s64 delta = now - hwc->freq_stamp; - - hwc->freq_stamp = now; - - if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); - } - - /* - * XXX event_limit might not quite work as expected on inherited - * counters - */ - - counter->pending_kill = POLL_IN; - if (events && atomic_dec_and_test(&counter->event_limit)) { - ret = 1; - counter->pending_kill = POLL_HUP; - if (nmi) { - counter->pending_disable = 1; - perf_pending_queue(&counter->pending, - perf_pending_counter); - } else - perf_counter_disable(counter); - } - - perf_counter_output(counter, nmi, data, regs); - return ret; -} - -int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - return __perf_counter_overflow(counter, nmi, 1, data, regs); -} - -/* - * Generic software counter infrastructure - */ - -/* - * We directly increment counter->count and keep a second value in - * counter->hw.period_left to count intervals. This period counter - * is kept in the range [-sample_period, 0] so that we can use the - * sign as trigger. - */ - -static u64 perf_swcounter_set_period(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 period = hwc->last_period; - u64 nr, offset; - s64 old, val; - - hwc->last_period = hwc->sample_period; - -again: - old = val = atomic64_read(&hwc->period_left); - if (val < 0) - return 0; - - nr = div64_u64(period + val, period); - offset = nr * period; - val -= offset; - if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) - goto again; - - return nr; -} - -static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct hw_perf_counter *hwc = &counter->hw; - int throttle = 0; - u64 overflow; - - data->period = counter->hw.last_period; - overflow = perf_swcounter_set_period(counter); - - if (hwc->interrupts == MAX_INTERRUPTS) - return; - - for (; overflow; overflow--) { - if (__perf_counter_overflow(counter, nmi, throttle, - data, regs)) { - /* - * We inhibit the overflow from happening when - * hwc->interrupts == MAX_INTERRUPTS. - */ - break; - } - throttle = 1; - } -} - -static void perf_swcounter_unthrottle(struct perf_counter *counter) -{ - /* - * Nothing to do, we already reset hwc->interrupts. 
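perf_swcounter_set_period() above keeps period_left in the range [-sample_period, 0]; once the count overshoots zero, it computes how many whole periods elapsed and rewinds the remainder. A runnable userspace model of just that arithmetic, with illustrative names:

#include <stdint.h>
#include <stdio.h>

static uint64_t set_period(int64_t *period_left, uint64_t period)
{
	int64_t val = *period_left;
	uint64_t nr;

	if (val < 0)
		return 0;			/* no overflow pending */

	nr = (period + (uint64_t)val) / period;	/* elapsed periods */
	*period_left = val - (int64_t)(nr * period);

	return nr;
}

int main(void)
{
	int64_t left = 2500;			/* overshot zero by 2500 */
	uint64_t nr = set_period(&left, 1000);

	/* prints: 3 overflows, left = -500 */
	printf("%llu overflows, left = %lld\n",
	       (unsigned long long)nr, (long long)left);
	return 0;
}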
- */ -} - -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct hw_perf_counter *hwc = &counter->hw; - - atomic64_add(nr, &counter->count); - - if (!hwc->sample_period) - return; - - if (!regs) - return; - - if (!atomic64_add_negative(nr, &hwc->period_left)) - perf_swcounter_overflow(counter, nmi, data, regs); -} - -static int perf_swcounter_is_counting(struct perf_counter *counter) -{ - /* - * The counter is active, we're good! - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - return 1; - - /* - * The counter is off/error, not counting. - */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE) - return 0; - - /* - * The counter is inactive, if the context is active - * we're part of a group that didn't make it on the 'pmu', - * not counting. - */ - if (counter->ctx->is_active) - return 0; - - /* - * We're inactive and the context is too, this means the - * task is scheduled out, we're counting events that happen - * to us, like migration events. - */ - return 1; -} - -static int perf_swcounter_match(struct perf_counter *counter, - enum perf_type_id type, - u32 event_id, struct pt_regs *regs) -{ - if (!perf_swcounter_is_counting(counter)) - return 0; - - if (counter->attr.type != type) - return 0; - if (counter->attr.config != event_id) - return 0; - - if (regs) { - if (counter->attr.exclude_user && user_mode(regs)) - return 0; - - if (counter->attr.exclude_kernel && !user_mode(regs)) - return 0; - } - - return 1; -} - -static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum perf_type_id type, - u32 event_id, u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, type, event_id, regs)) - perf_swcounter_add(counter, nr, nmi, data, regs); - } - rcu_read_unlock(); -} - -static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) -{ - if (in_nmi()) - return &cpuctx->recursion[3]; - - if (in_irq()) - return &cpuctx->recursion[2]; - - if (in_softirq()) - return &cpuctx->recursion[1]; - - return &cpuctx->recursion[0]; -} - -static void do_perf_swcounter_event(enum perf_type_id type, u32 event, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swcounter_recursion_context(cpuctx); - struct perf_counter_context *ctx; - - if (*recursion) - goto out; - - (*recursion)++; - barrier(); - - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, - nr, nmi, data, regs); - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * event ends up in.
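The recursion array indexed by perf_swcounter_recursion_context() above gives each execution level (task, softirq, hardirq, NMI) its own re-entrancy flag, so an event firing from a deeper level never feeds back into itself. A compact model of the guard; in_nmi() and friends have no userspace equivalent, so the level is passed in explicitly:

enum ctx_level { CTX_TASK, CTX_SOFTIRQ, CTX_HARDIRQ, CTX_NMI, CTX_NR };

struct cpu_state {
	int recursion[CTX_NR];
};

static int swevent_enter(struct cpu_state *s, enum ctx_level lvl)
{
	if (s->recursion[lvl])
		return 0;		/* already inside: drop the event */

	s->recursion[lvl]++;
	return 1;
}

static void swevent_exit(struct cpu_state *s, enum ctx_level lvl)
{
	s->recursion[lvl]--;
}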
- */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs); - rcu_read_unlock(); - - barrier(); - (*recursion)--; - -out: - put_cpu_var(perf_cpu_context); -} - -void __perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) -{ - struct perf_sample_data data = { - .addr = addr, - }; - - do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, - &data, regs); -} - -static void perf_swcounter_read(struct perf_counter *counter) -{ -} - -static int perf_swcounter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - - if (hwc->sample_period) { - hwc->last_period = hwc->sample_period; - perf_swcounter_set_period(counter); - } - return 0; -} - -static void perf_swcounter_disable(struct perf_counter *counter) -{ -} - -static const struct pmu perf_ops_generic = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, - .unthrottle = perf_swcounter_unthrottle, -}; - -/* - * hrtimer based swcounter callback - */ - -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_counter *counter; - u64 period; - - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); - - data.addr = 0; - regs = get_irq_regs(); - /* - * In case we exclude kernel IPs or are somehow not in interrupt - * context, provide the next best thing, the user IP. - */ - if ((counter->attr.exclude_kernel || !regs) && - !counter->attr.exclude_user) - regs = task_pt_regs(current); - - if (regs) { - if (perf_counter_overflow(counter, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, counter->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -/* - * Software counter: cpu wall time clock - */ - -static void cpu_clock_perf_counter_update(struct perf_counter *counter) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = atomic64_read(&counter->hw.prev_count); - atomic64_set(&counter->hw.prev_count, now); - atomic64_add(now - prev, &counter->count); -} - -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int cpu = raw_smp_processor_id(); - - atomic64_set(&hwc->prev_count, cpu_clock(cpu)); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void cpu_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - cpu_clock_perf_counter_update(counter); -} - -static void cpu_clock_perf_counter_read(struct perf_counter *counter) -{ - cpu_clock_perf_counter_update(counter); -} - -static const struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_counter_enable, - .disable = cpu_clock_perf_counter_disable, - .read = cpu_clock_perf_counter_read, -}; - -/* - * Software counter: task time clock - */ - -static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) -{ - u64 prev; - s64 delta; - - prev = atomic64_xchg(&counter->hw.prev_count, now); - 
delta = now - prev; - atomic64_add(delta, &counter->count); -} - -static int task_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 now; - - now = counter->ctx->time; - - atomic64_set(&hwc->prev_count, now); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void task_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter, counter->ctx->time); - -} - -static void task_clock_perf_counter_read(struct perf_counter *counter) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(counter->ctx); - time = counter->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - counter->ctx->timestamp; - time = counter->ctx->time + delta; - } - - task_clock_perf_counter_update(counter, time); -} - -static const struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_counter_enable, - .disable = task_clock_perf_counter_disable, - .read = task_clock_perf_counter_read, -}; - -#ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, - int entry_size) -{ - struct perf_raw_record raw = { - .size = entry_size, - .data = record, - }; - - struct perf_sample_data data = { - .addr = addr, - .raw = &raw, - }; - - struct pt_regs *regs = get_irq_regs(); - - if (!regs) - regs = task_pt_regs(current); - - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, - &data, regs); -} -EXPORT_SYMBOL_GPL(perf_tpcounter_event); - -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); - -static void tp_perf_counter_destroy(struct perf_counter *counter) -{ - ftrace_profile_disable(counter->attr.config); -} - -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - /* - * Raw tracepoint data is a severe data leak, only allow root to - * have these. - */ - if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && - perf_paranoid_tracepoint_raw() && - !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - if (ftrace_profile_enable(counter->attr.config)) - return NULL; - - counter->destroy = tp_perf_counter_destroy; - - return &perf_ops_generic; -} -#else -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} -#endif - -atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; - -static void sw_perf_counter_destroy(struct perf_counter *counter) -{ - u64 event_id = counter->attr.config; - - WARN_ON(counter->parent); - - atomic_dec(&perf_swcounter_enabled[event_id]); -} - -static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) -{ - const struct pmu *pmu = NULL; - u64 event_id = counter->attr.config; - - /* - * Software counters (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu counter, - * use the cpu_clock counter instead. 
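Both clock PMUs above share one update step: atomically swap prev_count to the current time and accumulate the delta, so no interval is ever counted twice, and neither hrtimer is armed for less than 10000 ns. A userspace model of the update, with C11 atomics standing in for the kernel's atomic64 ops:

#include <stdatomic.h>
#include <stdint.h>

struct clock_counter {
	_Atomic uint64_t prev;		/* timestamp at last update */
	_Atomic uint64_t count;		/* accumulated nanoseconds */
};

static void clock_update(struct clock_counter *c, uint64_t now)
{
	uint64_t prev = atomic_exchange(&c->prev, now);

	atomic_fetch_add(&c->count, now - prev);
}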
- */ - if (counter->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - if (!counter->parent) { - atomic_inc(&perf_swcounter_enabled[event_id]); - counter->destroy = sw_perf_counter_destroy; - } - pmu = &perf_ops_generic; - break; - } - - return pmu; -} - -/* - * Allocate and initialize a counter structure - */ -static struct perf_counter * -perf_counter_alloc(struct perf_counter_attr *attr, - int cpu, - struct perf_counter_context *ctx, - struct perf_counter *group_leader, - struct perf_counter *parent_counter, - gfp_t gfpflags) -{ - const struct pmu *pmu; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - long err; - - counter = kzalloc(sizeof(*counter), gfpflags); - if (!counter) - return ERR_PTR(-ENOMEM); - - /* - * Single counters are their own group leaders, with an - * empty sibling list: - */ - if (!group_leader) - group_leader = counter; - - mutex_init(&counter->child_mutex); - INIT_LIST_HEAD(&counter->child_list); - - INIT_LIST_HEAD(&counter->group_entry); - INIT_LIST_HEAD(&counter->event_entry); - INIT_LIST_HEAD(&counter->sibling_list); - init_waitqueue_head(&counter->waitq); - - mutex_init(&counter->mmap_mutex); - - counter->cpu = cpu; - counter->attr = *attr; - counter->group_leader = group_leader; - counter->pmu = NULL; - counter->ctx = ctx; - counter->oncpu = -1; - - counter->parent = parent_counter; - - counter->ns = get_pid_ns(current->nsproxy->pid_ns); - counter->id = atomic64_inc_return(&perf_counter_id); - - counter->state = PERF_COUNTER_STATE_INACTIVE; - - if (attr->disabled) - counter->state = PERF_COUNTER_STATE_OFF; - - pmu = NULL; - - hwc = &counter->hw; - hwc->sample_period = attr->sample_period; - if (attr->freq && attr->sample_freq) - hwc->sample_period = 1; - hwc->last_period = hwc->sample_period; - - atomic64_set(&hwc->period_left, hwc->sample_period); - - /* - * we currently do not support PERF_FORMAT_GROUP on inherited counters - */ - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) - goto done; - - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_counter_init(counter); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_counter_init(counter); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_counter_init(counter); - break; - - default: - break; - } -done: - err = 0; - if (!pmu) - err = -EINVAL; - else if (IS_ERR(pmu)) - err = PTR_ERR(pmu); - - if (err) { - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); - return ERR_PTR(err); - } - - counter->pmu = pmu; - - if (!counter->parent) { - atomic_inc(&nr_counters); - if (counter->attr.mmap) - atomic_inc(&nr_mmap_counters); - if (counter->attr.comm) - atomic_inc(&nr_comm_counters); - if (counter->attr.task) - atomic_inc(&nr_task_counters); - } - - return counter; -} - -static int perf_copy_attr(struct perf_counter_attr __user *uattr, - struct perf_counter_attr *attr) -{ - u32 size; - int ret; - - if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) - return -EFAULT; - - /* - * zero the full structure, so that a short copy will be nice. 
- */ - memset(attr, 0, sizeof(*attr)); - - ret = get_user(size, &uattr->size); - if (ret) - return ret; - - if (size > PAGE_SIZE) /* silly large */ - goto err_size; - - if (!size) /* abi compat */ - size = PERF_ATTR_SIZE_VER0; - - if (size < PERF_ATTR_SIZE_VER0) - goto err_size; - - /* - * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0 - i.e. new - * user-space does not rely on any kernel feature - * extensions we don't know about yet. - */ - if (size > sizeof(*attr)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uattr + sizeof(*attr); - end = (void __user *)uattr + size; - - for (; addr < end; addr++) { - ret = get_user(val, addr); - if (ret) - return ret; - if (val) - goto err_size; - } - size = sizeof(*attr); - } - - ret = copy_from_user(attr, uattr, size); - if (ret) - return -EFAULT; - - /* - * If the type exists, the corresponding creation will verify - * the attr->config. - */ - if (attr->type >= PERF_TYPE_MAX) - return -EINVAL; - - if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) - return -EINVAL; - - if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) - return -EINVAL; - - if (attr->read_format & ~(PERF_FORMAT_MAX-1)) - return -EINVAL; - -out: - return ret; - -err_size: - put_user(sizeof(*attr), &uattr->size); - ret = -E2BIG; - goto out; -} - -int perf_counter_set_output(struct perf_counter *counter, int output_fd) -{ - struct perf_counter *output_counter = NULL; - struct file *output_file = NULL; - struct perf_counter *old_output; - int fput_needed = 0; - int ret = -EINVAL; - - if (!output_fd) - goto set; - - output_file = fget_light(output_fd, &fput_needed); - if (!output_file) - return -EBADF; - - if (output_file->f_op != &perf_fops) - goto out; - - output_counter = output_file->private_data; - - /* Don't chain output fds */ - if (output_counter->output) - goto out; - - /* Don't set an output fd when we already have an output channel */ - if (counter->data) - goto out; - - atomic_long_inc(&output_file->f_count); - -set: - mutex_lock(&counter->mmap_mutex); - old_output = counter->output; - rcu_assign_pointer(counter->output, output_counter); - mutex_unlock(&counter->mmap_mutex); - - if (old_output) { - /* - * we need to make sure no existing perf_output_*() - * is still referencing this counter. - */ - synchronize_rcu(); - fput(old_output->filp); - } - - ret = 0; -out: - fput_light(output_file, fput_needed); - return ret; -} - -/** - * sys_perf_counter_open - open a performance counter, associate it to a task/cpu - * - * @attr_uptr: event type attributes for monitoring/sampling - * @pid: target pid - * @cpu: target cpu - * @group_fd: group leader counter fd - */ -SYSCALL_DEFINE5(perf_counter_open, - struct perf_counter_attr __user *, attr_uptr, - pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) -{ - struct perf_counter *counter, *group_leader; - struct perf_counter_attr attr; - struct perf_counter_context *ctx; - struct file *counter_file = NULL; - struct file *group_file = NULL; - int fput_needed = 0; - int fput_needed2 = 0; - int err; - - /* for future expandability...
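perf_copy_attr() above encodes the versioned-ABI contract: a shorter (older) attr is zero-extended by the kernel, and a longer (newer) attr is accepted only if every byte beyond what the kernel understands is zero. A self-contained model of that decision; the struct names are deliberately fake:

#include <stdint.h>
#include <string.h>

struct fake_attr {			/* what this kernel knows */
	uint32_t type;
	uint32_t size;
	uint64_t config;
};

static int copy_attr(const void *uattr, uint32_t usize,
		     struct fake_attr *attr)
{
	const unsigned char *p = uattr;
	uint32_t i;

	memset(attr, 0, sizeof(*attr));	/* short copy leaves zeroes */

	if (usize > sizeof(*attr)) {	/* newer userspace */
		for (i = sizeof(*attr); i < usize; i++)
			if (p[i])
				return -1;	/* unknown bits set: E2BIG */
		usize = sizeof(*attr);
	}

	memcpy(attr, uattr, usize);
	return 0;
}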
*/ - if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) - return -EINVAL; - - err = perf_copy_attr(attr_uptr, &attr); - if (err) - return err; - - if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - } - - if (attr.freq) { - if (attr.sample_freq > sysctl_perf_counter_sample_rate) - return -EINVAL; - } - - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - /* - * Look up the group leader (we will attach this counter to it): - */ - group_leader = NULL; - if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { - err = -EINVAL; - group_file = fget_light(group_fd, &fput_needed); - if (!group_file) - goto err_put_context; - if (group_file->f_op != &perf_fops) - goto err_put_context; - - group_leader = group_file->private_data; - /* - * Do not allow a recursive hierarchy (this new sibling - * becoming part of another group-sibling): - */ - if (group_leader->group_leader != group_leader) - goto err_put_context; - /* - * Do not allow attaching to a group in a different - * task or CPU context: - */ - if (group_leader->ctx != ctx) - goto err_put_context; - /* - * Only a group leader can be exclusive or pinned - */ - if (attr.exclusive || attr.pinned) - goto err_put_context; - } - - counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); - err = PTR_ERR(counter); - if (IS_ERR(counter)) - goto err_put_context; - - err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); - if (err < 0) - goto err_free_put_context; - - counter_file = fget_light(err, &fput_needed2); - if (!counter_file) - goto err_free_put_context; - - if (flags & PERF_FLAG_FD_OUTPUT) { - err = perf_counter_set_output(counter, group_fd); - if (err) - goto err_fput_free_put_context; - } - - counter->filp = counter_file; - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_install_in_context(ctx, counter, cpu); - ++ctx->generation; - mutex_unlock(&ctx->mutex); - - counter->owner = current; - get_task_struct(current); - mutex_lock(&current->perf_counter_mutex); - list_add_tail(&counter->owner_entry, &current->perf_counter_list); - mutex_unlock(&current->perf_counter_mutex); - -err_fput_free_put_context: - fput_light(counter_file, fput_needed2); - -err_free_put_context: - if (err < 0) - kfree(counter); - -err_put_context: - if (err < 0) - put_ctx(ctx); - - fput_light(group_file, fput_needed); - - return err; -} - -/* - * inherit a counter from parent task to child task: - */ -static struct perf_counter * -inherit_counter(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter *group_leader, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *child_counter; - - /* - * Instead of creating recursive hierarchies of counters, - * we link inherited counters back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - if (parent_counter->parent) - parent_counter = parent_counter->parent; - - child_counter = perf_counter_alloc(&parent_counter->attr, - parent_counter->cpu, child_ctx, - group_leader, parent_counter, - GFP_KERNEL); - if (IS_ERR(child_counter)) - return child_counter; - get_ctx(child_ctx); - - /* - * Make the child state follow the state of the parent counter, - * not its attr.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en, dis}able_family.
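For context, this is roughly how userspace of this era would reach the syscall defined above. The wrapper and the fallback syscall number are assumptions for illustration (298 was the x86-64 value), not something this patch provides:

#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_perf_counter_open
#define __NR_perf_counter_open 298	/* x86-64; assumption, check your arch */
#endif

static long perf_counter_open(void *attr, pid_t pid, int cpu,
			      int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_counter_open, attr, pid, cpu,
		       group_fd, flags);
}

/* e.g. perf_counter_open(&attr, 0, -1, -1, 0): profile the calling
 * task on any CPU, as its own group leader. */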
- */ - if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - else - child_counter->state = PERF_COUNTER_STATE_OFF; - - if (parent_counter->attr.freq) - child_counter->hw.sample_period = parent_counter->hw.sample_period; - - /* - * Link it up in the child's context: - */ - add_counter_to_ctx(child_counter, child_ctx); - - /* - * Get a reference to the parent filp - we will fput it - * when the child counter exits. This is safe to do because - * we are in the parent and we know that the filp still - * exists and has a nonzero count: - */ - atomic_long_inc(&parent_counter->filp->f_count); - - /* - * Link this into the parent counter's child list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_add_tail(&child_counter->child_list, &parent_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - return child_counter; -} - -static int inherit_group(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *leader; - struct perf_counter *sub; - struct perf_counter *child_ctr; - - leader = inherit_counter(parent_counter, parent, parent_ctx, - child, NULL, child_ctx); - if (IS_ERR(leader)) - return PTR_ERR(leader); - list_for_each_entry(sub, &parent_counter->sibling_list, group_entry) { - child_ctr = inherit_counter(sub, parent, parent_ctx, - child, leader, child_ctx); - if (IS_ERR(child_ctr)) - return PTR_ERR(child_ctr); - } - return 0; -} - -static void sync_child_counter(struct perf_counter *child_counter, - struct task_struct *child) -{ - struct perf_counter *parent_counter = child_counter->parent; - u64 child_val; - - if (child_counter->attr.inherit_stat) - perf_counter_read_event(child_counter, child); - - child_val = atomic64_read(&child_counter->count); - - /* - * Add back the child's count to the parent's count: - */ - atomic64_add(child_val, &parent_counter->count); - atomic64_add(child_counter->total_time_enabled, - &parent_counter->child_total_time_enabled); - atomic64_add(child_counter->total_time_running, - &parent_counter->child_total_time_running); - - /* - * Remove this counter from the parent's list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_del_init(&child_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - /* - * Release the parent counter, if this was the last - * reference to it. - */ - fput(parent_counter->filp); -} - -static void -__perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx, - struct task_struct *child) -{ - struct perf_counter *parent_counter; - - update_counter_times(child_counter); - perf_counter_remove_from_context(child_counter); - - parent_counter = child_counter->parent; - /* - * It can happen that parent exits first, and has counters - * that are still around due to the child reference. These - * counters need to be zapped - but otherwise linger. - */ - if (parent_counter) { - sync_child_counter(child_counter, child); - free_counter(child_counter); - } -} - -/* - * When a child task exits, feed back counter values to parent counters. 
- */ -void perf_counter_exit_task(struct task_struct *child) -{ - struct perf_counter *child_counter, *tmp; - struct perf_counter_context *child_ctx; - unsigned long flags; - - if (likely(!child->perf_counter_ctxp)) { - perf_counter_task(child, NULL, 0); - return; - } - - local_irq_save(flags); - /* - * We can't reschedule here because interrupts are disabled, - * and either child is current or it is a task that can't be - * scheduled, so we are now safe from rescheduling changing - * our context. - */ - child_ctx = child->perf_counter_ctxp; - __perf_counter_task_sched_out(child_ctx); - - /* - * Take the context lock here so that if find_get_context is - * reading child->perf_counter_ctxp, we wait until it has - * incremented the context's refcount before we do put_ctx below. - */ - spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; - /* - * If this context is a clone, unclone it so it can't get - * swapped to another process while we're removing all - * the counters from it. - */ - unclone_ctx(child_ctx); - spin_unlock_irqrestore(&child_ctx->lock, flags); - - /* - * Report the task dead after unscheduling the counters so that we - * won't get any samples after PERF_EVENT_EXIT. We can however still - * get a few PERF_EVENT_READ events. - */ - perf_counter_task(child, child_ctx, 0); - - /* - * We can recurse on the same lock type through: - * - * __perf_counter_exit_task() - * sync_child_counter() - * fput(parent_counter->filp) - * perf_release() - * mutex_lock(&ctx->mutex) - * - * But since it's the parent context it won't be the same instance. - */ - mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); - -again: - list_for_each_entry_safe(child_counter, tmp, &child_ctx->group_list, - group_entry) - __perf_counter_exit_task(child_counter, child_ctx, child); - - /* - * If the last counter was a group counter, it will have appended all - * its siblings to the list, but we obtained 'tmp' before that which - * will still point to the list head terminating the iteration. - */ - if (!list_empty(&child_ctx->group_list)) - goto again; - - mutex_unlock(&child_ctx->mutex); - - put_ctx(child_ctx); -} - -/* - * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of failure.
- */ -void perf_counter_free_task(struct task_struct *task) -{ - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter *counter, *tmp; - - if (!ctx) - return; - - mutex_lock(&ctx->mutex); -again: - list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) { - struct perf_counter *parent = counter->parent; - - if (WARN_ON_ONCE(!parent)) - continue; - - mutex_lock(&parent->child_mutex); - list_del_init(&counter->child_list); - mutex_unlock(&parent->child_mutex); - - fput(parent->filp); - - list_del_counter(counter, ctx); - free_counter(counter); - } - - if (!list_empty(&ctx->group_list)) - goto again; - - mutex_unlock(&ctx->mutex); - - put_ctx(ctx); -} - -/* - * Initialize the perf_counter context in task_struct - */ -int perf_counter_init_task(struct task_struct *child) -{ - struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter_context *cloned_ctx; - struct perf_counter *counter; - struct task_struct *parent = current; - int inherited_all = 1; - int ret = 0; - - child->perf_counter_ctxp = NULL; - - mutex_init(&child->perf_counter_mutex); - INIT_LIST_HEAD(&child->perf_counter_list); - - if (likely(!parent->perf_counter_ctxp)) - return 0; - - /* - * This is executed from the parent task context, so inherit - * counters that have been marked for cloning. - * First allocate and initialize a context for the child. - */ - - child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - if (!child_ctx) - return -ENOMEM; - - __perf_counter_init_context(child_ctx, child); - child->perf_counter_ctxp = child_ctx; - get_task_struct(child); - - /* - * If the parent's context is a clone, pin it so it won't get - * swapped under us. - */ - parent_ctx = perf_pin_task_context(parent); - - /* - * No need to check if parent_ctx != NULL here; since we saw - * it non-NULL earlier, the only reason for it to become NULL - * is if we exit, and since we're currently in the middle of - * a fork we can't be exiting at the same time. - */ - - /* - * Lock the parent list. No need to lock the child - not PID - * hashed yet and not running, so nobody can access it. - */ - mutex_lock(&parent_ctx->mutex); - - /* - * We don't have to disable NMIs - we are only looking at - * the list, not manipulating it: - */ - list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { - if (counter != counter->group_leader) - continue; - - if (!counter->attr.inherit) { - inherited_all = 0; - continue; - } - - ret = inherit_group(counter, parent, parent_ctx, - child, child_ctx); - if (ret) { - inherited_all = 0; - break; - } - } - - if (inherited_all) { - /* - * Mark the child context as a clone of the parent - * context, or of whatever the parent is a clone of. - * Note that if the parent is a clone, it could get - * uncloned at any point, but that doesn't matter - * because the list of counters and the generation - * count can't have changed since we took the mutex.
- */ - cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); - if (cloned_ctx) { - child_ctx->parent_ctx = cloned_ctx; - child_ctx->parent_gen = parent_ctx->parent_gen; - } else { - child_ctx->parent_ctx = parent_ctx; - child_ctx->parent_gen = parent_ctx->generation; - } - get_ctx(child_ctx->parent_ctx); - } - - mutex_unlock(&parent_ctx->mutex); - - perf_unpin_context(parent_ctx); - - return ret; -} - -static void __cpuinit perf_counter_init_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_counter_init_context(&cpuctx->ctx, NULL); - - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - - hw_perf_counter_setup(cpu); -} - -#ifdef CONFIG_HOTPLUG_CPU -static void __perf_counter_exit_cpu(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = &cpuctx->ctx; - struct perf_counter *counter, *tmp; - - list_for_each_entry_safe(counter, tmp, &ctx->group_list, group_entry) - __perf_counter_remove_from_context(counter); -} -static void perf_counter_exit_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &cpuctx->ctx; - - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); -} -#else -static inline void perf_counter_exit_cpu(int cpu) { } -#endif - -static int __cpuinit -perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action) { - - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - perf_counter_init_cpu(cpu); - break; - - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - hw_perf_counter_setup_online(cpu); - break; - - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - perf_counter_exit_cpu(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - -void __init perf_counter_init(void) -{ - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); -} - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > perf_max_counters) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, - perf_max_counters - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_counters", -}; - -static int __init perf_counter_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_counter_sysfs_init); diff --git a/kernel/perf_event.c b/kernel/perf_event.c new file mode 100644 index 00000000000..6e8b99a04e1 --- /dev/null +++ b/kernel/perf_event.c @@ -0,0 +1,5000 @@ +/* + * Performance event core code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright © 2009 Paul Mackerras, IBM Corp. 
+ * + * For licensing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * Each CPU has a list of per CPU events: + */ +DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); + +int perf_max_events __read_mostly = 1; +static int perf_reserved_percpu __read_mostly; +static int perf_overcommit __read_mostly = 1; + +static atomic_t nr_events __read_mostly; +static atomic_t nr_mmap_events __read_mostly; +static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_task_events __read_mostly; + +/* + * perf event paranoia level: + * -1 - not paranoid at all + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu events for unpriv + * 2 - disallow kernel profiling for unpriv + */ +int sysctl_perf_event_paranoid __read_mostly = 1; + +static inline bool perf_paranoid_tracepoint_raw(void) +{ + return sysctl_perf_event_paranoid > -1; +} + +static inline bool perf_paranoid_cpu(void) +{ + return sysctl_perf_event_paranoid > 0; +} + +static inline bool perf_paranoid_kernel(void) +{ + return sysctl_perf_event_paranoid > 1; +} + +int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ + +/* + * max perf event sample rate + */ +int sysctl_perf_event_sample_rate __read_mostly = 100000; + +static atomic64_t perf_event_id; + +/* + * Lock for (sysadmin-configurable) event reservations: + */ +static DEFINE_SPINLOCK(perf_resource_lock); + +/* + * Architecture provided APIs - weak aliases: + */ +extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + return NULL; +} + +void __weak hw_perf_disable(void) { barrier(); } +void __weak hw_perf_enable(void) { barrier(); } + +void __weak hw_perf_event_setup(int cpu) { barrier(); } +void __weak hw_perf_event_setup_online(int cpu) { barrier(); } + +int __weak +hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + return 0; +} + +void __weak perf_event_print_debug(void) { } + +static DEFINE_PER_CPU(int, perf_disable_count); + +void __perf_disable(void) +{ + __get_cpu_var(perf_disable_count)++; +} + +bool __perf_enable(void) +{ + return !--__get_cpu_var(perf_disable_count); +} + +void perf_disable(void) +{ + __perf_disable(); + hw_perf_disable(); +} + +void perf_enable(void) +{ + if (__perf_enable()) + hw_perf_enable(); +} + +static void get_ctx(struct perf_event_context *ctx) +{ + WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); +} + +static void free_ctx(struct rcu_head *head) +{ + struct perf_event_context *ctx; + + ctx = container_of(head, struct perf_event_context, rcu_head); + kfree(ctx); +} + +static void put_ctx(struct perf_event_context *ctx) +{ + if (atomic_dec_and_test(&ctx->refcount)) { + if (ctx->parent_ctx) + put_ctx(ctx->parent_ctx); + if (ctx->task) + put_task_struct(ctx->task); + call_rcu(&ctx->rcu_head, free_ctx); + } +} + +static void unclone_ctx(struct perf_event_context *ctx) +{ + if (ctx->parent_ctx) { + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; + } +} + +/* + * If we inherit events we want to return the parent event id + * to userspace. + */ +static u64 primary_event_id(struct perf_event *event) +{ + u64 id = event->id; + + if (event->parent) + id = event->parent->id; + + return id; +} + +/* + * Get the perf_event_context for a task and lock it. 
+/*
+ * Get the perf_event_context for a task and lock it.
+ * This has to cope with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_event_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+{
+	struct perf_event_context *ctx;
+
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_event_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_event_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more.
+		 */
+		spin_lock_irqsave(&ctx->lock, *flags);
+		if (ctx != rcu_dereference(task->perf_event_ctxp)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			goto retry;
+		}
+
+		if (!atomic_inc_not_zero(&ctx->refcount)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			ctx = NULL;
+		}
+	}
+	rcu_read_unlock();
+	return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task.  This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
+{
+	struct perf_event_context *ctx;
+	unsigned long flags;
+
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		++ctx->pin_count;
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+	return ctx;
+}
+
+static void perf_unpin_context(struct perf_event_context *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	--ctx->pin_count;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	put_ctx(ctx);
+}
+
+/*
+ * Add an event to the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
+static void
+list_add_event(struct perf_event *event, struct perf_event_context *ctx)
+{
+	struct perf_event *group_leader = event->group_leader;
+
+	/*
+	 * Depending on whether it is a standalone or sibling event,
+	 * add it straight to the context's event list, or to the group
+	 * leader's sibling list:
+	 */
+	if (group_leader == event)
+		list_add_tail(&event->group_entry, &ctx->group_list);
+	else {
+		list_add_tail(&event->group_entry, &group_leader->sibling_list);
+		group_leader->nr_siblings++;
+	}
+
+	list_add_rcu(&event->event_entry, &ctx->event_list);
+	ctx->nr_events++;
+	if (event->attr.inherit_stat)
+		ctx->nr_stat++;
+}
+
+/*
+ * Remove an event from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */ +static void +list_del_event(struct perf_event *event, struct perf_event_context *ctx) +{ + struct perf_event *sibling, *tmp; + + if (list_empty(&event->group_entry)) + return; + ctx->nr_events--; + if (event->attr.inherit_stat) + ctx->nr_stat--; + + list_del_init(&event->group_entry); + list_del_rcu(&event->event_entry); + + if (event->group_leader != event) + event->group_leader->nr_siblings--; + + /* + * If this was a group event with sibling events then + * upgrade the siblings to singleton events by adding them + * to the context list directly: + */ + list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { + + list_move_tail(&sibling->group_entry, &ctx->group_list); + sibling->group_leader = sibling; + } +} + +static void +event_sched_out(struct perf_event *event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + + event->state = PERF_EVENT_STATE_INACTIVE; + if (event->pending_disable) { + event->pending_disable = 0; + event->state = PERF_EVENT_STATE_OFF; + } + event->tstamp_stopped = ctx->time; + event->pmu->disable(event); + event->oncpu = -1; + + if (!is_software_event(event)) + cpuctx->active_oncpu--; + ctx->nr_active--; + if (event->attr.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + +static void +group_sched_out(struct perf_event *group_event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + struct perf_event *event; + + if (group_event->state != PERF_EVENT_STATE_ACTIVE) + return; + + event_sched_out(group_event, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) + event_sched_out(event, cpuctx, ctx); + + if (group_event->attr.exclusive) + cpuctx->exclusive = 0; +} + +/* + * Cross CPU call to remove a performance event + * + * We disable the event on the hardware level first. After that we + * remove it from the context list. + */ +static void __perf_event_remove_from_context(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + spin_lock(&ctx->lock); + /* + * Protect the list operation against NMI by disabling the + * events on a global level. + */ + perf_disable(); + + event_sched_out(event, cpuctx, ctx); + + list_del_event(event, ctx); + + if (!ctx->task) { + /* + * Allow more per task events with respect to the + * reservation: + */ + cpuctx->max_pertask = + min(perf_max_events - ctx->nr_events, + perf_max_events - perf_reserved_percpu); + } + + perf_enable(); + spin_unlock(&ctx->lock); +} + + +/* + * Remove the event from a task's (or a CPU's) list of events. + * + * Must be called with ctx->mutex held. + * + * CPU events are removed with a smp call. For task events we only + * call when the task is on a CPU. + * + * If event->ctx is a cloned context, callers must make sure that + * every task struct that event->ctx->task could possibly point to + * remains valid. This is OK when called from perf_release since + * that only calls us on the top-level context, which can't be a clone. + * When called from perf_event_exit_task, it's OK because the + * context has been detached from its task. 
+ */
+static void perf_event_remove_from_context(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu events are removed via an smp call and
+		 * the removal is always successful.
+		 */
+		smp_call_function_single(event->cpu,
+					 __perf_event_remove_from_context,
+					 event, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_event_remove_from_context,
+				 event);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the context is active we need to retry the smp call.
+	 */
+	if (ctx->nr_active && !list_empty(&event->group_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents this context from being scheduled in, so
+	 * we can remove the event safely if the call above did not
+	 * succeed.
+	 */
+	if (!list_empty(&event->group_entry)) {
+		list_del_event(event, ctx);
+	}
+	spin_unlock_irq(&ctx->lock);
+}
+
+static inline u64 perf_clock(void)
+{
+	return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for an event.
+ */
+static void update_event_times(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	u64 run_end;
+
+	if (event->state < PERF_EVENT_STATE_INACTIVE ||
+	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+		return;
+
+	event->total_time_enabled = ctx->time - event->tstamp_enabled;
+
+	if (event->state == PERF_EVENT_STATE_INACTIVE)
+		run_end = event->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	event->total_time_running = run_end - event->tstamp_running;
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all events in a group.
+ */
+static void update_group_times(struct perf_event *leader)
+{
+	struct perf_event *event;
+
+	update_event_times(leader);
+	list_for_each_entry(event, &leader->sibling_list, group_entry)
+		update_event_times(event);
+}
+
+/*
+ * Cross CPU call to disable a performance event
+ */
+static void __perf_event_disable(void *info)
+{
+	struct perf_event *event = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = event->ctx;
+
+	/*
+	 * If this is a per-task event, need to check whether this
+	 * event's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * If the event is on, turn it off.
+	 * If it is in error state, leave it in error state.
+	 */
+	if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+		update_context_time(ctx);
+		update_group_times(event);
+		if (event == event->group_leader)
+			group_sched_out(event, cpuctx, ctx);
+		else
+			event_sched_out(event, cpuctx, ctx);
+		event->state = PERF_EVENT_STATE_OFF;
+	}
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Disable an event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_event_for_each_child or perf_event_for_each because they
+ * hold the top-level event's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_event.
+ * When called from perf_pending_event it's OK because event->ctx + * is the current context on this CPU and preemption is disabled, + * hence we can't get into perf_event_task_sched_out for this context. + */ +static void perf_event_disable(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Disable the event on the cpu that it's on + */ + smp_call_function_single(event->cpu, __perf_event_disable, + event, 1); + return; + } + + retry: + task_oncpu_function_call(task, __perf_event_disable, event); + + spin_lock_irq(&ctx->lock); + /* + * If the event is still active, we need to retry the cross-call. + */ + if (event->state == PERF_EVENT_STATE_ACTIVE) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) { + update_group_times(event); + event->state = PERF_EVENT_STATE_OFF; + } + + spin_unlock_irq(&ctx->lock); +} + +static int +event_sched_in(struct perf_event *event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + int cpu) +{ + if (event->state <= PERF_EVENT_STATE_OFF) + return 0; + + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + /* + * The new state must be visible before we turn it on in the hardware: + */ + smp_wmb(); + + if (event->pmu->enable(event)) { + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; + return -EAGAIN; + } + + event->tstamp_running += ctx->time - event->tstamp_stopped; + + if (!is_software_event(event)) + cpuctx->active_oncpu++; + ctx->nr_active++; + + if (event->attr.exclusive) + cpuctx->exclusive = 1; + + return 0; +} + +static int +group_sched_in(struct perf_event *group_event, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + int cpu) +{ + struct perf_event *event, *partial_group; + int ret; + + if (group_event->state == PERF_EVENT_STATE_OFF) + return 0; + + ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); + if (ret) + return ret < 0 ? ret : 0; + + if (event_sched_in(group_event, cpuctx, ctx, cpu)) + return -EAGAIN; + + /* + * Schedule in siblings as one group (if any): + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event_sched_in(event, cpuctx, ctx, cpu)) { + partial_group = event; + goto group_error; + } + } + + return 0; + +group_error: + /* + * Groups can be scheduled in as one unit only, so undo any + * partial group before returning: + */ + list_for_each_entry(event, &group_event->sibling_list, group_entry) { + if (event == partial_group) + break; + event_sched_out(event, cpuctx, ctx); + } + event_sched_out(group_event, cpuctx, ctx); + + return -EAGAIN; +} + +/* + * Return 1 for a group consisting entirely of software events, + * 0 if the group contains any hardware events. + */ +static int is_software_only_group(struct perf_event *leader) +{ + struct perf_event *event; + + if (!is_software_event(leader)) + return 0; + + list_for_each_entry(event, &leader->sibling_list, group_entry) + if (!is_software_event(event)) + return 0; + + return 1; +} + +/* + * Work out whether we can put this event group on the CPU now. + */ +static int group_can_go_on(struct perf_event *event, + struct perf_cpu_context *cpuctx, + int can_add_hw) +{ + /* + * Groups consisting entirely of software events can always go on. 
+	 */
+	if (is_software_only_group(event))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * events can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * events on the CPU, it can't go on.
+	 */
+	if (event->attr.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+static void add_event_to_ctx(struct perf_event *event,
+			       struct perf_event_context *ctx)
+{
+	list_add_event(event, ctx);
+	event->tstamp_enabled = ctx->time;
+	event->tstamp_running = ctx->time;
+	event->tstamp_stopped = ctx->time;
+}
+
+/*
+ * Cross CPU call to install and enable a performance event
+ *
+ * Must be called with ctx->mutex held
+ */
+static void __perf_install_in_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event *event = info;
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *leader = event->group_leader;
+	int cpu = smp_processor_id();
+	int err;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no events.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * events on a global level. NOP for non NMI based events.
+	 */
+	perf_disable();
+
+	add_event_to_ctx(event, ctx);
+
+	/*
+	 * Don't put the event on if it is disabled or if
+	 * it is in a group and the group isn't on.
+	 */
+	if (event->state != PERF_EVENT_STATE_INACTIVE ||
+	    (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
+		goto unlock;
+
+	/*
+	 * An exclusive event can't go on if there are already active
+	 * hardware events, and no hardware event can go on if there
+	 * is already an exclusive event on.
+	 */
+	if (!group_can_go_on(event, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = event_sched_in(event, cpuctx, ctx, cpu);
+
+	if (err) {
+		/*
+		 * This event couldn't go on. If it is in a group
+		 * then we have to pull the whole group off.
+		 * If the event group is pinned then put it in error state.
+		 */
+		if (leader != event)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+	if (!err && !ctx->task && cpuctx->max_pertask)
+		cpuctx->max_pertask--;
+
+ unlock:
+	perf_enable();
+
+	spin_unlock(&ctx->lock);
+}
+
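group_can_go_on() above is a small four-way decision table, and __perf_install_in_context() leans on it to decide between event_sched_in() and -EEXIST. A standalone restatement that can be compiled and experimented with (fake_group and fake_cpuctx are illustrative stand-ins for the kernel structures, not real API):

    /* Restatement of the group_can_go_on() decision table, kernel
     * types reduced to plain flags for illustration only. */
    #include <stdio.h>

    struct fake_group { int software_only; int exclusive; };
    struct fake_cpuctx { int exclusive; int active_oncpu; };

    static int can_go_on(const struct fake_group *grp,
    		     const struct fake_cpuctx *cpuctx, int can_add_hw)
    {
    	if (grp->software_only)			/* software groups always fit */
    		return 1;
    	if (cpuctx->exclusive)			/* an exclusive group owns the PMU */
    		return 0;
    	if (grp->exclusive && cpuctx->active_oncpu)
    		return 0;			/* exclusive needs an empty PMU */
    	return can_add_hw;			/* otherwise: room permitting */
    }

    int main(void)
    {
    	struct fake_cpuctx busy = { .exclusive = 0, .active_oncpu = 2 };
    	struct fake_group excl = { .software_only = 0, .exclusive = 1 };
    	struct fake_group sw   = { .software_only = 1, .exclusive = 0 };

    	printf("exclusive group on busy PMU: %d\n", can_go_on(&excl, &busy, 1)); /* 0 */
    	printf("software group on busy PMU:  %d\n", can_go_on(&sw, &busy, 1));   /* 1 */
    	return 0;
    }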
+/*
+ * Attach a performance event to a context
+ *
+ * First we add the event to the list with the hardware enable bit
+ * in event->hw_config cleared.
+ *
+ * If the event is attached to a task which is on a CPU we use an smp
+ * call to enable it in the task context. The task might have been
+ * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
+ */
+static void
+perf_install_in_context(struct perf_event_context *ctx,
+			struct perf_event *event,
+			int cpu)
+{
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu events are installed via an smp call and
+		 * the install is always successful.
+		 */
+		smp_call_function_single(cpu, __perf_install_in_context,
+					 event, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_install_in_context,
+				 event);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * we need to retry the smp call.
+	 */
+	if (ctx->is_active && list_empty(&event->group_entry)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents this context from being scheduled in, so
+	 * we can add the event safely if the call above did not
+	 * succeed.
+	 */
+	if (list_empty(&event->group_entry))
+		add_event_to_ctx(event, ctx);
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Put an event into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_event_mark_enabled(struct perf_event *event,
+					struct perf_event_context *ctx)
+{
+	struct perf_event *sub;
+
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	event->tstamp_enabled = ctx->time - event->total_time_enabled;
+	list_for_each_entry(sub, &event->sibling_list, group_entry)
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+			sub->tstamp_enabled =
+				ctx->time - sub->total_time_enabled;
+}
+
+/*
+ * Cross CPU call to enable a performance event
+ */
+static void __perf_event_enable(void *info)
+{
+	struct perf_event *event = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *leader = event->group_leader;
+	int err;
+
+	/*
+	 * If this is a per-task event, need to check whether this
+	 * event's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	update_context_time(ctx);
+
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		goto unlock;
+	__perf_event_mark_enabled(event, ctx);
+
+	/*
+	 * If the event is in a group and isn't the group leader,
+	 * then don't put it on unless the group is on.
+	 */
+	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+		goto unlock;
+
+	if (!group_can_go_on(event, cpuctx, 1)) {
+		err = -EEXIST;
+	} else {
+		perf_disable();
+		if (event == leader)
+			err = group_sched_in(event, cpuctx, ctx,
+					     smp_processor_id());
+		else
+			err = event_sched_in(event, cpuctx, ctx,
+					     smp_processor_id());
+		perf_enable();
+	}
+
+	if (err) {
+		/*
+		 * If this event can't go on and it's part of a
+		 * group, then the whole group has to come off.
+		 */
+		if (leader != event)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->attr.pinned) {
+			update_group_times(leader);
+			leader->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+ unlock:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Enable an event.
+ *
+ * If event->ctx is a cloned context, callers must make sure that
+ * every task struct that event->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_event_for_each_child or perf_event_for_each as described
+ * for perf_event_disable.
+ */ +static void perf_event_enable(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Enable the event on the cpu that it's on + */ + smp_call_function_single(event->cpu, __perf_event_enable, + event, 1); + return; + } + + spin_lock_irq(&ctx->lock); + if (event->state >= PERF_EVENT_STATE_INACTIVE) + goto out; + + /* + * If the event is in error state, clear that first. + * That way, if we see the event in error state below, we + * know that it has gone back into error state, as distinct + * from the task having been scheduled away before the + * cross-call arrived. + */ + if (event->state == PERF_EVENT_STATE_ERROR) + event->state = PERF_EVENT_STATE_OFF; + + retry: + spin_unlock_irq(&ctx->lock); + task_oncpu_function_call(task, __perf_event_enable, event); + + spin_lock_irq(&ctx->lock); + + /* + * If the context is active and the event is still off, + * we need to retry the cross-call. + */ + if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) + goto retry; + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (event->state == PERF_EVENT_STATE_OFF) + __perf_event_mark_enabled(event, ctx); + + out: + spin_unlock_irq(&ctx->lock); +} + +static int perf_event_refresh(struct perf_event *event, int refresh) +{ + /* + * not supported on inherited events + */ + if (event->attr.inherit) + return -EINVAL; + + atomic_add(refresh, &event->event_limit); + perf_event_enable(event); + + return 0; +} + +void __perf_event_sched_out(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx) +{ + struct perf_event *event; + + spin_lock(&ctx->lock); + ctx->is_active = 0; + if (likely(!ctx->nr_events)) + goto out; + update_context_time(ctx); + + perf_disable(); + if (ctx->nr_active) { + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event != event->group_leader) + event_sched_out(event, cpuctx, ctx); + else + group_sched_out(event, cpuctx, ctx); + } + } + perf_enable(); + out: + spin_unlock(&ctx->lock); +} + +/* + * Test whether two contexts are equivalent, i.e. whether they + * have both been cloned from the same version of the same context + * and they both have the same number of enabled events. + * If the number of enabled events is the same, then the set + * of enabled events should be the same, because these are both + * inherited contexts, therefore we can't access individual events + * in them directly with an fd; we can only enable/disable all + * events via prctl, or enable/disable all events in a family + * via ioctl, which will have the same effect on both contexts. + */ +static int context_equiv(struct perf_event_context *ctx1, + struct perf_event_context *ctx2) +{ + return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx + && ctx1->parent_gen == ctx2->parent_gen + && !ctx1->pin_count && !ctx2->pin_count; +} + +static void __perf_event_read(void *event); + +static void __perf_event_sync_stat(struct perf_event *event, + struct perf_event *next_event) +{ + u64 value; + + if (!event->attr.inherit_stat) + return; + + /* + * Update the event value, we cannot use perf_event_read() + * because we're in the middle of a context switch and have IRQs + * disabled, which upsets smp_call_function_single(), however + * we know the event must be on the current CPU, therefore we + * don't need to use it. 
+	 */
+	switch (event->state) {
+	case PERF_EVENT_STATE_ACTIVE:
+		__perf_event_read(event);
+		break;
+
+	case PERF_EVENT_STATE_INACTIVE:
+		update_event_times(event);
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * In order to keep per-task stats reliable we need to flip the event
+	 * values when we flip the contexts.
+	 */
+	value = atomic64_read(&next_event->count);
+	value = atomic64_xchg(&event->count, value);
+	atomic64_set(&next_event->count, value);
+
+	swap(event->total_time_enabled, next_event->total_time_enabled);
+	swap(event->total_time_running, next_event->total_time_running);
+
+	/*
+	 * Since we swizzled the values, update the user visible data too.
+	 */
+	perf_event_update_userpage(event);
+	perf_event_update_userpage(next_event);
+}
+
+#define list_next_entry(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+
+static void perf_event_sync_stat(struct perf_event_context *ctx,
+				   struct perf_event_context *next_ctx)
+{
+	struct perf_event *event, *next_event;
+
+	if (!ctx->nr_stat)
+		return;
+
+	event = list_first_entry(&ctx->event_list,
+				   struct perf_event, event_entry);
+
+	next_event = list_first_entry(&next_ctx->event_list,
+				        struct perf_event, event_entry);
+
+	while (&event->event_entry != &ctx->event_list &&
+	       &next_event->event_entry != &next_ctx->event_list) {
+
+		__perf_event_sync_stat(event, next_event);
+
+		event = list_next_entry(event, event_entry);
+		next_event = list_next_entry(next_event, event_entry);
+	}
+}
+
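The three-step dance in __perf_event_sync_stat() is just an atomic-friendly swap: read one counter, exchange it into the other, store the displaced value back, so each task keeps the totals it accumulated even though the two cloned contexts trade places. A hedged userspace rendering of the same flip with C11 atomics (names invented):

    #include <stdatomic.h>
    #include <stdio.h>

    static void sync_counts(atomic_llong *prev_ctr, atomic_llong *next_ctr)
    {
    	long long value;

    	/* same three steps as the kernel code above */
    	value = atomic_load(next_ctr);
    	value = atomic_exchange(prev_ctr, value);
    	atomic_store(next_ctr, value);
    }

    int main(void)
    {
    	atomic_llong task_a, task_b;

    	atomic_init(&task_a, 100);	/* ticks counted while task A ran */
    	atomic_init(&task_b, 7);

    	sync_counts(&task_a, &task_b);

    	/* prints a=7 b=100: each total followed its task across the swap */
    	printf("a=%lld b=%lld\n", (long long)atomic_load(&task_a),
    	       (long long)atomic_load(&task_b));
    	return 0;
    }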
+/*
+ * Called from scheduler to remove the events of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each event and update the event value in event->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of event _before_
+ * accessing the event control register. If an NMI hits, then it will
+ * not restart the event.
+ */
+void perf_event_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+	struct perf_event_context *next_ctx;
+	struct perf_event_context *parent;
+	struct pt_regs *regs;
+	int do_switch = 1;
+
+	regs = task_pt_regs(task);
+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	if (likely(!ctx || !cpuctx->task_ctx))
+		return;
+
+	update_context_time(ctx);
+
+	rcu_read_lock();
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_ctx = next->perf_event_ctxp;
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+		/*
+		 * Looks like the two contexts are clones, so we might be
+		 * able to optimize the context switch.  We lock both
+		 * contexts and check that they are clones under the
+		 * lock (including re-checking that neither has been
+		 * uncloned in the meantime).  It doesn't matter which
+		 * order we take the locks because no other cpu could
+		 * be trying to lock both of these tasks.
+		 */
+		spin_lock(&ctx->lock);
+		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		if (context_equiv(ctx, next_ctx)) {
+			/*
+			 * XXX do we need a memory barrier of sorts
+			 * w.r.t. rcu_dereference() of perf_event_ctxp?
+			 */
+			task->perf_event_ctxp = next_ctx;
+			next->perf_event_ctxp = ctx;
+			ctx->task = next;
+			next_ctx->task = task;
+			do_switch = 0;
+
+			perf_event_sync_stat(ctx, next_ctx);
+		}
+		spin_unlock(&next_ctx->lock);
+		spin_unlock(&ctx->lock);
+	}
+	rcu_read_unlock();
+
+	if (do_switch) {
+		__perf_event_sched_out(ctx, cpuctx);
+		cpuctx->task_ctx = NULL;
+	}
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	if (!cpuctx->task_ctx)
+		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
+	__perf_event_sched_out(ctx, cpuctx);
+	cpuctx->task_ctx = NULL;
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+{
+	__perf_event_sched_out(&cpuctx->ctx, cpuctx);
+}
+
+static void
+__perf_event_sched_in(struct perf_event_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_event *event;
+	int can_add_hw = 1;
+
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	if (likely(!ctx->nr_events))
+		goto out;
+
+	ctx->timestamp = perf_clock();
+
+	perf_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF ||
+		    !event->attr.pinned)
+			continue;
+		if (event->cpu != -1 && event->cpu != cpu)
+			continue;
+
+		if (event != event->group_leader)
+			event_sched_in(event, cpuctx, ctx, cpu);
+		else {
+			if (group_can_go_on(event, cpuctx, 1))
+				group_sched_in(event, cpuctx, ctx, cpu);
+		}
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (event->state == PERF_EVENT_STATE_INACTIVE) {
+			update_group_times(event);
+			event->state = PERF_EVENT_STATE_ERROR;
+		}
+	}
+
+	list_for_each_entry(event, &ctx->group_list, group_entry) {
+		/*
+		 * Ignore events in OFF or ERROR state, and
+		 * ignore pinned events since we did them already.
+		 */
+		if (event->state <= PERF_EVENT_STATE_OFF ||
+		    event->attr.pinned)
+			continue;
+
+		/*
+		 * Listen to the 'cpu' scheduling filter constraint
+		 * of events:
+		 */
+		if (event->cpu != -1 && event->cpu != cpu)
+			continue;
+
+		if (event != event->group_leader) {
+			if (event_sched_in(event, cpuctx, ctx, cpu))
+				can_add_hw = 0;
+		} else {
+			if (group_can_go_on(event, cpuctx, can_add_hw)) {
+				if (group_sched_in(event, cpuctx, ctx, cpu))
+					can_add_hw = 0;
+			}
+		}
+	}
+	perf_enable();
+ out:
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to add the events of the current task
+ * with interrupts disabled.
+ *
+ * We restore the event value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of event _before_
+ * accessing the event control register. If an NMI hits, then it will
+ * keep the event running.
+ */ +void perf_event_task_sched_in(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_event_context *ctx = task->perf_event_ctxp; + + if (likely(!ctx)) + return; + if (cpuctx->task_ctx == ctx) + return; + __perf_event_sched_in(ctx, cpuctx, cpu); + cpuctx->task_ctx = ctx; +} + +static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +{ + struct perf_event_context *ctx = &cpuctx->ctx; + + __perf_event_sched_in(ctx, cpuctx, cpu); +} + +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_event *event, int enable); + +static void perf_adjust_period(struct perf_event *event, u64 events) +{ + struct hw_perf_event *hwc = &event->hw; + u64 period, sample_period; + s64 delta; + + events *= hwc->sample_period; + period = div64_u64(events, event->attr.sample_freq); + + delta = (s64)(period - hwc->sample_period); + delta = (delta + 7) / 8; /* low pass filter */ + + sample_period = hwc->sample_period + delta; + + if (!sample_period) + sample_period = 1; + + hwc->sample_period = sample_period; +} + +static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +{ + struct perf_event *event; + struct hw_perf_event *hwc; + u64 interrupts, freq; + + spin_lock(&ctx->lock); + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + continue; + + hwc = &event->hw; + + interrupts = hwc->interrupts; + hwc->interrupts = 0; + + /* + * unthrottle events on the tick + */ + if (interrupts == MAX_INTERRUPTS) { + perf_log_throttle(event, 1); + event->pmu->unthrottle(event); + interrupts = 2*sysctl_perf_event_sample_rate/HZ; + } + + if (!event->attr.freq || !event->attr.sample_freq) + continue; + + /* + * if the specified freq < HZ then we need to skip ticks + */ + if (event->attr.sample_freq < HZ) { + freq = event->attr.sample_freq; + + hwc->freq_count += freq; + hwc->freq_interrupts += interrupts; + + if (hwc->freq_count < HZ) + continue; + + interrupts = hwc->freq_interrupts; + hwc->freq_interrupts = 0; + hwc->freq_count -= HZ; + } else + freq = HZ; + + perf_adjust_period(event, freq * interrupts); + + /* + * In order to avoid being stalled by an (accidental) huge + * sample period, force reset the sample period if we didn't + * get any events in this freq period. 
+ */ + if (!interrupts) { + perf_disable(); + event->pmu->disable(event); + atomic64_set(&hwc->period_left, 0); + event->pmu->enable(event); + perf_enable(); + } + } + spin_unlock(&ctx->lock); +} + +/* + * Round-robin a context's events: + */ +static void rotate_ctx(struct perf_event_context *ctx) +{ + struct perf_event *event; + + if (!ctx->nr_events) + return; + + spin_lock(&ctx->lock); + /* + * Rotate the first entry last (works just fine for group events too): + */ + perf_disable(); + list_for_each_entry(event, &ctx->group_list, group_entry) { + list_move_tail(&event->group_entry, &ctx->group_list); + break; + } + perf_enable(); + + spin_unlock(&ctx->lock); +} + +void perf_event_task_tick(struct task_struct *curr, int cpu) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + + if (!atomic_read(&nr_events)) + return; + + cpuctx = &per_cpu(perf_cpu_context, cpu); + ctx = curr->perf_event_ctxp; + + perf_ctx_adjust_freq(&cpuctx->ctx); + if (ctx) + perf_ctx_adjust_freq(ctx); + + perf_event_cpu_sched_out(cpuctx); + if (ctx) + __perf_event_task_sched_out(ctx); + + rotate_ctx(&cpuctx->ctx); + if (ctx) + rotate_ctx(ctx); + + perf_event_cpu_sched_in(cpuctx, cpu); + if (ctx) + perf_event_task_sched_in(curr, cpu); +} + +/* + * Enable all of a task's events that have been marked enable-on-exec. + * This expects task == current. + */ +static void perf_event_enable_on_exec(struct task_struct *task) +{ + struct perf_event_context *ctx; + struct perf_event *event; + unsigned long flags; + int enabled = 0; + + local_irq_save(flags); + ctx = task->perf_event_ctxp; + if (!ctx || !ctx->nr_events) + goto out; + + __perf_event_task_sched_out(ctx); + + spin_lock(&ctx->lock); + + list_for_each_entry(event, &ctx->group_list, group_entry) { + if (!event->attr.enable_on_exec) + continue; + event->attr.enable_on_exec = 0; + if (event->state >= PERF_EVENT_STATE_INACTIVE) + continue; + __perf_event_mark_enabled(event, ctx); + enabled = 1; + } + + /* + * Unclone this context if we enabled any event. + */ + if (enabled) + unclone_ctx(ctx); + + spin_unlock(&ctx->lock); + + perf_event_task_sched_in(task, smp_processor_id()); + out: + local_irq_restore(flags); +} + +/* + * Cross CPU call to read the hardware event + */ +static void __perf_event_read(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event *event = info; + struct perf_event_context *ctx = event->ctx; + unsigned long flags; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. In that case + * event->count would have been updated to a recent sample + * when the event was scheduled out. 
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	local_irq_save(flags);
+	if (ctx->is_active)
+		update_context_time(ctx);
+	event->pmu->read(event);
+	update_event_times(event);
+	local_irq_restore(flags);
+}
+
+static u64 perf_event_read(struct perf_event *event)
+{
+	/*
+	 * If event is enabled and currently active on a CPU, update the
+	 * value in the event structure:
+	 */
+	if (event->state == PERF_EVENT_STATE_ACTIVE) {
+		smp_call_function_single(event->oncpu,
+					 __perf_event_read, event, 1);
+	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+		update_event_times(event);
+	}
+
+	return atomic64_read(&event->count);
+}
+
+/*
+ * Initialize the perf_event context in a task_struct:
+ */
+static void
+__perf_event_init_context(struct perf_event_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
+static struct perf_event_context *find_get_context(pid_t pid, int cpu)
+{
+	struct perf_event_context *ctx;
+	struct perf_cpu_context *cpuctx;
+	struct task_struct *task;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * If cpu is not a wildcard then this is a percpu event:
+	 */
+	if (cpu != -1) {
+		/* Must be root to operate on a CPU event: */
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EACCES);
+
+		if (cpu < 0 || cpu >= num_possible_cpus())
+			return ERR_PTR(-EINVAL);
+
+		/*
+		 * We could be clever and allow attaching an event to an
+		 * offline CPU and activate it when the CPU comes up, but
+		 * that's for later.
+		 */
+		if (!cpu_isset(cpu, cpu_online_map))
+			return ERR_PTR(-ENODEV);
+
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+		get_ctx(ctx);
+
+		return ctx;
+	}
+
+	rcu_read_lock();
+	if (!pid)
+		task = current;
+	else
+		task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	/*
+	 * Can't attach events to a dying task.
+	 */
+	err = -ESRCH;
+	if (task->flags & PF_EXITING)
+		goto errout;
+
+	/* Reuse ptrace permission checks for now. */
+	err = -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto errout;
+
+ retry:
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		unclone_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+		err = -ENOMEM;
+		if (!ctx)
+			goto errout;
+		__perf_event_init_context(ctx, task);
+		get_ctx(ctx);
+		if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+ */ + kfree(ctx); + goto retry; + } + get_task_struct(task); + } + + put_task_struct(task); + return ctx; + + errout: + put_task_struct(task); + return ERR_PTR(err); +} + +static void free_event_rcu(struct rcu_head *head) +{ + struct perf_event *event; + + event = container_of(head, struct perf_event, rcu_head); + if (event->ns) + put_pid_ns(event->ns); + kfree(event); +} + +static void perf_pending_sync(struct perf_event *event); + +static void free_event(struct perf_event *event) +{ + perf_pending_sync(event); + + if (!event->parent) { + atomic_dec(&nr_events); + if (event->attr.mmap) + atomic_dec(&nr_mmap_events); + if (event->attr.comm) + atomic_dec(&nr_comm_events); + if (event->attr.task) + atomic_dec(&nr_task_events); + } + + if (event->output) { + fput(event->output->filp); + event->output = NULL; + } + + if (event->destroy) + event->destroy(event); + + put_ctx(event->ctx); + call_rcu(&event->rcu_head, free_event_rcu); +} + +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) +{ + struct perf_event *event = file->private_data; + struct perf_event_context *ctx = event->ctx; + + file->private_data = NULL; + + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_event_remove_from_context(event); + mutex_unlock(&ctx->mutex); + + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); + + free_event(event); + + return 0; +} + +static int perf_event_read_size(struct perf_event *event) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + +static u64 perf_event_read_value(struct perf_event *event) +{ + struct perf_event *child; + u64 total = 0; + + total += perf_event_read(event); + list_for_each_entry(child, &event->child_list, child_list) + total += perf_event_read(child); + + return total; +} + +static int perf_event_read_entry(struct perf_event *event, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_event_read_value(event); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(event); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} + +static int perf_event_read_group(struct perf_event *event, + u64 read_format, char __user *buf) +{ + struct perf_event *leader = event->group_leader, *sub; + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; + + values[n++] = 1 + leader->nr_siblings; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + size = n * sizeof(u64); + + if (copy_to_user(buf, values, size)) + return -EFAULT; + + err = perf_event_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; + + size += err; + + list_for_each_entry(sub, 
&leader->sibling_list, group_entry) {
+		err = perf_event_read_entry(sub, read_format,
+				buf + size);
+		if (err < 0)
+			return err;
+
+		size += err;
+	}
+
+	return size;
+}
+
+static int perf_event_read_one(struct perf_event *event,
+				 u64 read_format, char __user *buf)
+{
+	u64 values[4];
+	int n = 0;
+
+	values[n++] = perf_event_read_value(event);
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = event->total_time_enabled +
+			atomic64_read(&event->child_total_time_enabled);
+	}
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = event->total_time_running +
+			atomic64_read(&event->child_total_time_running);
+	}
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_event_id(event);
+
+	if (copy_to_user(buf, values, n * sizeof(u64)))
+		return -EFAULT;
+
+	return n * sizeof(u64);
+}
+
+/*
+ * Read the performance event - simple non-blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
+{
+	u64 read_format = event->attr.read_format;
+	int ret;
+
+	/*
+	 * Return end-of-file for a read on an event that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (event->state == PERF_EVENT_STATE_ERROR)
+		return 0;
+
+	if (count < perf_event_read_size(event))
+		return -ENOSPC;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	mutex_lock(&event->child_mutex);
+	if (read_format & PERF_FORMAT_GROUP)
+		ret = perf_event_read_group(event, read_format, buf);
+	else
+		ret = perf_event_read_one(event, read_format, buf);
+	mutex_unlock(&event->child_mutex);
+
+	return ret;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct perf_event *event = file->private_data;
+
+	return perf_read_hw(event, buf, count);
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+	struct perf_event *event = file->private_data;
+	struct perf_mmap_data *data;
+	unsigned int events = POLLHUP;
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (data)
+		events = atomic_xchg(&data->poll, 0);
+	rcu_read_unlock();
+
+	poll_wait(file, &event->waitq, wait);
+
+	return events;
+}
+
+static void perf_event_reset(struct perf_event *event)
+{
+	(void)perf_event_read(event);
+	atomic64_set(&event->count, 0);
+	perf_event_update_userpage(event);
+}
+
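perf_event_read_one() above fixes the order of the optional words a read() returns: the count first, then time-enabled, time-running and the event id, each gated by a read_format bit chosen at creation time. A userspace decoder for that layout could look like the following sketch (the FMT_* constants mirror the PERF_FORMAT_* ABI bits; the struct and test harness are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define FMT_TOTAL_TIME_ENABLED	(1U << 0)	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
    #define FMT_TOTAL_TIME_RUNNING	(1U << 1)	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
    #define FMT_ID			(1U << 2)	/* PERF_FORMAT_ID */

    struct read_value {
    	uint64_t value, time_enabled, time_running, id;
    };

    /* Walk the u64 words in the same order perf_event_read_one() emits them. */
    static void parse_read(const uint64_t *buf, unsigned int read_format,
    		       struct read_value *out)
    {
    	int n = 0;

    	out->value = buf[n++];
    	if (read_format & FMT_TOTAL_TIME_ENABLED)
    		out->time_enabled = buf[n++];
    	if (read_format & FMT_TOTAL_TIME_RUNNING)
    		out->time_running = buf[n++];
    	if (read_format & FMT_ID)
    		out->id = buf[n++];
    }

    int main(void)
    {
    	/* what an 8-byte-per-word read() might hand back */
    	uint64_t buf[] = { 123456, 2000, 1500, 42 };
    	struct read_value v = { 0 };

    	parse_read(buf, FMT_TOTAL_TIME_ENABLED | FMT_TOTAL_TIME_RUNNING | FMT_ID, &v);
    	printf("value=%llu enabled=%llu running=%llu id=%llu\n",
    	       (unsigned long long)v.value, (unsigned long long)v.time_enabled,
    	       (unsigned long long)v.time_running, (unsigned long long)v.id);
    	return 0;
    }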
+/*
+ * Holding the top-level event's child_mutex means that any
+ * descendant process that has inherited this event will block
+ * in sync_child_event if it goes to exit, thus satisfying the
+ * task existence requirements of perf_event_enable/disable.
+ */
+static void perf_event_for_each_child(struct perf_event *event,
+					void (*func)(struct perf_event *))
+{
+	struct perf_event *child;
+
+	WARN_ON_ONCE(event->ctx->parent_ctx);
+	mutex_lock(&event->child_mutex);
+	func(event);
+	list_for_each_entry(child, &event->child_list, child_list)
+		func(child);
+	mutex_unlock(&event->child_mutex);
+}
+
+static void perf_event_for_each(struct perf_event *event,
+				  void (*func)(struct perf_event *))
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_event *sibling;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	event = event->group_leader;
+
+	perf_event_for_each_child(event, func);
+	func(event);
+	list_for_each_entry(sibling, &event->sibling_list, group_entry)
+		perf_event_for_each_child(sibling, func);
+	mutex_unlock(&ctx->mutex);
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+	struct perf_event_context *ctx = event->ctx;
+	int ret = 0;
+	u64 value;
+
+	if (!event->attr.sample_period)
+		return -EINVAL;
+
+	if (copy_from_user(&value, arg, sizeof(value)))
+		return -EFAULT;
+
+	if (!value)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->lock);
+	if (event->attr.freq) {
+		if (value > sysctl_perf_event_sample_rate) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		event->attr.sample_freq = value;
+	} else {
+		event->attr.sample_period = value;
+		event->hw.sample_period = value;
+	}
+unlock:
+	spin_unlock_irq(&ctx->lock);
+
+	return ret;
+}
+
+int perf_event_set_output(struct perf_event *event, int output_fd);
+
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_event *event = file->private_data;
+	void (*func)(struct perf_event *);
+	u32 flags = arg;
+
+	switch (cmd) {
+	case PERF_EVENT_IOC_ENABLE:
+		func = perf_event_enable;
+		break;
+	case PERF_EVENT_IOC_DISABLE:
+		func = perf_event_disable;
+		break;
+	case PERF_EVENT_IOC_RESET:
+		func = perf_event_reset;
+		break;
+
+	case PERF_EVENT_IOC_REFRESH:
+		return perf_event_refresh(event, arg);
+
+	case PERF_EVENT_IOC_PERIOD:
+		return perf_event_period(event, (u64 __user *)arg);
+
+	case PERF_EVENT_IOC_SET_OUTPUT:
+		return perf_event_set_output(event, arg);
+
+	default:
+		return -ENOTTY;
+	}
+
+	if (flags & PERF_IOC_FLAG_GROUP)
+		perf_event_for_each(event, func);
+	else
+		perf_event_for_each_child(event, func);
+
+	return 0;
+}
+
+int perf_event_task_enable(void)
+{
+	struct perf_event *event;
+
+	mutex_lock(&current->perf_event_mutex);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+		perf_event_for_each_child(event, perf_event_enable);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return 0;
+}
+
+int perf_event_task_disable(void)
+{
+	struct perf_event *event;
+
+	mutex_lock(&current->perf_event_mutex);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+		perf_event_for_each_child(event, perf_event_disable);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return 0;
+}
+
+#ifndef PERF_EVENT_INDEX_OFFSET
+# define PERF_EVENT_INDEX_OFFSET 0
+#endif
+
+static int perf_event_index(struct perf_event *event)
+{
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
+	return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+}
+
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We cannot serialize this because the arch
+ * code calls this from NMI context.
+ */ +void perf_event_update_userpage(struct perf_event *event) +{ + struct perf_event_mmap_page *userpg; + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(event->data); + if (!data) + goto unlock; + + userpg = data->user_page; + + /* + * Disable preemption so as to not let the corresponding user-space + * spin too long if we get preempted. + */ + preempt_disable(); + ++userpg->lock; + barrier(); + userpg->index = perf_event_index(event); + userpg->offset = atomic64_read(&event->count); + if (event->state == PERF_EVENT_STATE_ACTIVE) + userpg->offset -= atomic64_read(&event->hw.prev_count); + + userpg->time_enabled = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + + userpg->time_running = event->total_time_running + + atomic64_read(&event->child_total_time_running); + + barrier(); + ++userpg->lock; + preempt_enable(); +unlock: + rcu_read_unlock(); +} + +static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct perf_event *event = vma->vm_file->private_data; + struct perf_mmap_data *data; + int ret = VM_FAULT_SIGBUS; + + if (vmf->flags & FAULT_FLAG_MKWRITE) { + if (vmf->pgoff == 0) + ret = 0; + return ret; + } + + rcu_read_lock(); + data = rcu_dereference(event->data); + if (!data) + goto unlock; + + if (vmf->pgoff == 0) { + vmf->page = virt_to_page(data->user_page); + } else { + int nr = vmf->pgoff - 1; + + if ((unsigned)nr > data->nr_pages) + goto unlock; + + if (vmf->flags & FAULT_FLAG_WRITE) + goto unlock; + + vmf->page = virt_to_page(data->data_pages[nr]); + } + + get_page(vmf->page); + vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->index = vmf->pgoff; + + ret = 0; +unlock: + rcu_read_unlock(); + + return ret; +} + +static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) +{ + struct perf_mmap_data *data; + unsigned long size; + int i; + + WARN_ON(atomic_read(&event->mmap_count)); + + size = sizeof(struct perf_mmap_data); + size += nr_pages * sizeof(void *); + + data = kzalloc(size, GFP_KERNEL); + if (!data) + goto fail; + + data->user_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->user_page) + goto fail_user_page; + + for (i = 0; i < nr_pages; i++) { + data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->data_pages[i]) + goto fail_data_pages; + } + + data->nr_pages = nr_pages; + atomic_set(&data->lock, -1); + + if (event->attr.watermark) { + data->watermark = min_t(long, PAGE_SIZE * nr_pages, + event->attr.wakeup_watermark); + } + if (!data->watermark) + data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); + + rcu_assign_pointer(event->data, data); + + return 0; + +fail_data_pages: + for (i--; i >= 0; i--) + free_page((unsigned long)data->data_pages[i]); + + free_page((unsigned long)data->user_page); + +fail_user_page: + kfree(data); + +fail: + return -ENOMEM; +} + +static void perf_mmap_free_page(unsigned long addr) +{ + struct page *page = virt_to_page((void *)addr); + + page->mapping = NULL; + __free_page(page); +} + +static void __perf_mmap_data_free(struct rcu_head *rcu_head) +{ + struct perf_mmap_data *data; + int i; + + data = container_of(rcu_head, struct perf_mmap_data, rcu_head); + + perf_mmap_free_page((unsigned long)data->user_page); + for (i = 0; i < data->nr_pages; i++) + perf_mmap_free_page((unsigned long)data->data_pages[i]); + + kfree(data); +} + +static void perf_mmap_data_free(struct perf_event *event) +{ + struct perf_mmap_data *data = event->data; + + WARN_ON(atomic_read(&event->mmap_count)); + + 
rcu_assign_pointer(event->data, NULL); + call_rcu(&data->rcu_head, __perf_mmap_data_free); +} + +static void perf_mmap_open(struct vm_area_struct *vma) +{ + struct perf_event *event = vma->vm_file->private_data; + + atomic_inc(&event->mmap_count); +} + +static void perf_mmap_close(struct vm_area_struct *vma) +{ + struct perf_event *event = vma->vm_file->private_data; + + WARN_ON_ONCE(event->ctx->parent_ctx); + if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { + struct user_struct *user = current_user(); + + atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); + vma->vm_mm->locked_vm -= event->data->nr_locked; + perf_mmap_data_free(event); + mutex_unlock(&event->mmap_mutex); + } +} + +static struct vm_operations_struct perf_mmap_vmops = { + .open = perf_mmap_open, + .close = perf_mmap_close, + .fault = perf_mmap_fault, + .page_mkwrite = perf_mmap_fault, +}; + +static int perf_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct perf_event *event = file->private_data; + unsigned long user_locked, user_lock_limit; + struct user_struct *user = current_user(); + unsigned long locked, lock_limit; + unsigned long vma_size; + unsigned long nr_pages; + long user_extra, extra; + int ret = 0; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma_size = vma->vm_end - vma->vm_start; + nr_pages = (vma_size / PAGE_SIZE) - 1; + + /* + * If we have data pages ensure they're a power-of-two number, so we + * can do bitmasks instead of modulo. + */ + if (nr_pages != 0 && !is_power_of_2(nr_pages)) + return -EINVAL; + + if (vma_size != PAGE_SIZE * (1 + nr_pages)) + return -EINVAL; + + if (vma->vm_pgoff != 0) + return -EINVAL; + + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->mmap_mutex); + if (event->output) { + ret = -EINVAL; + goto unlock; + } + + if (atomic_inc_not_zero(&event->mmap_count)) { + if (nr_pages != event->data->nr_pages) + ret = -EINVAL; + goto unlock; + } + + user_extra = nr_pages + 1; + user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); + + /* + * Increase the limit linearly with more CPUs: + */ + user_lock_limit *= num_online_cpus(); + + user_locked = atomic_long_read(&user->locked_vm) + user_extra; + + extra = 0; + if (user_locked > user_lock_limit) + extra = user_locked - user_lock_limit; + + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + locked = vma->vm_mm->locked_vm + extra; + + if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + !capable(CAP_IPC_LOCK)) { + ret = -EPERM; + goto unlock; + } + + WARN_ON(event->data); + ret = perf_mmap_data_alloc(event, nr_pages); + if (ret) + goto unlock; + + atomic_set(&event->mmap_count, 1); + atomic_long_add(user_extra, &user->locked_vm); + vma->vm_mm->locked_vm += extra; + event->data->nr_locked = extra; + if (vma->vm_flags & VM_WRITE) + event->data->writable = 1; + +unlock: + mutex_unlock(&event->mmap_mutex); + + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + + return ret; +} + +static int perf_fasync(int fd, struct file *filp, int on) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct perf_event *event = filp->private_data; + int retval; + + mutex_lock(&inode->i_mutex); + retval = fasync_helper(fd, filp, on, &event->fasync); + mutex_unlock(&inode->i_mutex); + + if (retval < 0) + return retval; + + return 0; +} + +static const struct file_operations perf_fops = { + .release = perf_release, + .read = perf_read, + .poll = perf_poll, + .unlocked_ioctl = perf_ioctl, + .compat_ioctl = 
perf_ioctl,
+	.mmap			= perf_mmap,
+	.fasync			= perf_fasync,
+};
+
+/*
+ * Perf event wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_event_wakeup(struct perf_event *event)
+{
+	wake_up_all(&event->waitq);
+
+	if (event->pending_kill) {
+		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+		event->pending_kill = 0;
+	}
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wake up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * singly-linked list and use cmpxchg() to add entries locklessly.
+ */
+
+static void perf_pending_event(struct perf_pending_entry *entry)
+{
+	struct perf_event *event = container_of(entry,
+			struct perf_event, pending);
+
+	if (event->pending_disable) {
+		event->pending_disable = 0;
+		__perf_event_disable(event);
+	}
+
+	if (event->pending_wakeup) {
+		event->pending_wakeup = 0;
+		perf_event_wakeup(event);
+	}
+}
+
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
+{
+	struct perf_pending_entry **head;
+
+	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	entry->func = func;
+
+	head = &get_cpu_var(perf_pending_head);
+
+	do {
+		entry->next = *head;
+	} while (cmpxchg(head, entry->next, entry) != entry->next);
+
+	set_perf_event_pending();
+
+	put_cpu_var(perf_pending_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_pending_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		void (*func)(struct perf_pending_entry *);
+		struct perf_pending_entry *entry = list;
+
+		list = list->next;
+
+		func = entry->func;
+		entry->next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		func(entry);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_event *event)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup.
-- see perf_pending_handle() + */ + smp_rmb(); + return event->pending.next == NULL; +} + +static void perf_pending_sync(struct perf_event *event) +{ + wait_event(event->waitq, perf_not_pending(event)); +} + +void perf_event_do_pending(void) +{ + __perf_pending_run(); +} + +/* + * Callchain support -- arch specific + */ + +__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + return NULL; +} + +/* + * Output + */ +static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, + unsigned long offset, unsigned long head) +{ + unsigned long mask; + + if (!data->writable) + return true; + + mask = (data->nr_pages << PAGE_SHIFT) - 1; + + offset = (offset - tail) & mask; + head = (head - tail) & mask; + + if ((int)(head - offset) < 0) + return false; + + return true; +} + +static void perf_output_wakeup(struct perf_output_handle *handle) +{ + atomic_set(&handle->data->poll, POLL_IN); + + if (handle->nmi) { + handle->event->pending_wakeup = 1; + perf_pending_queue(&handle->event->pending, + perf_pending_event); + } else + perf_event_wakeup(handle->event); +} + +/* + * Curious locking construct. + * + * We need to ensure a later event_id doesn't publish a head when a former + * event_id isn't done writing. However since we need to deal with NMIs we + * cannot fully serialize things. + * + * What we do is serialize between CPUs so we only have to deal with NMI + * nesting on a single CPU. + * + * We only publish the head (and generate a wakeup) when the outer-most + * event_id completes. + */ +static void perf_output_lock(struct perf_output_handle *handle) +{ + struct perf_mmap_data *data = handle->data; + int cpu; + + handle->locked = 0; + + local_irq_save(handle->flags); + cpu = smp_processor_id(); + + if (in_nmi() && atomic_read(&data->lock) == cpu) + return; + + while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) + cpu_relax(); + + handle->locked = 1; +} + +static void perf_output_unlock(struct perf_output_handle *handle) +{ + struct perf_mmap_data *data = handle->data; + unsigned long head; + int cpu; + + data->done_head = data->head; + + if (!handle->locked) + goto out; + +again: + /* + * The xchg implies a full barrier that ensures all writes are done + * before we publish the new head, matched by a rmb() in userspace when + * reading this position. + */ + while ((head = atomic_long_xchg(&data->done_head, 0))) + data->user_page->data_head = head; + + /* + * NMI can happen here, which means we can miss a done_head update. + */ + + cpu = atomic_xchg(&data->lock, -1); + WARN_ON_ONCE(cpu != smp_processor_id()); + + /* + * Therefore we have to validate we did not indeed do so. + */ + if (unlikely(atomic_long_read(&data->done_head))) { + /* + * Since we had it locked, we can lock it again. 
+ */ + while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) + cpu_relax(); + + goto again; + } + + if (atomic_xchg(&data->wakeup, 0)) + perf_output_wakeup(handle); +out: + local_irq_restore(handle->flags); +} + +void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len) +{ + unsigned int pages_mask; + unsigned int offset; + unsigned int size; + void **pages; + + offset = handle->offset; + pages_mask = handle->data->nr_pages - 1; + pages = handle->data->data_pages; + + do { + unsigned int page_offset; + int nr; + + nr = (offset >> PAGE_SHIFT) & pages_mask; + page_offset = offset & (PAGE_SIZE - 1); + size = min_t(unsigned int, PAGE_SIZE - page_offset, len); + + memcpy(pages[nr] + page_offset, buf, size); + + len -= size; + buf += size; + offset += size; + } while (len); + + handle->offset = offset; + + /* + * Check we didn't copy past our reservation window, taking the + * possible unsigned int wrap into account. + */ + WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); +} + +int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample) +{ + struct perf_event *output_event; + struct perf_mmap_data *data; + unsigned long tail, offset, head; + int have_lost; + struct { + struct perf_event_header header; + u64 id; + u64 lost; + } lost_event; + + rcu_read_lock(); + /* + * For inherited events we send all the output towards the parent. + */ + if (event->parent) + event = event->parent; + + output_event = rcu_dereference(event->output); + if (output_event) + event = output_event; + + data = rcu_dereference(event->data); + if (!data) + goto out; + + handle->data = data; + handle->event = event; + handle->nmi = nmi; + handle->sample = sample; + + if (!data->nr_pages) + goto fail; + + have_lost = atomic_read(&data->lost); + if (have_lost) + size += sizeof(lost_event); + + perf_output_lock(handle); + + do { + /* + * Userspace could choose to issue a mb() before updating the + * tail pointer. So that all reads will be completed before the + * write is issued. 
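+ * The smp_rmb() below pairs with that mb(): the tail must be read
+ * before the space it frees is treated as writable.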
+ */ + tail = ACCESS_ONCE(data->user_page->data_tail); + smp_rmb(); + offset = head = atomic_long_read(&data->head); + head += size; + if (unlikely(!perf_output_space(data, tail, offset, head))) + goto fail; + } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); + + handle->offset = offset; + handle->head = head; + + if (head - tail > data->watermark) + atomic_set(&data->wakeup, 1); + + if (have_lost) { + lost_event.header.type = PERF_RECORD_LOST; + lost_event.header.misc = 0; + lost_event.header.size = sizeof(lost_event); + lost_event.id = event->id; + lost_event.lost = atomic_xchg(&data->lost, 0); + + perf_output_put(handle, lost_event); + } + + return 0; + +fail: + atomic_inc(&data->lost); + perf_output_unlock(handle); +out: + rcu_read_unlock(); + + return -ENOSPC; +} + +void perf_output_end(struct perf_output_handle *handle) +{ + struct perf_event *event = handle->event; + struct perf_mmap_data *data = handle->data; + + int wakeup_events = event->attr.wakeup_events; + + if (handle->sample && wakeup_events) { + int events = atomic_inc_return(&data->events); + if (events >= wakeup_events) { + atomic_sub(wakeup_events, &data->events); + atomic_set(&data->wakeup, 1); + } + } + + perf_output_unlock(handle); + rcu_read_unlock(); +} + +static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_tgid_nr_ns(p, event->ns); +} + +static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) +{ + /* + * only top level events have the pid namespace they were created in + */ + if (event->parent) + event = event->parent; + + return task_pid_nr_ns(p, event->ns); +} + +static void perf_output_read_one(struct perf_output_handle *handle, + struct perf_event *event) +{ + u64 read_format = event->attr.read_format; + u64 values[4]; + int n = 0; + + values[n++] = atomic64_read(&event->count); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = event->total_time_running + + atomic64_read(&event->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(event); + + perf_output_copy(handle, values, n * sizeof(u64)); +} + +/* + * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. 
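+ * (perf_event_alloc() currently rejects attr.inherit together with
+ * PERF_FORMAT_GROUP for this reason)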
+ */ +static void perf_output_read_group(struct perf_output_handle *handle, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader, *sub; + u64 read_format = event->attr.read_format; + u64 values[5]; + int n = 0; + + values[n++] = 1 + leader->nr_siblings; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = leader->total_time_enabled; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = leader->total_time_running; + + if (leader != event) + leader->pmu->read(leader); + + values[n++] = atomic64_read(&leader->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(leader); + + perf_output_copy(handle, values, n * sizeof(u64)); + + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + n = 0; + + if (sub != event) + sub->pmu->read(sub); + + values[n++] = atomic64_read(&sub->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + + perf_output_copy(handle, values, n * sizeof(u64)); + } +} + +static void perf_output_read(struct perf_output_handle *handle, + struct perf_event *event) +{ + if (event->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, event); + else + perf_output_read_one(handle, event); +} + +void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event) +{ + u64 sample_type = data->type; + + perf_output_put(handle, *header); + + if (sample_type & PERF_SAMPLE_IP) + perf_output_put(handle, data->ip); + + if (sample_type & PERF_SAMPLE_TID) + perf_output_put(handle, data->tid_entry); + + if (sample_type & PERF_SAMPLE_TIME) + perf_output_put(handle, data->time); + + if (sample_type & PERF_SAMPLE_ADDR) + perf_output_put(handle, data->addr); + + if (sample_type & PERF_SAMPLE_ID) + perf_output_put(handle, data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + perf_output_put(handle, data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + perf_output_put(handle, data->cpu_entry); + + if (sample_type & PERF_SAMPLE_PERIOD) + perf_output_put(handle, data->period); + + if (sample_type & PERF_SAMPLE_READ) + perf_output_read(handle, event); + + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (data->callchain) { + int size = 1; + + if (data->callchain) + size += data->callchain->nr; + + size *= sizeof(u64); + + perf_output_copy(handle, data->callchain, size); + } else { + u64 nr = 0; + perf_output_put(handle, nr); + } + } + + if (sample_type & PERF_SAMPLE_RAW) { + if (data->raw) { + perf_output_put(handle, data->raw->size); + perf_output_copy(handle, data->raw->data, + data->raw->size); + } else { + struct { + u32 size; + u32 data; + } raw = { + .size = sizeof(u32), + .data = 0, + }; + perf_output_put(handle, raw); + } + } +} + +void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs) +{ + u64 sample_type = event->attr.sample_type; + + data->type = sample_type; + + header->type = PERF_RECORD_SAMPLE; + header->size = sizeof(*header); + + header->misc = 0; + header->misc |= perf_misc_flags(regs); + + if (sample_type & PERF_SAMPLE_IP) { + data->ip = perf_instruction_pointer(regs); + + header->size += sizeof(data->ip); + } + + if (sample_type & PERF_SAMPLE_TID) { + /* namespace issues */ + data->tid_entry.pid = perf_event_pid(event, current); + data->tid_entry.tid = perf_event_tid(event, current); + + header->size += sizeof(data->tid_entry); + } + + if (sample_type & 
PERF_SAMPLE_TIME) { + data->time = perf_clock(); + + header->size += sizeof(data->time); + } + + if (sample_type & PERF_SAMPLE_ADDR) + header->size += sizeof(data->addr); + + if (sample_type & PERF_SAMPLE_ID) { + data->id = primary_event_id(event); + + header->size += sizeof(data->id); + } + + if (sample_type & PERF_SAMPLE_STREAM_ID) { + data->stream_id = event->id; + + header->size += sizeof(data->stream_id); + } + + if (sample_type & PERF_SAMPLE_CPU) { + data->cpu_entry.cpu = raw_smp_processor_id(); + data->cpu_entry.reserved = 0; + + header->size += sizeof(data->cpu_entry); + } + + if (sample_type & PERF_SAMPLE_PERIOD) + header->size += sizeof(data->period); + + if (sample_type & PERF_SAMPLE_READ) + header->size += perf_event_read_size(event); + + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + int size = 1; + + data->callchain = perf_callchain(regs); + + if (data->callchain) + size += data->callchain->nr; + + header->size += size * sizeof(u64); + } + + if (sample_type & PERF_SAMPLE_RAW) { + int size = sizeof(u32); + + if (data->raw) + size += data->raw->size; + else + size += sizeof(u32); + + WARN_ON_ONCE(size & (sizeof(u64)-1)); + header->size += size; + } +} + +static void perf_event_output(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_output_handle handle; + struct perf_event_header header; + + perf_prepare_sample(&header, data, event, regs); + + if (perf_output_begin(&handle, event, header.size, nmi, 1)) + return; + + perf_output_sample(&handle, &header, data, event); + + perf_output_end(&handle); +} + +/* + * read event_id + */ + +struct perf_read_event { + struct perf_event_header header; + + u32 pid; + u32 tid; +}; + +static void +perf_event_read_event(struct perf_event *event, + struct task_struct *task) +{ + struct perf_output_handle handle; + struct perf_read_event read_event = { + .header = { + .type = PERF_RECORD_READ, + .misc = 0, + .size = sizeof(read_event) + perf_event_read_size(event), + }, + .pid = perf_event_pid(event, task), + .tid = perf_event_tid(event, task), + }; + int ret; + + ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); + if (ret) + return; + + perf_output_put(&handle, read_event); + perf_output_read(&handle, event); + + perf_output_end(&handle); +} + +/* + * task tracking -- fork/exit + * + * enabled by: attr.comm | attr.mmap | attr.task + */ + +struct perf_task_event { + struct task_struct *task; + struct perf_event_context *task_ctx; + + struct { + struct perf_event_header header; + + u32 pid; + u32 ppid; + u32 tid; + u32 ptid; + u64 time; + } event_id; +}; + +static void perf_event_task_output(struct perf_event *event, + struct perf_task_event *task_event) +{ + struct perf_output_handle handle; + int size; + struct task_struct *task = task_event->task; + int ret; + + size = task_event->event_id.header.size; + ret = perf_output_begin(&handle, event, size, 0, 0); + + if (ret) + return; + + task_event->event_id.pid = perf_event_pid(event, task); + task_event->event_id.ppid = perf_event_pid(event, current); + + task_event->event_id.tid = perf_event_tid(event, task); + task_event->event_id.ptid = perf_event_tid(event, current); + + task_event->event_id.time = perf_clock(); + + perf_output_put(&handle, task_event->event_id); + + perf_output_end(&handle); +} + +static int perf_event_task_match(struct perf_event *event) +{ + if (event->attr.comm || event->attr.mmap || event->attr.task) + return 1; + + return 0; +} + +static void perf_event_task_ctx(struct perf_event_context *ctx, 
+ struct perf_task_event *task_event) +{ + struct perf_event *event; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_task_match(event)) + perf_event_task_output(event, task_event); + } + rcu_read_unlock(); +} + +static void perf_event_task_event(struct perf_task_event *task_event) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx = task_event->task_ctx; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_event_task_ctx(&cpuctx->ctx, task_event); + put_cpu_var(perf_cpu_context); + + rcu_read_lock(); + if (!ctx) + ctx = rcu_dereference(task_event->task->perf_event_ctxp); + if (ctx) + perf_event_task_ctx(ctx, task_event); + rcu_read_unlock(); +} + +static void perf_event_task(struct task_struct *task, + struct perf_event_context *task_ctx, + int new) +{ + struct perf_task_event task_event; + + if (!atomic_read(&nr_comm_events) && + !atomic_read(&nr_mmap_events) && + !atomic_read(&nr_task_events)) + return; + + task_event = (struct perf_task_event){ + .task = task, + .task_ctx = task_ctx, + .event_id = { + .header = { + .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT, + .misc = 0, + .size = sizeof(task_event.event_id), + }, + /* .pid */ + /* .ppid */ + /* .tid */ + /* .ptid */ + }, + }; + + perf_event_task_event(&task_event); +} + +void perf_event_fork(struct task_struct *task) +{ + perf_event_task(task, NULL, 1); +} + +/* + * comm tracking + */ + +struct perf_comm_event { + struct task_struct *task; + char *comm; + int comm_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + } event_id; +}; + +static void perf_event_comm_output(struct perf_event *event, + struct perf_comm_event *comm_event) +{ + struct perf_output_handle handle; + int size = comm_event->event_id.header.size; + int ret = perf_output_begin(&handle, event, size, 0, 0); + + if (ret) + return; + + comm_event->event_id.pid = perf_event_pid(event, comm_event->task); + comm_event->event_id.tid = perf_event_tid(event, comm_event->task); + + perf_output_put(&handle, comm_event->event_id); + perf_output_copy(&handle, comm_event->comm, + comm_event->comm_size); + perf_output_end(&handle); +} + +static int perf_event_comm_match(struct perf_event *event) +{ + if (event->attr.comm) + return 1; + + return 0; +} + +static void perf_event_comm_ctx(struct perf_event_context *ctx, + struct perf_comm_event *comm_event) +{ + struct perf_event *event; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_comm_match(event)) + perf_event_comm_output(event, comm_event); + } + rcu_read_unlock(); +} + +static void perf_event_comm_event(struct perf_comm_event *comm_event) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + unsigned int size; + char comm[TASK_COMM_LEN]; + + memset(comm, 0, sizeof(comm)); + strncpy(comm, comm_event->task->comm, sizeof(comm)); + size = ALIGN(strlen(comm)+1, sizeof(u64)); + + comm_event->comm = comm; + comm_event->comm_size = size; + + comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); + put_cpu_var(perf_cpu_context); + + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. 
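+ * (inherited events redirect their output to the parent anyway,
+ * see perf_output_begin())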
+ */ + ctx = rcu_dereference(current->perf_event_ctxp); + if (ctx) + perf_event_comm_ctx(ctx, comm_event); + rcu_read_unlock(); +} + +void perf_event_comm(struct task_struct *task) +{ + struct perf_comm_event comm_event; + + if (task->perf_event_ctxp) + perf_event_enable_on_exec(task); + + if (!atomic_read(&nr_comm_events)) + return; + + comm_event = (struct perf_comm_event){ + .task = task, + /* .comm */ + /* .comm_size */ + .event_id = { + .header = { + .type = PERF_RECORD_COMM, + .misc = 0, + /* .size */ + }, + /* .pid */ + /* .tid */ + }, + }; + + perf_event_comm_event(&comm_event); +} + +/* + * mmap tracking + */ + +struct perf_mmap_event { + struct vm_area_struct *vma; + + const char *file_name; + int file_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 start; + u64 len; + u64 pgoff; + } event_id; +}; + +static void perf_event_mmap_output(struct perf_event *event, + struct perf_mmap_event *mmap_event) +{ + struct perf_output_handle handle; + int size = mmap_event->event_id.header.size; + int ret = perf_output_begin(&handle, event, size, 0, 0); + + if (ret) + return; + + mmap_event->event_id.pid = perf_event_pid(event, current); + mmap_event->event_id.tid = perf_event_tid(event, current); + + perf_output_put(&handle, mmap_event->event_id); + perf_output_copy(&handle, mmap_event->file_name, + mmap_event->file_size); + perf_output_end(&handle); +} + +static int perf_event_mmap_match(struct perf_event *event, + struct perf_mmap_event *mmap_event) +{ + if (event->attr.mmap) + return 1; + + return 0; +} + +static void perf_event_mmap_ctx(struct perf_event_context *ctx, + struct perf_mmap_event *mmap_event) +{ + struct perf_event *event; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_event_mmap_match(event, mmap_event)) + perf_event_mmap_output(event, mmap_event); + } + rcu_read_unlock(); +} + +static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + struct vm_area_struct *vma = mmap_event->vma; + struct file *file = vma->vm_file; + unsigned int size; + char tmp[16]; + char *buf = NULL; + const char *name; + + memset(tmp, 0, sizeof(tmp)); + + if (file) { + /* + * d_path works from the end of the buffer backwards, so we + * need to add enough zero bytes after the string to handle + * the 64bit alignment we do later. + */ + buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); + if (!buf) { + name = strncpy(tmp, "//enomem", sizeof(tmp)); + goto got_name; + } + name = d_path(&file->f_path, buf, PATH_MAX); + if (IS_ERR(name)) { + name = strncpy(tmp, "//toolong", sizeof(tmp)); + goto got_name; + } + } else { + if (arch_vma_name(mmap_event->vma)) { + name = strncpy(tmp, arch_vma_name(mmap_event->vma), + sizeof(tmp)); + goto got_name; + } + + if (!vma->vm_mm) { + name = strncpy(tmp, "[vdso]", sizeof(tmp)); + goto got_name; + } + + name = strncpy(tmp, "//anon", sizeof(tmp)); + goto got_name; + } + +got_name: + size = ALIGN(strlen(name)+1, sizeof(u64)); + + mmap_event->file_name = name; + mmap_event->file_size = size; + + mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); + put_cpu_var(perf_cpu_context); + + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. 
+ */ + ctx = rcu_dereference(current->perf_event_ctxp); + if (ctx) + perf_event_mmap_ctx(ctx, mmap_event); + rcu_read_unlock(); + + kfree(buf); +} + +void __perf_event_mmap(struct vm_area_struct *vma) +{ + struct perf_mmap_event mmap_event; + + if (!atomic_read(&nr_mmap_events)) + return; + + mmap_event = (struct perf_mmap_event){ + .vma = vma, + /* .file_name */ + /* .file_size */ + .event_id = { + .header = { + .type = PERF_RECORD_MMAP, + .misc = 0, + /* .size */ + }, + /* .pid */ + /* .tid */ + .start = vma->vm_start, + .len = vma->vm_end - vma->vm_start, + .pgoff = vma->vm_pgoff, + }, + }; + + perf_event_mmap_event(&mmap_event); +} + +/* + * IRQ throttle logging + */ + +static void perf_log_throttle(struct perf_event *event, int enable) +{ + struct perf_output_handle handle; + int ret; + + struct { + struct perf_event_header header; + u64 time; + u64 id; + u64 stream_id; + } throttle_event = { + .header = { + .type = PERF_RECORD_THROTTLE, + .misc = 0, + .size = sizeof(throttle_event), + }, + .time = perf_clock(), + .id = primary_event_id(event), + .stream_id = event->id, + }; + + if (enable) + throttle_event.header.type = PERF_RECORD_UNTHROTTLE; + + ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); + if (ret) + return; + + perf_output_put(&handle, throttle_event); + perf_output_end(&handle); +} + +/* + * Generic event overflow handling, sampling. + */ + +static int __perf_event_overflow(struct perf_event *event, int nmi, + int throttle, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int events = atomic_read(&event->event_limit); + struct hw_perf_event *hwc = &event->hw; + int ret = 0; + + throttle = (throttle && event->pmu->unthrottle != NULL); + + if (!throttle) { + hwc->interrupts++; + } else { + if (hwc->interrupts != MAX_INTERRUPTS) { + hwc->interrupts++; + if (HZ * hwc->interrupts > + (u64)sysctl_perf_event_sample_rate) { + hwc->interrupts = MAX_INTERRUPTS; + perf_log_throttle(event, 0); + ret = 1; + } + } else { + /* + * Keep re-disabling events even though on the previous + * pass we disabled it - just in case we raced with a + * sched-in and the event got enabled again: + */ + ret = 1; + } + } + + if (event->attr.freq) { + u64 now = perf_clock(); + s64 delta = now - hwc->freq_stamp; + + hwc->freq_stamp = now; + + if (delta > 0 && delta < TICK_NSEC) + perf_adjust_period(event, NSEC_PER_SEC / (int)delta); + } + + /* + * XXX event_limit might not quite work as expected on inherited + * events + */ + + event->pending_kill = POLL_IN; + if (events && atomic_dec_and_test(&event->event_limit)) { + ret = 1; + event->pending_kill = POLL_HUP; + if (nmi) { + event->pending_disable = 1; + perf_pending_queue(&event->pending, + perf_pending_event); + } else + perf_event_disable(event); + } + + perf_event_output(event, nmi, data, regs); + return ret; +} + +int perf_event_overflow(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + return __perf_event_overflow(event, nmi, 1, data, regs); +} + +/* + * Generic software event infrastructure + */ + +/* + * We directly increment event->count and keep a second value in + * event->hw.period_left to count intervals. This period event + * is kept in the range [-sample_period, 0] so that we can use the + * sign as trigger. 
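+ * E.g. with sample_period == 100 and period_left == -100, adding
+ * 150 events leaves period_left == 50; perf_swevent_set_period()
+ * then reports one overflow and resets period_left to -50.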
+ */ + +static u64 perf_swevent_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 period = hwc->last_period; + u64 nr, offset; + s64 old, val; + + hwc->last_period = hwc->sample_period; + +again: + old = val = atomic64_read(&hwc->period_left); + if (val < 0) + return 0; + + nr = div64_u64(period + val, period); + offset = nr * period; + val -= offset; + if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + goto again; + + return nr; +} + +static void perf_swevent_overflow(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct hw_perf_event *hwc = &event->hw; + int throttle = 0; + u64 overflow; + + data->period = event->hw.last_period; + overflow = perf_swevent_set_period(event); + + if (hwc->interrupts == MAX_INTERRUPTS) + return; + + for (; overflow; overflow--) { + if (__perf_event_overflow(event, nmi, throttle, + data, regs)) { + /* + * We inhibit the overflow from happening when + * hwc->interrupts == MAX_INTERRUPTS. + */ + break; + } + throttle = 1; + } +} + +static void perf_swevent_unthrottle(struct perf_event *event) +{ + /* + * Nothing to do, we already reset hwc->interrupts. + */ +} + +static void perf_swevent_add(struct perf_event *event, u64 nr, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct hw_perf_event *hwc = &event->hw; + + atomic64_add(nr, &event->count); + + if (!hwc->sample_period) + return; + + if (!regs) + return; + + if (!atomic64_add_negative(nr, &hwc->period_left)) + perf_swevent_overflow(event, nmi, data, regs); +} + +static int perf_swevent_is_counting(struct perf_event *event) +{ + /* + * The event is active, we're good! + */ + if (event->state == PERF_EVENT_STATE_ACTIVE) + return 1; + + /* + * The event is off/error, not counting. + */ + if (event->state != PERF_EVENT_STATE_INACTIVE) + return 0; + + /* + * The event is inactive, if the context is active + * we're part of a group that didn't make it on the 'pmu', + * not counting. + */ + if (event->ctx->is_active) + return 0; + + /* + * We're inactive and the context is too, this means the + * task is scheduled out, we're counting events that happen + * to us, like migration events. 
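+ * (a cpu migration, for one, is counted from set_task_cpu() while
+ * the task is not on a cpu)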
+ */ + return 1; +} + +static int perf_swevent_match(struct perf_event *event, + enum perf_type_id type, + u32 event_id, struct pt_regs *regs) +{ + if (!perf_swevent_is_counting(event)) + return 0; + + if (event->attr.type != type) + return 0; + if (event->attr.config != event_id) + return 0; + + if (regs) { + if (event->attr.exclude_user && user_mode(regs)) + return 0; + + if (event->attr.exclude_kernel && !user_mode(regs)) + return 0; + } + + return 1; +} + +static void perf_swevent_ctx_event(struct perf_event_context *ctx, + enum perf_type_id type, + u32 event_id, u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event *event; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (perf_swevent_match(event, type, event_id, regs)) + perf_swevent_add(event, nr, nmi, data, regs); + } + rcu_read_unlock(); +} + +static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +{ + if (in_nmi()) + return &cpuctx->recursion[3]; + + if (in_irq()) + return &cpuctx->recursion[2]; + + if (in_softirq()) + return &cpuctx->recursion[1]; + + return &cpuctx->recursion[0]; +} + +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int *recursion = perf_swevent_recursion_context(cpuctx); + struct perf_event_context *ctx; + + if (*recursion) + goto out; + + (*recursion)++; + barrier(); + + perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, + nr, nmi, data, regs); + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. + */ + ctx = rcu_dereference(current->perf_event_ctxp); + if (ctx) + perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); + rcu_read_unlock(); + + barrier(); + (*recursion)--; + +out: + put_cpu_var(perf_cpu_context); +} + +void __perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) +{ + struct perf_sample_data data = { + .addr = addr, + }; + + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, + &data, regs); +} + +static void perf_swevent_read(struct perf_event *event) +{ +} + +static int perf_swevent_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + hwc->last_period = hwc->sample_period; + perf_swevent_set_period(event); + } + return 0; +} + +static void perf_swevent_disable(struct perf_event *event) +{ +} + +static const struct pmu perf_ops_generic = { + .enable = perf_swevent_enable, + .disable = perf_swevent_disable, + .read = perf_swevent_read, + .unthrottle = perf_swevent_unthrottle, +}; + +/* + * hrtimer based swevent callback + */ + +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) +{ + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; + + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + data.addr = 0; + regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. 
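+ * (task_pt_regs(current) holds the user-space register state saved
+ * on kernel entry)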
+ */ + if ((event->attr.exclude_kernel || !regs) && + !event->attr.exclude_user) + regs = task_pt_regs(current); + + if (regs) { + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } + + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + + return ret; +} + +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_perf_event_update(struct perf_event *event) +{ + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = atomic64_read(&event->hw.prev_count); + atomic64_set(&event->hw.prev_count, now); + atomic64_add(now - prev, &event->count); +} + +static int cpu_clock_perf_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); + + atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + +static void cpu_clock_perf_event_disable(struct perf_event *event) +{ + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + cpu_clock_perf_event_update(event); +} + +static void cpu_clock_perf_event_read(struct perf_event *event) +{ + cpu_clock_perf_event_update(event); +} + +static const struct pmu perf_ops_cpu_clock = { + .enable = cpu_clock_perf_event_enable, + .disable = cpu_clock_perf_event_disable, + .read = cpu_clock_perf_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_perf_event_update(struct perf_event *event, u64 now) +{ + u64 prev; + s64 delta; + + prev = atomic64_xchg(&event->hw.prev_count, now); + delta = now - prev; + atomic64_add(delta, &event->count); +} + +static int task_clock_perf_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; + + now = event->ctx->time; + + atomic64_set(&hwc->prev_count, now); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period = max_t(u64, 10000, hwc->sample_period); + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + +static void task_clock_perf_event_disable(struct perf_event *event) +{ + if (event->hw.sample_period) + hrtimer_cancel(&event->hw.hrtimer); + task_clock_perf_event_update(event, event->ctx->time); + +} + +static void task_clock_perf_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_perf_event_update(event, time); +} + +static const struct pmu perf_ops_task_clock = { + .enable = task_clock_perf_event_enable, + .disable = task_clock_perf_event_disable, + .read = task_clock_perf_event_read, +}; + +#ifdef CONFIG_EVENT_PROFILE +void perf_tp_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) +{ + struct perf_raw_record raw = { + .size = entry_size, + .data = record, + }; + + struct perf_sample_data data = { + .addr = addr, + .raw = &raw, + }; + + struct pt_regs *regs = get_irq_regs(); + + if (!regs) + regs = task_pt_regs(current); + + 
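+ /* nmi == 1: use the NMI-safe pending-queue wakeup, see perf_output_wakeup() */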
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + &data, regs); +} +EXPORT_SYMBOL_GPL(perf_tp_event); + +extern int ftrace_profile_enable(int); +extern void ftrace_profile_disable(int); + +static void tp_perf_event_destroy(struct perf_event *event) +{ + ftrace_profile_disable(event->attr.config); +} + +static const struct pmu *tp_perf_event_init(struct perf_event *event) +{ + /* + * Raw tracepoint data is a severe data leak, only allow root to + * have these. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_paranoid_tracepoint_raw() && + !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + if (ftrace_profile_enable(event->attr.config)) + return NULL; + + event->destroy = tp_perf_event_destroy; + + return &perf_ops_generic; +} +#else +static const struct pmu *tp_perf_event_init(struct perf_event *event) +{ + return NULL; +} +#endif + +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); +} + +static const struct pmu *sw_perf_event_init(struct perf_event *event) +{ + const struct pmu *pmu = NULL; + u64 event_id = event->attr.config; + + /* + * Software events (currently) can't in general distinguish + * between user, kernel and hypervisor events. + * However, context switches and cpu migrations are considered + * to be kernel events, and page faults are never hypervisor + * events. + */ + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + pmu = &perf_ops_cpu_clock; + + break; + case PERF_COUNT_SW_TASK_CLOCK: + /* + * If the user instantiates this as a per-cpu event, + * use the cpu_clock event instead. + */ + if (event->ctx->task) + pmu = &perf_ops_task_clock; + else + pmu = &perf_ops_cpu_clock; + + break; + case PERF_COUNT_SW_PAGE_FAULTS: + case PERF_COUNT_SW_PAGE_FAULTS_MIN: + case PERF_COUNT_SW_PAGE_FAULTS_MAJ: + case PERF_COUNT_SW_CONTEXT_SWITCHES: + case PERF_COUNT_SW_CPU_MIGRATIONS: + if (!event->parent) { + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + pmu = &perf_ops_generic; + break; + } + + return pmu; +} + +/* + * Allocate and initialize a event structure + */ +static struct perf_event * +perf_event_alloc(struct perf_event_attr *attr, + int cpu, + struct perf_event_context *ctx, + struct perf_event *group_leader, + struct perf_event *parent_event, + gfp_t gfpflags) +{ + const struct pmu *pmu; + struct perf_event *event; + struct hw_perf_event *hwc; + long err; + + event = kzalloc(sizeof(*event), gfpflags); + if (!event) + return ERR_PTR(-ENOMEM); + + /* + * Single events are their own group leaders, with an + * empty sibling list: + */ + if (!group_leader) + group_leader = event; + + mutex_init(&event->child_mutex); + INIT_LIST_HEAD(&event->child_list); + + INIT_LIST_HEAD(&event->group_entry); + INIT_LIST_HEAD(&event->event_entry); + INIT_LIST_HEAD(&event->sibling_list); + init_waitqueue_head(&event->waitq); + + mutex_init(&event->mmap_mutex); + + event->cpu = cpu; + event->attr = *attr; + event->group_leader = group_leader; + event->pmu = NULL; + event->ctx = ctx; + event->oncpu = -1; + + event->parent = parent_event; + + event->ns = get_pid_ns(current->nsproxy->pid_ns); + event->id = atomic64_inc_return(&perf_event_id); + + event->state = PERF_EVENT_STATE_INACTIVE; + + if (attr->disabled) + event->state = PERF_EVENT_STATE_OFF; + + pmu = NULL; + + hwc = &event->hw; + hwc->sample_period = attr->sample_period; + if (attr->freq && 
attr->sample_freq) + hwc->sample_period = 1; + hwc->last_period = hwc->sample_period; + + atomic64_set(&hwc->period_left, hwc->sample_period); + + /* + * we currently do not support PERF_FORMAT_GROUP on inherited events + */ + if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) + goto done; + + switch (attr->type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + pmu = hw_perf_event_init(event); + break; + + case PERF_TYPE_SOFTWARE: + pmu = sw_perf_event_init(event); + break; + + case PERF_TYPE_TRACEPOINT: + pmu = tp_perf_event_init(event); + break; + + default: + break; + } +done: + err = 0; + if (!pmu) + err = -EINVAL; + else if (IS_ERR(pmu)) + err = PTR_ERR(pmu); + + if (err) { + if (event->ns) + put_pid_ns(event->ns); + kfree(event); + return ERR_PTR(err); + } + + event->pmu = pmu; + + if (!event->parent) { + atomic_inc(&nr_events); + if (event->attr.mmap) + atomic_inc(&nr_mmap_events); + if (event->attr.comm) + atomic_inc(&nr_comm_events); + if (event->attr.task) + atomic_inc(&nr_task_events); + } + + return event; +} + +static int perf_copy_attr(struct perf_event_attr __user *uattr, + struct perf_event_attr *attr) +{ + u32 size; + int ret; + + if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) + return -EFAULT; + + /* + * zero the full structure, so that a short copy will be nice. + */ + memset(attr, 0, sizeof(*attr)); + + ret = get_user(size, &uattr->size); + if (ret) + return ret; + + if (size > PAGE_SIZE) /* silly large */ + goto err_size; + + if (!size) /* abi compat */ + size = PERF_ATTR_SIZE_VER0; + + if (size < PERF_ATTR_SIZE_VER0) + goto err_size; + + /* + * If we're handed a bigger struct than we know of, + * ensure all the unknown bits are 0 - i.e. new + * user-space does not rely on any kernel feature + * extensions we dont know about yet. + */ + if (size > sizeof(*attr)) { + unsigned char __user *addr; + unsigned char __user *end; + unsigned char val; + + addr = (void __user *)uattr + sizeof(*attr); + end = (void __user *)uattr + size; + + for (; addr < end; addr++) { + ret = get_user(val, addr); + if (ret) + return ret; + if (val) + goto err_size; + } + size = sizeof(*attr); + } + + ret = copy_from_user(attr, uattr, size); + if (ret) + return -EFAULT; + + /* + * If the type exists, the corresponding creation will verify + * the attr->config. 
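+ * (sw_perf_event_init(), for instance, leaves pmu NULL for an
+ * unknown config, which perf_event_alloc() turns into -EINVAL)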
+ */ + if (attr->type >= PERF_TYPE_MAX) + return -EINVAL; + + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) + return -EINVAL; + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) + return -EINVAL; + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) + return -EINVAL; + +out: + return ret; + +err_size: + put_user(sizeof(*attr), &uattr->size); + ret = -E2BIG; + goto out; +} + +int perf_event_set_output(struct perf_event *event, int output_fd) +{ + struct perf_event *output_event = NULL; + struct file *output_file = NULL; + struct perf_event *old_output; + int fput_needed = 0; + int ret = -EINVAL; + + if (!output_fd) + goto set; + + output_file = fget_light(output_fd, &fput_needed); + if (!output_file) + return -EBADF; + + if (output_file->f_op != &perf_fops) + goto out; + + output_event = output_file->private_data; + + /* Don't chain output fds */ + if (output_event->output) + goto out; + + /* Don't set an output fd when we already have an output channel */ + if (event->data) + goto out; + + atomic_long_inc(&output_file->f_count); + +set: + mutex_lock(&event->mmap_mutex); + old_output = event->output; + rcu_assign_pointer(event->output, output_event); + mutex_unlock(&event->mmap_mutex); + + if (old_output) { + /* + * we need to make sure no existing perf_output_*() + * is still referencing this event. + */ + synchronize_rcu(); + fput(old_output->filp); + } + + ret = 0; +out: + fput_light(output_file, fput_needed); + return ret; +} + +/** + * sys_perf_event_open - open a performance event, associate it to a task/cpu + * + * @attr_uptr: event_id type attributes for monitoring/sampling + * @pid: target pid + * @cpu: target cpu + * @group_fd: group leader event fd + */ +SYSCALL_DEFINE5(perf_event_open, + struct perf_event_attr __user *, attr_uptr, + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) +{ + struct perf_event *event, *group_leader; + struct perf_event_attr attr; + struct perf_event_context *ctx; + struct file *event_file = NULL; + struct file *group_file = NULL; + int fput_needed = 0; + int fput_needed2 = 0; + int err; + + /* for future expandability... 
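+ * -- only flag bits we already know about are accepted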
*/ + if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) + return -EINVAL; + + err = perf_copy_attr(attr_uptr, &attr); + if (err) + return err; + + if (!attr.exclude_kernel) { + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + } + + if (attr.freq) { + if (attr.sample_freq > sysctl_perf_event_sample_rate) + return -EINVAL; + } + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + /* + * Look up the group leader (we will attach this event to it): + */ + group_leader = NULL; + if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { + err = -EINVAL; + group_file = fget_light(group_fd, &fput_needed); + if (!group_file) + goto err_put_context; + if (group_file->f_op != &perf_fops) + goto err_put_context; + + group_leader = group_file->private_data; + /* + * Do not allow a recursive hierarchy (this new sibling + * becoming part of another group-sibling): + */ + if (group_leader->group_leader != group_leader) + goto err_put_context; + /* + * Do not allow to attach to a group in a different + * task or CPU context: + */ + if (group_leader->ctx != ctx) + goto err_put_context; + /* + * Only a group leader can be exclusive or pinned + */ + if (attr.exclusive || attr.pinned) + goto err_put_context; + } + + event = perf_event_alloc(&attr, cpu, ctx, group_leader, + NULL, GFP_KERNEL); + err = PTR_ERR(event); + if (IS_ERR(event)) + goto err_put_context; + + err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); + if (err < 0) + goto err_free_put_context; + + event_file = fget_light(err, &fput_needed2); + if (!event_file) + goto err_free_put_context; + + if (flags & PERF_FLAG_FD_OUTPUT) { + err = perf_event_set_output(event, group_fd); + if (err) + goto err_fput_free_put_context; + } + + event->filp = event_file; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + +err_fput_free_put_context: + fput_light(event_file, fput_needed2); + +err_free_put_context: + if (err < 0) + kfree(event); + +err_put_context: + if (err < 0) + put_ctx(ctx); + + fput_light(group_file, fput_needed); + + return err; +} + +/* + * inherit a event from parent task to child task: + */ +static struct perf_event * +inherit_event(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event *group_leader, + struct perf_event_context *child_ctx) +{ + struct perf_event *child_event; + + /* + * Instead of creating recursive hierarchies of events, + * we link inherited events back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + if (parent_event->parent) + parent_event = parent_event->parent; + + child_event = perf_event_alloc(&parent_event->attr, + parent_event->cpu, child_ctx, + group_leader, parent_event, + GFP_KERNEL); + if (IS_ERR(child_event)) + return child_event; + get_ctx(child_ctx); + + /* + * Make the child state follow the state of the parent event, + * not its attr.disabled bit. We hold the parent's mutex, + * so we won't race with perf_event_{en, dis}able_family. 
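+ * (an enabled parent yields an INACTIVE child that can be scheduled
+ * in; a disabled parent yields an OFF child)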
+ */ + if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) + child_event->state = PERF_EVENT_STATE_INACTIVE; + else + child_event->state = PERF_EVENT_STATE_OFF; + + if (parent_event->attr.freq) + child_event->hw.sample_period = parent_event->hw.sample_period; + + /* + * Link it up in the child's context: + */ + add_event_to_ctx(child_event, child_ctx); + + /* + * Get a reference to the parent filp - we will fput it + * when the child event exits. This is safe to do because + * we are in the parent and we know that the filp still + * exists and has a nonzero count: + */ + atomic_long_inc(&parent_event->filp->f_count); + + /* + * Link this into the parent event's child list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_add_tail(&child_event->child_list, &parent_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + return child_event; +} + +static int inherit_group(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event_context *child_ctx) +{ + struct perf_event *leader; + struct perf_event *sub; + struct perf_event *child_ctr; + + leader = inherit_event(parent_event, parent, parent_ctx, + child, NULL, child_ctx); + if (IS_ERR(leader)) + return PTR_ERR(leader); + list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { + child_ctr = inherit_event(sub, parent, parent_ctx, + child, leader, child_ctx); + if (IS_ERR(child_ctr)) + return PTR_ERR(child_ctr); + } + return 0; +} + +static void sync_child_event(struct perf_event *child_event, + struct task_struct *child) +{ + struct perf_event *parent_event = child_event->parent; + u64 child_val; + + if (child_event->attr.inherit_stat) + perf_event_read_event(child_event, child); + + child_val = atomic64_read(&child_event->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_event->count); + atomic64_add(child_event->total_time_enabled, + &parent_event->child_total_time_enabled); + atomic64_add(child_event->total_time_running, + &parent_event->child_total_time_running); + + /* + * Remove this event from the parent's list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_del_init(&child_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + /* + * Release the parent event, if this was the last + * reference to it. + */ + fput(parent_event->filp); +} + +static void +__perf_event_exit_task(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child) +{ + struct perf_event *parent_event; + + update_event_times(child_event); + perf_event_remove_from_context(child_event); + + parent_event = child_event->parent; + /* + * It can happen that parent exits first, and has events + * that are still around due to the child reference. These + * events need to be zapped - but otherwise linger. + */ + if (parent_event) { + sync_child_event(child_event, child); + free_event(child_event); + } +} + +/* + * When a child task exits, feed back event values to parent events. 
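+ * (sync_child_event() folds the child's count and times into the
+ * parent before the child event is freed)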
+ */ +void perf_event_exit_task(struct task_struct *child) +{ + struct perf_event *child_event, *tmp; + struct perf_event_context *child_ctx; + unsigned long flags; + + if (likely(!child->perf_event_ctxp)) { + perf_event_task(child, NULL, 0); + return; + } + + local_irq_save(flags); + /* + * We can't reschedule here because interrupts are disabled, + * and either child is current or it is a task that can't be + * scheduled, so we are now safe from rescheduling changing + * our context. + */ + child_ctx = child->perf_event_ctxp; + __perf_event_task_sched_out(child_ctx); + + /* + * Take the context lock here so that if find_get_context is + * reading child->perf_event_ctxp, we wait until it has + * incremented the context's refcount before we do put_ctx below. + */ + spin_lock(&child_ctx->lock); + child->perf_event_ctxp = NULL; + /* + * If this context is a clone; unclone it so it can't get + * swapped to another process while we're removing all + * the events from it. + */ + unclone_ctx(child_ctx); + spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* + * Report the task dead after unscheduling the events so that we + * won't get any samples after PERF_RECORD_EXIT. We can however still + * get a few PERF_RECORD_READ events. + */ + perf_event_task(child, child_ctx, 0); + + /* + * We can recurse on the same lock type through: + * + * __perf_event_exit_task() + * sync_child_event() + * fput(parent_event->filp) + * perf_release() + * mutex_lock(&ctx->mutex) + * + * But since its the parent context it won't be the same instance. + */ + mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); + +again: + list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, + group_entry) + __perf_event_exit_task(child_event, child_ctx, child); + + /* + * If the last event was a group event, it will have appended all + * its siblings to the list, but we obtained 'tmp' before that which + * will still point to the list head terminating the iteration. + */ + if (!list_empty(&child_ctx->group_list)) + goto again; + + mutex_unlock(&child_ctx->mutex); + + put_ctx(child_ctx); +} + +/* + * free an unexposed, unused context as created by inheritance by + * init_task below, used by fork() in case of fail. + */ +void perf_event_free_task(struct task_struct *task) +{ + struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event *event, *tmp; + + if (!ctx) + return; + + mutex_lock(&ctx->mutex); +again: + list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { + struct perf_event *parent = event->parent; + + if (WARN_ON_ONCE(!parent)) + continue; + + mutex_lock(&parent->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent->child_mutex); + + fput(parent->filp); + + list_del_event(event, ctx); + free_event(event); + } + + if (!list_empty(&ctx->group_list)) + goto again; + + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + +/* + * Initialize the perf_event context in task_struct + */ +int perf_event_init_task(struct task_struct *child) +{ + struct perf_event_context *child_ctx, *parent_ctx; + struct perf_event_context *cloned_ctx; + struct perf_event *event; + struct task_struct *parent = current; + int inherited_all = 1; + int ret = 0; + + child->perf_event_ctxp = NULL; + + mutex_init(&child->perf_event_mutex); + INIT_LIST_HEAD(&child->perf_event_list); + + if (likely(!parent->perf_event_ctxp)) + return 0; + + /* + * This is executed from the parent task context, so inherit + * events that have been marked for cloning. 
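+ * (only events with attr.inherit set are copied; see the
+ * !event->attr.inherit check below)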
+ * First allocate and initialize a context for the child. + */ + + child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); + if (!child_ctx) + return -ENOMEM; + + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; + get_task_struct(child); + + /* + * If the parent's context is a clone, pin it so it won't get + * swapped under us. + */ + parent_ctx = perf_pin_task_context(parent); + + /* + * No need to check if parent_ctx != NULL here; since we saw + * it non-NULL earlier, the only reason for it to become NULL + * is if we exit, and since we're currently in the middle of + * a fork we can't be exiting at the same time. + */ + + /* + * Lock the parent list. No need to lock the child - not PID + * hashed yet and not running, so nobody can access it. + */ + mutex_lock(&parent_ctx->mutex); + + /* + * We dont have to disable NMIs - we are only looking at + * the list, not manipulating it: + */ + list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { + if (event != event->group_leader) + continue; + + if (!event->attr.inherit) { + inherited_all = 0; + continue; + } + + ret = inherit_group(event, parent, parent_ctx, + child, child_ctx); + if (ret) { + inherited_all = 0; + break; + } + } + + if (inherited_all) { + /* + * Mark the child context as a clone of the parent + * context, or of whatever the parent is a clone of. + * Note that if the parent is a clone, it could get + * uncloned at any point, but that doesn't matter + * because the list of events and the generation + * count can't have changed since we took the mutex. + */ + cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); + if (cloned_ctx) { + child_ctx->parent_ctx = cloned_ctx; + child_ctx->parent_gen = parent_ctx->parent_gen; + } else { + child_ctx->parent_ctx = parent_ctx; + child_ctx->parent_gen = parent_ctx->generation; + } + get_ctx(child_ctx->parent_ctx); + } + + mutex_unlock(&parent_ctx->mutex); + + perf_unpin_context(parent_ctx); + + return ret; +} + +static void __cpuinit perf_event_init_cpu(int cpu) +{ + struct perf_cpu_context *cpuctx; + + cpuctx = &per_cpu(perf_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + + spin_lock(&perf_resource_lock); + cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; + spin_unlock(&perf_resource_lock); + + hw_perf_event_setup(cpu); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void __perf_event_exit_cpu(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event *event, *tmp; + + list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) + __perf_event_remove_from_context(event); +} +static void perf_event_exit_cpu(int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_event_context *ctx = &cpuctx->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); + mutex_unlock(&ctx->mutex); +} +#else +static inline void perf_event_exit_cpu(int cpu) { } +#endif + +static int __cpuinit +perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action) { + + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + perf_event_init_cpu(cpu); + break; + + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_event_setup_online(cpu); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + perf_event_exit_cpu(cpu); + break; + + default: + break; + } + + return 
NOTIFY_OK; +} + +/* + * This has to have a higher priority than migration_notifier in sched.c. + */ +static struct notifier_block __cpuinitdata perf_cpu_nb = { + .notifier_call = perf_cpu_notify, + .priority = 20, +}; + +void __init perf_event_init(void) +{ + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&perf_cpu_nb); +} + +static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) +{ + return sprintf(buf, "%d\n", perf_reserved_percpu); +} + +static ssize_t +perf_set_reserve_percpu(struct sysdev_class *class, + const char *buf, + size_t count) +{ + struct perf_cpu_context *cpuctx; + unsigned long val; + int err, cpu, mpt; + + err = strict_strtoul(buf, 10, &val); + if (err) + return err; + if (val > perf_max_events) + return -EINVAL; + + spin_lock(&perf_resource_lock); + perf_reserved_percpu = val; + for_each_online_cpu(cpu) { + cpuctx = &per_cpu(perf_cpu_context, cpu); + spin_lock_irq(&cpuctx->ctx.lock); + mpt = min(perf_max_events - cpuctx->ctx.nr_events, + perf_max_events - perf_reserved_percpu); + cpuctx->max_pertask = mpt; + spin_unlock_irq(&cpuctx->ctx.lock); + } + spin_unlock(&perf_resource_lock); + + return count; +} + +static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) +{ + return sprintf(buf, "%d\n", perf_overcommit); +} + +static ssize_t +perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) +{ + unsigned long val; + int err; + + err = strict_strtoul(buf, 10, &val); + if (err) + return err; + if (val > 1) + return -EINVAL; + + spin_lock(&perf_resource_lock); + perf_overcommit = val; + spin_unlock(&perf_resource_lock); + + return count; +} + +static SYSDEV_CLASS_ATTR( + reserve_percpu, + 0644, + perf_show_reserve_percpu, + perf_set_reserve_percpu + ); + +static SYSDEV_CLASS_ATTR( + overcommit, + 0644, + perf_show_overcommit, + perf_set_overcommit + ); + +static struct attribute *perfclass_attrs[] = { + &attr_reserve_percpu.attr, + &attr_overcommit.attr, + NULL +}; + +static struct attribute_group perfclass_attr_group = { + .attrs = perfclass_attrs, + .name = "perf_events", +}; + +static int __init perf_event_sysfs_init(void) +{ + return sysfs_create_group(&cpu_sysdev_class.kset.kobj, + &perfclass_attr_group); +} +device_initcall(perf_event_sysfs_init); diff --git a/kernel/sched.c b/kernel/sched.c index faf4d463bbf..291c8d213d1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include @@ -2059,7 +2059,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); #endif - perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); } p->se.vruntime -= old_cfsrq->min_vruntime - @@ -2724,7 +2724,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_counter_task_sched_in(current, cpu_of(rq)); + perf_event_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); @@ -5199,7 +5199,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); - perf_counter_task_tick(curr, cpu); + perf_event_task_tick(curr, cpu); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); 
@@ -5415,7 +5415,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_counter_task_sched_out(prev, next, cpu); + perf_event_task_sched_out(prev, next, cpu); rq->nr_switches++; rq->curr = next; @@ -7692,7 +7692,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* * Register at high priority so that task migration (migrate_all_tasks) * happens before everything else. This has to be lower priority than - * the notifier in the perf_counter subsystem, though. + * the notifier in the perf_event subsystem, though. */ static struct notifier_block __cpuinitdata migration_notifier = { .notifier_call = migration_call, @@ -9549,7 +9549,7 @@ void __init sched_init(void) alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ - perf_counter_init(); + perf_event_init(); scheduler_running = 1; } diff --git a/kernel/sys.c b/kernel/sys.c index b3f1097c76f..ea5c3bcac88 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -1511,11 +1511,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; - case PR_TASK_PERF_COUNTERS_DISABLE: - error = perf_counter_task_disable(); + case PR_TASK_PERF_EVENTS_DISABLE: + error = perf_event_task_disable(); break; - case PR_TASK_PERF_COUNTERS_ENABLE: - error = perf_counter_task_enable(); + case PR_TASK_PERF_EVENTS_ENABLE: + error = perf_event_task_enable(); break; case PR_GET_TIMERSLACK: error = current->timer_slack_ns; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 68320f6b07b..515bc230ac2 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -177,4 +177,4 @@ cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); /* performance counters: */ -cond_syscall(sys_perf_counter_open); +cond_syscall(sys_perf_event_open); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a631ba684a..6ba49c7cb12 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include @@ -964,28 +964,28 @@ static struct ctl_table kern_table[] = { .child = slow_work_sysctls, }, #endif -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_paranoid", - .data = &sysctl_perf_counter_paranoid, - .maxlen = sizeof(sysctl_perf_counter_paranoid), + .procname = "perf_event_paranoid", + .data = &sysctl_perf_event_paranoid, + .maxlen = sizeof(sysctl_perf_event_paranoid), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_mlock_kb", - .data = &sysctl_perf_counter_mlock, - .maxlen = sizeof(sysctl_perf_counter_mlock), + .procname = "perf_event_mlock_kb", + .data = &sysctl_perf_event_mlock, + .maxlen = sizeof(sysctl_perf_event_mlock), .mode = 0644, .proc_handler = &proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_max_sample_rate", - .data = &sysctl_perf_counter_sample_rate, - .maxlen = sizeof(sysctl_perf_counter_sample_rate), + .procname = "perf_event_max_sample_rate", + .data = &sysctl_perf_event_sample_rate, + .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, .proc_handler = &proc_dointvec, }, diff --git a/kernel/timer.c b/kernel/timer.c index bbb51074680..811e5c39145 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include @@ -1187,7 +1187,7 @@ static void 
run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); - perf_counter_do_pending(); + perf_event_do_pending(); hrtimer_run_pending(); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3c6a0..233f3483ac8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -2,7 +2,7 @@ #include #include #include -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include #include "trace_output.h" @@ -414,7 +414,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_id, 0, 1, rec, size); } while(0); } @@ -476,7 +476,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec.nr = syscall_nr; rec.ret = syscall_get_return_value(current, regs); - perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); + perf_tp_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); } int reg_prof_syscall_exit(char *name) diff --git a/mm/mmap.c b/mm/mmap.c index 26892e346d8..376492ed08f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -28,7 +28,7 @@ #include #include #include -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include #include @@ -1220,7 +1220,7 @@ munmap_back: if (correct_wcount) atomic_inc(&inode->i_writecount); out: - perf_counter_mmap(vma); + perf_event_mmap(vma); mm->total_vm += len >> PAGE_SHIFT; vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); @@ -2308,7 +2308,7 @@ int install_special_mapping(struct mm_struct *mm, mm->total_vm += len >> PAGE_SHIFT; - perf_counter_mmap(vma); + perf_event_mmap(vma); return 0; } diff --git a/mm/mprotect.c b/mm/mprotect.c index d80311baeb2..8bc969d8112 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -23,7 +23,7 @@ #include #include #include -#include <linux/perf_counter.h> +#include <linux/perf_event.h> #include #include #include @@ -300,7 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); if (error) goto out; - perf_counter_mmap(vma); + perf_event_mmap(vma); nstart = tmp; if (nstart < prev->vm_end) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0aba8b6e9c5..b5f1953b614 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -318,7 +318,7 @@ export PERL_PATH LIB_FILE=libperf.a -LIB_H += ../../include/linux/perf_counter.h +LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h LIB_H += util/include/linux/list.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 043d85b7e25..1ec74161581 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -505,7 +505,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + if (event->header.misc & PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -513,7 +513,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ......
dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (event->header.misc & PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -565,7 +565,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -575,7 +575,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -591,14 +591,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread; thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -614,7 +614,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); @@ -627,7 +627,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -639,23 +639,23 @@ static int process_event(event_t *event, unsigned long offset, unsigned long head) { switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: + case PERF_RECORD_FORK: return process_fork_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2459e5a22ed..a5a050af8e7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -77,7 +77,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static unsigned long mmap_read_head(struct mmap_data *md) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; long head; head = pc->data_head; @@ -88,7 +88,7 @@ static unsigned long mmap_read_head(struct mmap_data *md) static void mmap_write_tail(struct mmap_data 
*md, unsigned long tail) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; /* * ensure all reads are done before we write the tail out. @@ -233,7 +233,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) } } - comm_ev.header.type = PERF_EVENT_COMM; + comm_ev.header.type = PERF_RECORD_COMM; size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); @@ -288,7 +288,7 @@ static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header = { .type = PERF_EVENT_MMAP }, + .header = { .type = PERF_RECORD_MMAP }, }; int n; size_t size; @@ -355,7 +355,7 @@ static void synthesize_all(void) static int group_fd; -static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) +static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) { struct perf_header_attr *h_attr; @@ -371,7 +371,7 @@ static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int static void create_counter(int counter, int cpu, pid_t pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ struct { @@ -417,7 +417,7 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->disabled = 1; try_again: - fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); + fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); if (fd[nr_cpu][counter] < 0) { int err = errno; @@ -444,7 +444,7 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[nr_cpu][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } @@ -478,7 +478,7 @@ try_again: if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { int ret; - ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd); + ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); assert(ret != -1); } else { event_array[nr_poll].fd = fd[nr_cpu][counter]; @@ -496,7 +496,7 @@ try_again: } } - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); + ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } static void open_counters(int cpu, pid_t pid) @@ -642,7 +642,7 @@ static int __cmd_record(int argc, const char **argv) if (done) { for (i = 0; i < nr_cpu; i++) { for (counter = 0; counter < nr_counters; counter++) - ioctl(fd[i][counter], PERF_COUNTER_IOC_DISABLE); + ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); } } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cdf9a8d27bb..19669c20088 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1121,7 +1121,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1158,9 +1158,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & 
PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1168,7 +1168,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -1210,7 +1210,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -1221,7 +1221,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -1238,14 +1238,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -1262,10 +1262,10 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", + dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), (void *)(long)(event->header.size), - event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT", + event->header.type == PERF_RECORD_FORK ? 
"FORK" : "EXIT", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); @@ -1276,11 +1276,11 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) if (thread == parent) return 0; - if (event->header.type == PERF_EVENT_EXIT) + if (event->header.type == PERF_RECORD_EXIT) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -1291,7 +1291,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { - dump_printf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->lost.id, @@ -1305,7 +1305,7 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head) static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - struct perf_counter_attr *attr; + struct perf_event_attr *attr; attr = perf_header__find_attr(event->read.id, header); @@ -1319,7 +1319,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dump_printf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", + dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, @@ -1337,31 +1337,31 @@ process_event(event_t *event, unsigned long offset, unsigned long head) trace_event(event); switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: - case PERF_EVENT_EXIT: + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: return process_task_event(event, offset, head); - case PERF_EVENT_LOST: + case PERF_RECORD_LOST: return process_lost_event(event, offset, head); - case PERF_EVENT_READ: + case PERF_RECORD_READ: return process_read_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 275d79c6627..ea9c15c0cdf 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1573,7 +1573,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1589,9 +1589,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1599,7 +1599,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) 
dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -1626,23 +1626,23 @@ process_event(event_t *event, unsigned long offset, unsigned long head) nr_events++; switch (event->header.type) { - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return 0; - case PERF_EVENT_LOST: + case PERF_RECORD_LOST: nr_lost_chunks++; nr_lost_events += event->lost.lost; return 0; - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_EXIT ... PERF_EVENT_READ: + case PERF_RECORD_EXIT ... PERF_RECORD_READ: return 0; - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MAX: + case PERF_RECORD_MAX: default: return -1; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 61b828236c1..16af2d82e85 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -48,7 +48,7 @@ #include #include -static struct perf_counter_attr default_attrs[] = { +static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -130,11 +130,11 @@ struct stats runtime_cycles_stats; attrs[counter].config == PERF_COUNT_##c) #define ERR_PERF_OPEN \ -"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" +"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" static void create_perf_stat_counter(int counter, int pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -144,7 +144,7 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); @@ -154,7 +154,7 @@ static void create_perf_stat_counter(int counter, int pid) attr->disabled = 1; attr->enable_on_exec = 1; - fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); + fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 60040639627..4405681b313 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -937,21 +937,21 @@ process_event(event_t *event) switch (event->header.type) { - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event); - case PERF_EVENT_FORK: + case PERF_RECORD_FORK: return process_fork_event(event); - case PERF_EVENT_EXIT: + case PERF_RECORD_EXIT: return process_exit_event(event); - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return queue_sample_event(event); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_MMAP: - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_MMAP: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4002ccb3675..1ca88896eee 100644 --- a/tools/perf/builtin-top.c +++ 
b/tools/perf/builtin-top.c @@ -901,7 +901,7 @@ struct mmap_data { static unsigned int mmap_read_head(struct mmap_data *md) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; int head; head = pc->data_head; @@ -977,9 +977,9 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.type == PERF_EVENT_SAMPLE) { + if (event->header.type == PERF_RECORD_SAMPLE) { int user = - (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; process_event(event->ip.ip, md->counter, user); } } @@ -1005,7 +1005,7 @@ int group_fd; static void start_counter(int i, int counter) { - struct perf_counter_attr *attr; + struct perf_event_attr *attr; int cpu; cpu = profile_cpu; @@ -1019,7 +1019,7 @@ static void start_counter(int i, int counter) attr->inherit = (cpu < 0) && inherit; try_again: - fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); + fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); if (fd[i][counter] < 0) { int err = errno; @@ -1044,7 +1044,7 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[i][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } assert(fd[i][counter] >= 0); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 914ab366e36..e9d256e2f47 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -35,14 +35,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -82,7 +82,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -98,9 +98,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -108,7 +108,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -146,19 +146,19 @@ process_event(event_t *event, unsigned long offset, unsigned long head) trace_event(event); switch (event->header.type) { - case PERF_EVENT_MMAP ... PERF_EVENT_LOST: + case PERF_RECORD_MMAP ... 
PERF_RECORD_LOST: return 0; - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_EXIT ... PERF_EVENT_READ: + case PERF_RECORD_EXIT ... PERF_RECORD_READ: return 0; - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MAX: + case PERF_RECORD_MAX: default: return -1; } diff --git a/tools/perf/design.txt b/tools/perf/design.txt index f71e0d245cb..f1946d107b1 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -18,10 +18,10 @@ underlying hardware counters. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. -The special file descriptor is opened via the perf_counter_open() +The special file descriptor is opened via the perf_event_open() system call: - int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, + int sys_perf_event_open(struct perf_event_hw_event *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); @@ -32,9 +32,9 @@ can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. -When creating a new counter fd, 'perf_counter_hw_event' is: +When creating a new counter fd, 'perf_event_hw_event' is: -struct perf_counter_hw_event { +struct perf_event_hw_event { /* * The MSB of the config word signifies if the rest contains cpu * specific (raw) counter configuration data, if unset, the next @@ -93,7 +93,7 @@ specified by 'event_id': /* * Generalized performance counter event types, used by the hw_event.event_id - * parameter of the sys_perf_counter_open() syscall: + * parameter of the sys_perf_event_open() syscall: */ enum hw_event_ids { /* @@ -159,7 +159,7 @@ in size. * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. */ -enum perf_counter_read_format { +enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1, PERF_FORMAT_TOTAL_TIME_RUNNING = 2, }; @@ -178,7 +178,7 @@ interrupt: * Bits that can be set in hw_event.record_type to request information * in the overflow packets. */ -enum perf_counter_record_format { +enum perf_event_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, PERF_RECORD_TIME = 1U << 2, @@ -228,7 +228,7 @@ these events are recorded in the ring-buffer (see below). The 'comm' bit allows tracking of process comm data on process creation. This too is recorded in the ring-buffer (see below). -The 'pid' parameter to the perf_counter_open() system call allows the +The 'pid' parameter to the perf_event_open() system call allows the counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the @@ -258,7 +258,7 @@ The 'flags' parameter is currently unused and must be zero. The 'group_fd' parameter allows counter "groups" to be set up. A counter group has one counter which is the group "leader". The leader -is created first, with group_fd = -1 in the perf_counter_open call +is created first, with group_fd = -1 in the perf_event_open call that creates it. The rest of the group members are created subsequently, with group_fd giving the fd of the group leader. (A single counter on its own is created with group_fd = -1 and is @@ -277,13 +277,13 @@ tracking are logged into a ring-buffer. This ring-buffer is created and accessed through mmap(). 
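As an aside for readers new to this interface, the sketch below is illustrative only and is not part of the patch: it opens a single hardware counter through the renamed syscall and maps the 1+2^n pages described next. It assumes the renamed <linux/perf_event.h> is installed and that the libc exposes __NR_perf_event_open (tools/perf otherwise defines the syscall number per-arch in perf.h).

	#include <linux/perf_event.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		struct perf_event_mmap_page *meta;
		void *buf;
		long psz = sysconf(_SC_PAGESIZE);
		int fd, n = 4;	/* 2^4 data pages */

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

		/* pid == 0, cpu == -1: count the calling task on any CPU */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		/* one meta-data page plus 2^n ring-buffer pages, as below */
		buf = mmap(NULL, (1 + (1 << n)) * psz, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, 0);
		if (buf == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		meta = buf;
		/* a real reader must rmb() after loading data_head; see below */
		printf("data_head = %llu\n", (unsigned long long)meta->data_head);
		return 0;
	}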
The mmap size should be 1+2^n pages, where the first page is a meta-data page -(struct perf_counter_mmap_page) that contains various bits of information such +(struct perf_event_mmap_page) that contains various bits of information such as where the ring-buffer head is. /* * Structure of the page that can be mapped via mmap */ -struct perf_counter_mmap_page { +struct perf_event_mmap_page { __u32 version; /* version number of this structure */ __u32 compat_version; /* lowest version this is compat with */ @@ -317,7 +317,7 @@ struct perf_counter_mmap_page { * Control data for the mmap() data buffer. * * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). + * platforms, after reading this value -- see perf_event_wakeup(). */ __u32 data_head; /* head in the data section */ }; @@ -327,9 +327,9 @@ NOTE: the hw-counter userspace bits are arch specific and are currently only The following 2^n pages are the ring-buffer which contains events of the form: -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (1 << 1) +#define PERF_RECORD_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -353,8 +353,8 @@ enum perf_event_type { * char filename[]; * }; */ - PERF_EVENT_MMAP = 1, - PERF_EVENT_MUNMAP = 2, + PERF_RECORD_MMAP = 1, + PERF_RECORD_MUNMAP = 2, /* * struct { @@ -364,10 +364,10 @@ enum perf_event_type { * char comm[]; * }; */ - PERF_EVENT_COMM = 3, + PERF_RECORD_COMM = 3, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * When header.misc & PERF_RECORD_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* * * struct { @@ -397,7 +397,7 @@ Notification of new events is possible through poll()/select()/epoll() and fcntl() managing signals. Normally a notification is generated for every page filled, however one can -additionally set perf_counter_hw_event.wakeup_events to generate one every +additionally set perf_event_hw_event.wakeup_events to generate one every so many counter overflow events. Future work will include a splice() interface to the ring-buffer. @@ -409,11 +409,11 @@ events but does continue to exist and maintain its count value. An individual counter or counter group can be enabled with - ioctl(fd, PERF_COUNTER_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE); or disabled with - ioctl(fd, PERF_COUNTER_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE); Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the @@ -424,16 +424,16 @@ other counter. Additionally, non-inherited overflow counters can use - ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr); + ioctl(fd, PERF_EVENT_IOC_REFRESH, nr); to enable a counter for 'nr' events, after which it gets disabled again. A process can enable or disable all the counter groups that are attached to it, using prctl: - prctl(PR_TASK_PERF_COUNTERS_ENABLE); + prctl(PR_TASK_PERF_EVENTS_ENABLE); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); + prctl(PR_TASK_PERF_EVENTS_DISABLE); This applies to all counters on the current process, whether created by this process or by another, and doesn't affect any counters that @@ -447,11 +447,11 @@ Arch requirements If your architecture does not have hardware performance metrics, you can still use the generic software counters based on hrtimers for sampling. 
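The enable/disable ioctls and the prctl pair described above compose as in this short sketch, which is again illustrative rather than part of the patch; fd is a counter opened as in the earlier example, and the PR_TASK_PERF_EVENTS_* fallback values mirror what tools/perf/perf.h defines after this rename.

	#include <linux/perf_event.h>
	#include <sys/ioctl.h>
	#include <sys/prctl.h>

	#ifndef PR_TASK_PERF_EVENTS_DISABLE
	#define PR_TASK_PERF_EVENTS_DISABLE	31
	#define PR_TASK_PERF_EVENTS_ENABLE	32
	#endif

	static void pause_and_resume(int fd)
	{
		/* stop one counter (or its whole group, via the leader)... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE);
		/* ...and restart it */
		ioctl(fd, PERF_EVENT_IOC_ENABLE);

		/* non-inherited overflow counter: one more event, then off */
		ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);

		/* cheaply gate every counter attached to the current task */
		prctl(PR_TASK_PERF_EVENTS_DISABLE);
		prctl(PR_TASK_PERF_EVENTS_ENABLE);
	}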
-So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you +So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you will need at least this: - - asm/perf_counter.h - a basic stub will suffice at first + - asm/perf_event.h - a basic stub will suffice at first - support for atomic64 types (and associated helper functions) - - set_perf_counter_pending() implemented + - set_perf_event_pending() implemented If your architecture does have hardware capabilities, you can override the -weak stub hw_perf_counter_init() to register hardware counters. +weak stub hw_perf_event_init() to register hardware counters. diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2abeb20d0bf..8cc4623afd6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,15 +52,15 @@ #include #include -#include "../../include/linux/perf_counter.h" +#include "../../include/linux/perf_event.h" #include "util/types.h" /* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all * counters in the current task. */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 #ifndef NSEC_PER_SEC # define NSEC_PER_SEC 1000000000ULL @@ -90,12 +90,12 @@ static inline unsigned long long rdclock(void) _min1 < _min2 ? _min1 : _min2; }) static inline int -sys_perf_counter_open(struct perf_counter_attr *attr, +sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { attr->size = sizeof(*attr); - return syscall(__NR_perf_counter_open, attr, pid, cpu, + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 018d414a09d..2c9c26d6ded 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,5 +1,5 @@ -#ifndef __PERF_EVENT_H -#define __PERF_EVENT_H +#ifndef __PERF_RECORD_H +#define __PERF_RECORD_H #include "../perf.h" #include "util.h" #include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index bb4fca3efcc..e306857b2c2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -9,7 +9,7 @@ /* * Create new perf.data header attribute: */ -struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) +struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) { struct perf_header_attr *self = malloc(sizeof(*self)); @@ -134,7 +134,7 @@ struct perf_file_section { }; struct perf_file_attr { - struct perf_counter_attr attr; + struct perf_event_attr attr; struct perf_file_section ids; }; @@ -320,7 +320,7 @@ u64 perf_header__sample_type(struct perf_header *header) return type; } -struct perf_counter_attr * +struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header) { int i; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 7b0e84a8717..a0761bc7863 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,12 +1,12 @@ #ifndef _PERF_HEADER_H #define _PERF_HEADER_H -#include "../../../include/linux/perf_counter.h" +#include "../../../include/linux/perf_event.h" #include #include "types.h" struct perf_header_attr { - struct perf_counter_attr attr; + struct perf_event_attr attr; int ids, size; u64 *id; off_t id_offset; @@ -34,11 +34,11 @@ char *perf_header__find_event(u64 id); struct perf_header_attr * -perf_header_attr__new(struct perf_counter_attr *attr); 
+perf_header_attr__new(struct perf_event_attr *attr); void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); -struct perf_counter_attr * +struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 89172fd0038..13ab4b842d4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -10,7 +10,7 @@ int nr_counters; -struct perf_counter_attr attrs[MAX_COUNTERS]; +struct perf_event_attr attrs[MAX_COUNTERS]; struct event_symbol { u8 type; @@ -48,13 +48,13 @@ static struct event_symbol event_symbols[] = { { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) +#define __PERF_EVENT_FIELD(config, name) \ + ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) +#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) +#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) +#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) +#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) static const char *hw_event_names[] = { "cycles", @@ -352,7 +352,7 @@ static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int } static enum event_result -parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) +parse_generic_hw_event(const char **str, struct perf_event_attr *attr) { const char *s = *str; int cache_type = -1, cache_op = -1, cache_result = -1; @@ -417,7 +417,7 @@ parse_single_tracepoint_event(char *sys_name, const char *evt_name, unsigned int evt_length, char *flags, - struct perf_counter_attr *attr, + struct perf_event_attr *attr, const char **strp) { char evt_path[MAXPATHLEN]; @@ -505,7 +505,7 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags) static enum event_result parse_tracepoint_event(const char **strp, - struct perf_counter_attr *attr) + struct perf_event_attr *attr) { const char *evt_name; char *flags; @@ -563,7 +563,7 @@ static int check_events(const char *str, unsigned int i) } static enum event_result -parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) +parse_symbolic_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; unsigned int i; @@ -582,7 +582,7 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_raw_event(const char **strp, struct perf_counter_attr *attr) +parse_raw_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; u64 config; @@ -601,7 +601,7 @@ parse_raw_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_numeric_event(const char **strp, struct perf_counter_attr *attr) +parse_numeric_event(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; char *endp; @@ -623,7 +623,7 @@ parse_numeric_event(const char **strp, struct perf_counter_attr *attr) } static enum event_result -parse_event_modifier(const char **strp, struct perf_counter_attr *attr) +parse_event_modifier(const char **strp, 
struct perf_event_attr *attr) { const char *str = *strp; int eu = 1, ek = 1, eh = 1; @@ -656,7 +656,7 @@ parse_event_modifier(const char **strp, struct perf_counter_attr *attr) * Symbolic names are (almost) exactly matched. */ static enum event_result -parse_event_symbols(const char **str, struct perf_counter_attr *attr) +parse_event_symbols(const char **str, struct perf_event_attr *attr) { enum event_result ret; @@ -711,7 +711,7 @@ static void store_event_type(const char *orgname) int parse_events(const struct option *opt __used, const char *str, int unset __used) { - struct perf_counter_attr attr; + struct perf_event_attr attr; enum event_result ret; if (strchr(str, ':')) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 60704c15961..30c60811284 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -16,7 +16,7 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern int nr_counters; -extern struct perf_counter_attr attrs[MAX_COUNTERS]; +extern struct perf_event_attr attrs[MAX_COUNTERS]; extern const char *event_name(int ctr); extern const char *__event_name(int type, u64 config); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 1fd824c1f1c..af4b0573b37 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -480,12 +480,12 @@ out: } static struct tracepoint_path * -get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters) +get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) { struct tracepoint_path path, *ppath = &path; int i; - for (i = 0; i < nb_counters; i++) { + for (i = 0; i < nb_events; i++) { if (pattrs[i].type != PERF_TYPE_TRACEPOINT) continue; ppath->next = tracepoint_id_to_path(pattrs[i].config); @@ -496,7 +496,7 @@ get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters) return path.next; } -void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters) +void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; struct tracepoint_path *tps; @@ -530,7 +530,7 @@ void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters) page_size = getpagesize(); write_or_die(&page_size, 4); - tps = get_tracepoints_path(pattrs, nb_counters); + tps = get_tracepoints_path(pattrs, nb_events); read_header_files(); read_ftrace_files(tps); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d35ebf1e29f..693f815c942 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -240,6 +240,6 @@ unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); -void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters); +void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); #endif /* _TRACE_EVENTS_H */ -- cgit v1.2.3-70-g09d2 From 2bcd57ab61e7cabed626226a3771617981c11ce1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 24 Sep 2009 04:22:25 +0400 Subject: headers: utsname.h redux * remove asm/atomic.h inclusion from linux/utsname.h -- not needed after kref conversion * remove linux/utsname.h inclusion from files which do not need it NOTE: it looks like fs/binfmt_elf.c do not need utsname.h, however due to some personality stuff it _is_ needed -- cowardly leave ELF-related headers and files alone. 
Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 1 - arch/arm/kernel/sys_arm.c | 1 - arch/frv/kernel/sys_frv.c | 1 - arch/h8300/kernel/sys_h8300.c | 1 - arch/m68k/kernel/sys_m68k.c | 1 - arch/m68knommu/kernel/sys_m68k.c | 1 - arch/microblaze/kernel/sys_microblaze.c | 1 - arch/mn10300/kernel/sys_mn10300.c | 1 - arch/parisc/kernel/sys_parisc32.c | 1 - arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/sys_ppc32.c | 1 - arch/s390/kernel/compat_linux.c | 1 - arch/s390/kernel/process.c | 1 - arch/sh/kernel/sys_sh32.c | 1 - arch/sh/kernel/sys_sh64.c | 1 - arch/sparc/kernel/sys_sparc32.c | 1 - arch/sparc/kernel/systbls.h | 3 ++- arch/x86/kernel/dumpstack_32.c | 1 - arch/x86/kernel/dumpstack_64.c | 1 - arch/x86/kernel/traps.c | 1 - drivers/media/video/usbvision/usbvision-core.c | 1 - drivers/media/video/usbvision/usbvision-i2c.c | 1 - drivers/media/video/usbvision/usbvision-video.c | 1 - drivers/s390/char/zcore.c | 1 - drivers/usb/gadget/f_loopback.c | 1 - drivers/usb/gadget/f_obex.c | 1 - drivers/usb/gadget/f_sourcesink.c | 1 - drivers/usb/gadget/u_audio.c | 1 - drivers/usb/gadget/u_ether.c | 1 - fs/gfs2/ops_inode.c | 1 - fs/lockd/xdr.c | 1 - fs/lockd/xdr4.c | 1 - fs/nfs/nfs2xdr.c | 1 - fs/nfs/nfs3proc.c | 1 - fs/nfs/nfs3xdr.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/nfs4xdr.c | 1 - fs/nfs/proc.c | 1 - fs/nfsd/nfs4idmap.c | 1 - fs/ocfs2/dlm/dlmast.c | 1 - fs/ocfs2/dlm/dlmconvert.c | 1 - fs/ocfs2/dlm/dlmdebug.c | 1 - fs/ocfs2/dlm/dlmdomain.c | 1 - fs/ocfs2/dlm/dlmlock.c | 1 - fs/ocfs2/dlm/dlmmaster.c | 1 - fs/ocfs2/dlm/dlmrecovery.c | 1 - fs/ocfs2/dlm/dlmthread.c | 1 - fs/ocfs2/dlm/dlmunlock.c | 1 - fs/ocfs2/super.c | 1 - fs/ocfs2/symlink.c | 1 - include/linux/utsname.h | 1 - init/main.c | 1 - kernel/power/swap.c | 1 - kernel/sysctl.c | 1 - kernel/uid16.c | 1 - net/sunrpc/auth_null.c | 1 - 56 files changed, 2 insertions(+), 56 deletions(-) (limited to 'arch/m68knommu') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 3a2fb7a02db..289039bb6bb 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index b3ec641b5cf..78ecaac6520 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -25,7 +25,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index baadc97f862..2b6b5289cdc 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -21,7 +21,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 2745656dcc5..8cb5d73a0e3 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -17,7 +17,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 7f54efaf60b..7deb402bfc7 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -20,7 +20,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 70028163862..efdd090778a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -17,7 +17,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/microblaze/kernel/sys_microblaze.c
b/arch/microblaze/kernel/sys_microblaze.c index b96f1682bb2..07cabed4b94 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -23,7 +23,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 3e52a105432..8ca5af00334 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -19,7 +19,6 @@ #include #include #include -#include <linux/utsname.h> #include #include diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 92a0acaa0d1..561388b17c9 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -18,7 +18,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1d5570a1e45..74cd1a7d0d4 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -24,7 +24,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 1cc5e9e5da9..b97c2d67f4a 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -22,7 +22,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 5519cb74510..0debcec23a3 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -24,7 +24,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 59fe6ecc6ed..5417eb57271 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -27,7 +27,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 63ba12836ea..eb68bfdd86e 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -9,7 +9,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 91fb8445a5a..287235768bc 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -23,7 +23,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f5000a460c0..04e28b2671c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -16,7 +16,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h index 15c2d752b2b..a63c5d2d984 100644 --- a/arch/sparc/kernel/systbls.h +++ b/arch/sparc/kernel/systbls.h @@ -3,10 +3,11 @@ #include #include -#include <linux/utsname.h> #include #include +struct new_utsname; + extern asmlinkage unsigned long sys_getpagesize(void); extern asmlinkage unsigned long sparc_brk(unsigned long brk); extern asmlinkage long sparc_pipe(struct pt_regs *regs); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba91c9..f7dd2a7c3bf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -5,7 +5,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a327676..a071e6be177 100644 --- a/arch/x86/kernel/dumpstack_64.c +++
b/arch/x86/kernel/dumpstack_64.c @@ -5,7 +5,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9346e102338..a665c71352b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -14,7 +14,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-core.c b/drivers/media/video/usbvision/usbvision-core.c index 6ba16abeebd..e0f91e4ab65 100644 --- a/drivers/media/video/usbvision/usbvision-core.c +++ b/drivers/media/video/usbvision/usbvision-core.c @@ -28,7 +28,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-i2c.c b/drivers/media/video/usbvision/usbvision-i2c.c index f97fd06d594..c19f51dba2e 100644 --- a/drivers/media/video/usbvision/usbvision-i2c.c +++ b/drivers/media/video/usbvision/usbvision-i2c.c @@ -28,7 +28,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 90d9b5c0e9a..a2a50d608a3 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -52,7 +52,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index c431198bdbc..82daa3c1dc9 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -14,7 +14,6 @@ #include #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/usb/gadget/f_loopback.c b/drivers/usb/gadget/f_loopback.c index eb6ddfc2085..6cb29d3df57 100644 --- a/drivers/usb/gadget/f_loopback.c +++ b/drivers/usb/gadget/f_loopback.c @@ -22,7 +22,6 @@ /* #define VERBOSE_DEBUG */ #include -#include <linux/utsname.h> #include #include "g_zero.h" diff --git a/drivers/usb/gadget/f_obex.c b/drivers/usb/gadget/f_obex.c index 46d6266f30e..b4a3ba654ea 100644 --- a/drivers/usb/gadget/f_obex.c +++ b/drivers/usb/gadget/f_obex.c @@ -24,7 +24,6 @@ /* #define VERBOSE_DEBUG */ #include -#include <linux/utsname.h> #include #include "u_serial.h" diff --git a/drivers/usb/gadget/f_sourcesink.c b/drivers/usb/gadget/f_sourcesink.c index bffe91d525f..09cba273d2d 100644 --- a/drivers/usb/gadget/f_sourcesink.c +++ b/drivers/usb/gadget/f_sourcesink.c @@ -22,7 +22,6 @@ /* #define VERBOSE_DEBUG */ #include -#include <linux/utsname.h> #include #include "g_zero.h" diff --git a/drivers/usb/gadget/u_audio.c b/drivers/usb/gadget/u_audio.c index b5200d55145..8252595d619 100644 --- a/drivers/usb/gadget/u_audio.c +++ b/drivers/usb/gadget/u_audio.c @@ -10,7 +10,6 @@ */ #include -#include <linux/utsname.h> #include #include #include diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index f8751ff863c..2fc02bd9584 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -23,7 +23,6 @@ /* #define VERBOSE_DEBUG */ #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c3ac1805405..247436c10de 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -12,7 +12,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 0336f2beacd..b583ab0a4cb 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -8,7 +8,6 @@ #include #include -#include <linux/utsname.h> #include #include diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index e1d52865319..ad9dbbc9145 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -9,7 +9,6 @@ #include
#include -#include <linux/utsname.h> #include #include diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c862c9340f9..5e078b222b4 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -13,7 +13,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ee6a13f0544..3f8881d1a05 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -7,7 +7,6 @@ */ #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 35869a4921f..5fe5492fbd2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -10,7 +10,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be6544aef41..ed7c269e251 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -36,7 +36,6 @@ */ #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfc30d362f9..83ad47cbdd8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -39,7 +39,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 7be72d90d49..ef583854d8d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -32,7 +32,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index cdfa86fa147..ba2c199592f 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -38,7 +38,6 @@ #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 81eff8e5832..01cf8cc3d28 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 75997b4deaf..ca96bce50e1 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c5c88124096..ca46002ec10 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -27,7 +27,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 4d9e6b288dd..0334000676d 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -28,7 +28,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 83a9f2972ac..437698e9465 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f8b653fcd4d..83bcaf266b3 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 43e6e328056..d9fa3d22e17 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 98569e86c61..52ec020ea78 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index
756f5b0998e..00f53b2aea7 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -30,7 +30,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 24feb449a1d..4cc3c890a2c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -28,7 +28,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 579dd1b1110..e3421030a69 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -38,7 +38,6 @@ #include #include #include -#include <linux/utsname.h> #include #define MLOG_MASK_PREFIX ML_NAMEI diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 3656b300de3..69f39974c04 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -36,7 +36,6 @@ struct new_utsname { #include #include #include -#include <asm/atomic.h> struct uts_namespace { struct kref kref; diff --git a/init/main.c b/init/main.c index 6107223124e..76961db298f 100644 --- a/init/main.c +++ b/init/main.c @@ -18,7 +18,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8ba052c86d4..b101cdc4df3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -13,7 +13,6 @@ #include #include -#include <linux/utsname.h> #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0dfaa47d7cb..7f4f57bea4c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -26,7 +26,6 @@ #include #include #include -#include <linux/utsname.h> #include #include #include diff --git a/kernel/uid16.c b/kernel/uid16.c index 0314501688b..419209893d8 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -4,7 +4,6 @@ */ #include -#include <linux/utsname.h> #include #include #include diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index c70dd7f5258..1db618f56ec 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -8,7 +8,6 @@ #include #include -#include <linux/utsname.h> #include #ifdef RPC_DEBUG -- cgit v1.2.3-70-g09d2
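One detail worth pulling out of the diffstat above: arch/sparc/kernel/systbls.h is the only file that grows, because it replaces the include with a forward declaration; since that header only ever passes struct new_utsname around by pointer, the full definition is never needed there. A generic sketch of the same idiom follows (hypothetical header, not from the patch):

	#ifndef _EXAMPLE_SYSCALLS_H
	#define _EXAMPLE_SYSCALLS_H

	/* opaque: callers only ever hold a pointer, so no #include needed */
	struct new_utsname;

	extern long example_sys_newuname(struct new_utsname *name);

	#endif /* _EXAMPLE_SYSCALLS_H */

Preferring forward declarations over includes in headers like this is what lets a tree-wide cleanup drop 56 include lines while adding only two.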