diff options
Diffstat (limited to 'arch')
458 files changed, 27712 insertions, 4831 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc..a62965d057f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,19 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool @@ -281,4 +294,23 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_RCU_USER_QS + bool + help + Provide kernel entry/exit hooks necessary for userspace + RCU extended quiescent state. Syscalls need to be wrapped inside + rcu_user_exit()-rcu_user_enter() through the slow path using + TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs + are already protected inside rcu_irq_enter/rcu_irq_exit() but + preemption or signal handling on irq exit still need to be protected. + +config HAVE_VIRT_CPU_ACCOUNTING + bool + +config HAVE_IRQ_TIME_ACCOUNTING + bool + help + Archs need to ensure they use a high enough resolution clock to + support irq time accounting and then call enable_sched_clock_irqtime(). + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 9816d5a4d17..ef757147cbf 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -256,12 +256,6 @@ pcibios_fixup_bus(struct pci_bus *bus) } } -void __init -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index d6fde98b74b..83638aa096d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tty.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/reg.h> #include <asm/uaccess.h> @@ -54,9 +55,12 @@ cpu_idle(void) /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ + rcu_idle_enter(); while (!need_resched()) cpu_relax(); - schedule(); + + rcu_idle_exit(); + schedule_preempt_disabled(); } } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 35ddc02bfa4..a41ad90a97a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,6 +166,7 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); + preempt_disable(); /* Do nothing. */ cpu_idle(); } diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 3ea809430ed..5d5865204a1 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -223,6 +223,7 @@ srmcons_init(void) driver->subtype = SYSTEM_TYPE_SYSCONS; driver->init_termios = tty_std_termios; tty_set_operations(driver, &srmcons_ops); + tty_port_link_device(&srmcons_singleton.port, driver, 0); err = tty_register_driver(driver); if (err) { put_tty_driver(driver); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2f88d8d9770..de325f4615b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -273,7 +273,7 @@ config ARCH_INTEGRATOR select ARM_AMBA select ARCH_HAS_CPUFREQ select COMMON_CLK - select CLK_VERSATILE + select COMMON_CLK_VERSATILE select HAVE_TCM select ICST select GENERIC_CLOCKEVENTS @@ -289,13 +289,12 @@ config ARCH_INTEGRATOR config ARCH_REALVIEW bool "ARM Ltd. RealView family" select ARM_AMBA - select CLKDEV_LOOKUP - select HAVE_MACH_CLKDEV + select COMMON_CLK + select COMMON_CLK_VERSATILE select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB select PLAT_VERSATILE - select PLAT_VERSATILE_CLOCK select PLAT_VERSATILE_CLCD select ARM_TIMER_SP804 select GPIO_PL061 if GPIOLIB @@ -413,7 +412,7 @@ config ARCH_PRIMA2 select NO_IOPORT select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS - select CLKDEV_LOOKUP + select COMMON_CLK select GENERIC_IRQ_CHIP select MIGHT_HAVE_CACHE_L2X0 select PINCTRL diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 81769c1341f..bc67cbff394 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -653,6 +653,7 @@ __armv7_mmu_cache_on: mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg + bic r0, r0, #1 << 28 @ clear SCTLR.TRE orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x003c @ write buffer #ifdef CONFIG_MMU diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 66389c1c6f6..7c95f76398d 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -104,6 +104,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -113,6 +114,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -122,6 +124,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index b460d6ce9eb..195019b7ca0 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -95,6 +95,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -104,6 +105,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -113,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -122,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -131,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index bafa8806fc1..63751b1e744 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -113,6 +113,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -122,6 +123,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -131,6 +133,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -140,6 +143,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -149,6 +153,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index bfac0dfc332..ef9336ae961 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -107,6 +107,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -116,6 +117,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -125,6 +127,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -134,6 +137,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 4a18c393b13..8a387a8d61b 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -115,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -124,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -133,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -142,6 +145,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 0cab47d4a83..2fde5fd1acc 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -404,6 +404,7 @@ #define __NR_setns (__NR_SYSCALL_BASE+375) #define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) + /* 378 for kcmp */ /* * The following SWIs are ARM private. @@ -483,6 +484,7 @@ */ #define __IGNORE_fadvise64_64 #define __IGNORE_migrate_pages +#define __IGNORE_kcmp #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 2b2f25e7fef..9cf16b83bbb 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -270,15 +270,6 @@ static void __devinit pci_fixup_it8152(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152); - - -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ - if (debug_pci) - printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev)); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * If the bus contains any of these devices, then we must not turn on * parity checking of any kind. Currently this is CyberPro 20x0 only. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 463ff4a0ec8..e337879595e 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -387,6 +387,7 @@ /* 375 */ CALL(sys_setns) CALL(sys_process_vm_readv) CALL(sys_process_vm_writev) + CALL(sys_ni_syscall) /* reserved for sys_kcmp */ #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index fef42b21cec..e1f906989bb 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/clk.h> -#include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> @@ -96,7 +95,52 @@ static void twd_timer_stop(struct clock_event_device *clk) disable_percpu_irq(clk->irq); } -#ifdef CONFIG_CPU_FREQ +#ifdef CONFIG_COMMON_CLK + +/* + * Updates clockevent frequency when the cpu frequency changes. + * Called on the cpu that is changing frequency with interrupts disabled. + */ +static void twd_update_frequency(void *new_rate) +{ + twd_timer_rate = *((unsigned long *) new_rate); + + clockevents_update_freq(*__this_cpu_ptr(twd_evt), twd_timer_rate); +} + +static int twd_rate_change(struct notifier_block *nb, + unsigned long flags, void *data) +{ + struct clk_notifier_data *cnd = data; + + /* + * The twd clock events must be reprogrammed to account for the new + * frequency. The timer is local to a cpu, so cross-call to the + * changing cpu. + */ + if (flags == POST_RATE_CHANGE) + smp_call_function(twd_update_frequency, + (void *)&cnd->new_rate, 1); + + return NOTIFY_OK; +} + +static struct notifier_block twd_clk_nb = { + .notifier_call = twd_rate_change, +}; + +static int twd_clk_init(void) +{ + if (twd_evt && *__this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + return clk_notifier_register(twd_clk, &twd_clk_nb); + + return 0; +} +core_initcall(twd_clk_init); + +#elif defined (CONFIG_CPU_FREQ) + +#include <linux/cpufreq.h> /* * Updates clockevent frequency when the cpu frequency changes. diff --git a/arch/arm/mach-imx/clk-imx25.c b/arch/arm/mach-imx/clk-imx25.c index 4431a62fff5..d20d4795f4e 100644 --- a/arch/arm/mach-imx/clk-imx25.c +++ b/arch/arm/mach-imx/clk-imx25.c @@ -241,6 +241,6 @@ int __init mx25_clocks_init(void) clk_register_clkdev(clk[sdma_ahb], "ahb", "imx35-sdma"); clk_register_clkdev(clk[iim_ipg], "iim", NULL); - mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54); + mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), MX25_INT_GPT1); return 0; } diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c index 2c6ab3273f9..5985ed1b8c9 100644 --- a/arch/arm/mach-imx/mach-armadillo5x0.c +++ b/arch/arm/mach-imx/mach-armadillo5x0.c @@ -526,7 +526,8 @@ static void __init armadillo5x0_init(void) imx31_add_mxc_nand(&armadillo5x0_nand_board_info); /* set NAND page size to 2k if not configured via boot mode pins */ - __raw_writel(__raw_readl(MXC_CCM_RCSR) | (1 << 30), MXC_CCM_RCSR); + __raw_writel(__raw_readl(mx3_ccm_base + MXC_CCM_RCSR) | + (1 << 30), mx3_ccm_base + MXC_CCM_RCSR); /* RTC */ /* Get RTC IRQ and register the chip */ diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig index 7fddd01b85b..d697d07a1bf 100644 --- a/arch/arm/mach-mmp/Kconfig +++ b/arch/arm/mach-mmp/Kconfig @@ -108,18 +108,21 @@ endmenu config CPU_PXA168 bool select CPU_MOHAWK + select COMMON_CLK help Select code specific to PXA168 config CPU_PXA910 bool select CPU_MOHAWK + select COMMON_CLK help Select code specific to PXA910 config CPU_MMP2 bool select CPU_PJ4 + select COMMON_CLK help Select code specific to MMP2. MMP2 is ARMv7 compatible. diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 8dabfe81d07..ff886e01a0b 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -261,7 +261,7 @@ static void __init apx4devkit_init(void) enable_clk_enet_out(); if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KS8051, MICREL_PHY_ID_MASK, + phy_register_fixup_for_uid(PHY_ID_KSZ8051, MICREL_PHY_ID_MASK, apx4devkit_phy_fixup); mxsfb_pdata.mode_list = apx4devkit_video_modes; diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index c1b93c752d7..9e80d209d13 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -81,8 +81,9 @@ static struct omap_uart_port_info omap_serial_default_info[] __initdata = { }; #ifdef CONFIG_PM -static void omap_uart_enable_wakeup(struct platform_device *pdev, bool enable) +static void omap_uart_enable_wakeup(struct device *dev, bool enable) { + struct platform_device *pdev = to_platform_device(dev); struct omap_device *od = to_omap_device(pdev); if (!od) @@ -99,15 +100,17 @@ static void omap_uart_enable_wakeup(struct platform_device *pdev, bool enable) * in Smartidle Mode When Configured for DMA Operations. * WA: configure uart in force idle mode. */ -static void omap_uart_set_noidle(struct platform_device *pdev) +static void omap_uart_set_noidle(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct omap_device *od = to_omap_device(pdev); omap_hwmod_set_slave_idlemode(od->hwmods[0], HWMOD_IDLEMODE_NO); } -static void omap_uart_set_smartidle(struct platform_device *pdev) +static void omap_uart_set_smartidle(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct omap_device *od = to_omap_device(pdev); u8 idlemode; @@ -120,10 +123,10 @@ static void omap_uart_set_smartidle(struct platform_device *pdev) } #else -static void omap_uart_enable_wakeup(struct platform_device *pdev, bool enable) +static void omap_uart_enable_wakeup(struct device *dev, bool enable) {} -static void omap_uart_set_noidle(struct platform_device *pdev) {} -static void omap_uart_set_smartidle(struct platform_device *pdev) {} +static void omap_uart_set_noidle(struct device *dev) {} +static void omap_uart_set_smartidle(struct device *dev) {} #endif /* CONFIG_PM */ #ifdef CONFIG_OMAP_MUX @@ -304,6 +307,9 @@ void __init omap_serial_init_port(struct omap_board_data *bdata, omap_up.dma_rx_timeout = info->dma_rx_timeout; omap_up.dma_rx_poll_rate = info->dma_rx_poll_rate; omap_up.autosuspend_timeout = info->autosuspend_timeout; + omap_up.DTR_gpio = info->DTR_gpio; + omap_up.DTR_inverted = info->DTR_inverted; + omap_up.DTR_present = info->DTR_present; pdata = &omap_up; pdata_size = sizeof(struct omap_uart_port_info); diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 410291c6766..a6cd14ab1e4 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -204,6 +204,13 @@ void __init orion5x_wdt_init(void) void __init orion5x_init_early(void) { orion_time_set_base(TIMER_VIRT_BASE); + + /* + * Some Orion5x devices allocate their coherent buffers from atomic + * context. Increase size of atomic coherent pool to make sure such + * the allocations won't fail. + */ + init_dma_coherent_pool_size(SZ_1M); } int orion5x_tclk; diff --git a/arch/arm/mach-prima2/Makefile b/arch/arm/mach-prima2/Makefile index 13dd1604d95..841847d5603 100644 --- a/arch/arm/mach-prima2/Makefile +++ b/arch/arm/mach-prima2/Makefile @@ -1,6 +1,5 @@ obj-y := timer.o obj-y += irq.o -obj-y += clock.o obj-y += rstc.o obj-y += prima2.o obj-y += rtciobrg.o diff --git a/arch/arm/mach-prima2/clock.c b/arch/arm/mach-prima2/clock.c deleted file mode 100644 index aebad7e565c..00000000000 --- a/arch/arm/mach-prima2/clock.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * Clock tree for CSR SiRFprimaII - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - * - * Licensed under GPLv2 or later. - */ - -#include <linux/module.h> -#include <linux/bitops.h> -#include <linux/err.h> -#include <linux/errno.h> -#include <linux/io.h> -#include <linux/clkdev.h> -#include <linux/clk.h> -#include <linux/spinlock.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <asm/mach/map.h> -#include <mach/map.h> - -#define SIRFSOC_CLKC_CLK_EN0 0x0000 -#define SIRFSOC_CLKC_CLK_EN1 0x0004 -#define SIRFSOC_CLKC_REF_CFG 0x0014 -#define SIRFSOC_CLKC_CPU_CFG 0x0018 -#define SIRFSOC_CLKC_MEM_CFG 0x001c -#define SIRFSOC_CLKC_SYS_CFG 0x0020 -#define SIRFSOC_CLKC_IO_CFG 0x0024 -#define SIRFSOC_CLKC_DSP_CFG 0x0028 -#define SIRFSOC_CLKC_GFX_CFG 0x002c -#define SIRFSOC_CLKC_MM_CFG 0x0030 -#define SIRFSOC_LKC_LCD_CFG 0x0034 -#define SIRFSOC_CLKC_MMC_CFG 0x0038 -#define SIRFSOC_CLKC_PLL1_CFG0 0x0040 -#define SIRFSOC_CLKC_PLL2_CFG0 0x0044 -#define SIRFSOC_CLKC_PLL3_CFG0 0x0048 -#define SIRFSOC_CLKC_PLL1_CFG1 0x004c -#define SIRFSOC_CLKC_PLL2_CFG1 0x0050 -#define SIRFSOC_CLKC_PLL3_CFG1 0x0054 -#define SIRFSOC_CLKC_PLL1_CFG2 0x0058 -#define SIRFSOC_CLKC_PLL2_CFG2 0x005c -#define SIRFSOC_CLKC_PLL3_CFG2 0x0060 - -#define SIRFSOC_CLOCK_VA_BASE SIRFSOC_VA(0x005000) - -#define KHZ 1000 -#define MHZ (KHZ * KHZ) - -struct clk_ops { - unsigned long (*get_rate)(struct clk *clk); - long (*round_rate)(struct clk *clk, unsigned long rate); - int (*set_rate)(struct clk *clk, unsigned long rate); - int (*enable)(struct clk *clk); - int (*disable)(struct clk *clk); - struct clk *(*get_parent)(struct clk *clk); - int (*set_parent)(struct clk *clk, struct clk *parent); -}; - -struct clk { - struct clk *parent; /* parent clk */ - unsigned long rate; /* clock rate in Hz */ - signed char usage; /* clock enable count */ - signed char enable_bit; /* enable bit: 0 ~ 63 */ - unsigned short regofs; /* register offset */ - struct clk_ops *ops; /* clock operation */ -}; - -static DEFINE_SPINLOCK(clocks_lock); - -static inline unsigned long clkc_readl(unsigned reg) -{ - return readl(SIRFSOC_CLOCK_VA_BASE + reg); -} - -static inline void clkc_writel(u32 val, unsigned reg) -{ - writel(val, SIRFSOC_CLOCK_VA_BASE + reg); -} - -/* - * osc_rtc - real time oscillator - 32.768KHz - * osc_sys - high speed oscillator - 26MHz - */ - -static struct clk clk_rtc = { - .rate = 32768, -}; - -static struct clk clk_osc = { - .rate = 26 * MHZ, -}; - -/* - * std pll - */ -static unsigned long std_pll_get_rate(struct clk *clk) -{ - unsigned long fin = clk_get_rate(clk->parent); - u32 regcfg2 = clk->regofs + SIRFSOC_CLKC_PLL1_CFG2 - - SIRFSOC_CLKC_PLL1_CFG0; - - if (clkc_readl(regcfg2) & BIT(2)) { - /* pll bypass mode */ - clk->rate = fin; - } else { - /* fout = fin * nf / nr / od */ - u32 cfg0 = clkc_readl(clk->regofs); - u32 nf = (cfg0 & (BIT(13) - 1)) + 1; - u32 nr = ((cfg0 >> 13) & (BIT(6) - 1)) + 1; - u32 od = ((cfg0 >> 19) & (BIT(4) - 1)) + 1; - WARN_ON(fin % MHZ); - clk->rate = fin / MHZ * nf / nr / od * MHZ; - } - - return clk->rate; -} - -static int std_pll_set_rate(struct clk *clk, unsigned long rate) -{ - unsigned long fin, nf, nr, od, reg; - - /* - * fout = fin * nf / (nr * od); - * set od = 1, nr = fin/MHz, so fout = nf * MHz - */ - - nf = rate / MHZ; - if (unlikely((rate % MHZ) || nf > BIT(13) || nf < 1)) - return -EINVAL; - - fin = clk_get_rate(clk->parent); - BUG_ON(fin < MHZ); - - nr = fin / MHZ; - BUG_ON((fin % MHZ) || nr > BIT(6)); - - od = 1; - - reg = (nf - 1) | ((nr - 1) << 13) | ((od - 1) << 19); - clkc_writel(reg, clk->regofs); - - reg = clk->regofs + SIRFSOC_CLKC_PLL1_CFG1 - SIRFSOC_CLKC_PLL1_CFG0; - clkc_writel((nf >> 1) - 1, reg); - - reg = clk->regofs + SIRFSOC_CLKC_PLL1_CFG2 - SIRFSOC_CLKC_PLL1_CFG0; - while (!(clkc_readl(reg) & BIT(6))) - cpu_relax(); - - clk->rate = 0; /* set to zero will force recalculation */ - return 0; -} - -static struct clk_ops std_pll_ops = { - .get_rate = std_pll_get_rate, - .set_rate = std_pll_set_rate, -}; - -static struct clk clk_pll1 = { - .parent = &clk_osc, - .regofs = SIRFSOC_CLKC_PLL1_CFG0, - .ops = &std_pll_ops, -}; - -static struct clk clk_pll2 = { - .parent = &clk_osc, - .regofs = SIRFSOC_CLKC_PLL2_CFG0, - .ops = &std_pll_ops, -}; - -static struct clk clk_pll3 = { - .parent = &clk_osc, - .regofs = SIRFSOC_CLKC_PLL3_CFG0, - .ops = &std_pll_ops, -}; - -/* - * clock domains - cpu, mem, sys/io - */ - -static struct clk clk_mem; - -static struct clk *dmn_get_parent(struct clk *clk) -{ - struct clk *clks[] = { - &clk_osc, &clk_rtc, &clk_pll1, &clk_pll2, &clk_pll3 - }; - u32 cfg = clkc_readl(clk->regofs); - WARN_ON((cfg & (BIT(3) - 1)) > 4); - return clks[cfg & (BIT(3) - 1)]; -} - -static int dmn_set_parent(struct clk *clk, struct clk *parent) -{ - const struct clk *clks[] = { - &clk_osc, &clk_rtc, &clk_pll1, &clk_pll2, &clk_pll3 - }; - u32 cfg = clkc_readl(clk->regofs); - int i; - for (i = 0; i < ARRAY_SIZE(clks); i++) { - if (clks[i] == parent) { - cfg &= ~(BIT(3) - 1); - clkc_writel(cfg | i, clk->regofs); - /* BIT(3) - switching status: 1 - busy, 0 - done */ - while (clkc_readl(clk->regofs) & BIT(3)) - cpu_relax(); - return 0; - } - } - return -EINVAL; -} - -static unsigned long dmn_get_rate(struct clk *clk) -{ - unsigned long fin = clk_get_rate(clk->parent); - u32 cfg = clkc_readl(clk->regofs); - if (cfg & BIT(24)) { - /* fcd bypass mode */ - clk->rate = fin; - } else { - /* - * wait count: bit[19:16], hold count: bit[23:20] - */ - u32 wait = (cfg >> 16) & (BIT(4) - 1); - u32 hold = (cfg >> 20) & (BIT(4) - 1); - - clk->rate = fin / (wait + hold + 2); - } - - return clk->rate; -} - -static int dmn_set_rate(struct clk *clk, unsigned long rate) -{ - unsigned long fin; - unsigned ratio, wait, hold, reg; - unsigned bits = (clk == &clk_mem) ? 3 : 4; - - fin = clk_get_rate(clk->parent); - ratio = fin / rate; - - if (unlikely(ratio < 2 || ratio > BIT(bits + 1))) - return -EINVAL; - - WARN_ON(fin % rate); - - wait = (ratio >> 1) - 1; - hold = ratio - wait - 2; - - reg = clkc_readl(clk->regofs); - reg &= ~(((BIT(bits) - 1) << 16) | ((BIT(bits) - 1) << 20)); - reg |= (wait << 16) | (hold << 20) | BIT(25); - clkc_writel(reg, clk->regofs); - - /* waiting FCD been effective */ - while (clkc_readl(clk->regofs) & BIT(25)) - cpu_relax(); - - clk->rate = 0; /* set to zero will force recalculation */ - - return 0; -} - -/* - * cpu clock has no FCD register in Prima2, can only change pll - */ -static int cpu_set_rate(struct clk *clk, unsigned long rate) -{ - int ret1, ret2; - struct clk *cur_parent, *tmp_parent; - - cur_parent = dmn_get_parent(clk); - BUG_ON(cur_parent == NULL || cur_parent->usage > 1); - - /* switch to tmp pll before setting parent clock's rate */ - tmp_parent = cur_parent == &clk_pll1 ? &clk_pll2 : &clk_pll1; - ret1 = dmn_set_parent(clk, tmp_parent); - BUG_ON(ret1); - - ret2 = clk_set_rate(cur_parent, rate); - - ret1 = dmn_set_parent(clk, cur_parent); - - clk->rate = 0; /* set to zero will force recalculation */ - - return ret2 ? ret2 : ret1; -} - -static struct clk_ops cpu_ops = { - .get_parent = dmn_get_parent, - .set_parent = dmn_set_parent, - .set_rate = cpu_set_rate, -}; - -static struct clk clk_cpu = { - .parent = &clk_pll1, - .regofs = SIRFSOC_CLKC_CPU_CFG, - .ops = &cpu_ops, -}; - - -static struct clk_ops msi_ops = { - .set_rate = dmn_set_rate, - .get_rate = dmn_get_rate, - .set_parent = dmn_set_parent, - .get_parent = dmn_get_parent, -}; - -static struct clk clk_mem = { - .parent = &clk_pll2, - .regofs = SIRFSOC_CLKC_MEM_CFG, - .ops = &msi_ops, -}; - -static struct clk clk_sys = { - .parent = &clk_pll3, - .regofs = SIRFSOC_CLKC_SYS_CFG, - .ops = &msi_ops, -}; - -static struct clk clk_io = { - .parent = &clk_pll3, - .regofs = SIRFSOC_CLKC_IO_CFG, - .ops = &msi_ops, -}; - -/* - * on-chip clock sets - */ -static struct clk_lookup onchip_clks[] = { - { - .dev_id = "rtc", - .clk = &clk_rtc, - }, { - .dev_id = "osc", - .clk = &clk_osc, - }, { - .dev_id = "pll1", - .clk = &clk_pll1, - }, { - .dev_id = "pll2", - .clk = &clk_pll2, - }, { - .dev_id = "pll3", - .clk = &clk_pll3, - }, { - .dev_id = "cpu", - .clk = &clk_cpu, - }, { - .dev_id = "mem", - .clk = &clk_mem, - }, { - .dev_id = "sys", - .clk = &clk_sys, - }, { - .dev_id = "io", - .clk = &clk_io, - }, -}; - -int clk_enable(struct clk *clk) -{ - unsigned long flags; - - if (unlikely(IS_ERR_OR_NULL(clk))) - return -EINVAL; - - if (clk->parent) - clk_enable(clk->parent); - - spin_lock_irqsave(&clocks_lock, flags); - if (!clk->usage++ && clk->ops && clk->ops->enable) - clk->ops->enable(clk); - spin_unlock_irqrestore(&clocks_lock, flags); - return 0; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ - unsigned long flags; - - if (unlikely(IS_ERR_OR_NULL(clk))) - return; - - WARN_ON(!clk->usage); - - spin_lock_irqsave(&clocks_lock, flags); - if (--clk->usage == 0 && clk->ops && clk->ops->disable) - clk->ops->disable(clk); - spin_unlock_irqrestore(&clocks_lock, flags); - - if (clk->parent) - clk_disable(clk->parent); -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (unlikely(IS_ERR_OR_NULL(clk))) - return 0; - - if (clk->rate) - return clk->rate; - - if (clk->ops && clk->ops->get_rate) - return clk->ops->get_rate(clk); - - return clk_get_rate(clk->parent); -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (unlikely(IS_ERR_OR_NULL(clk))) - return 0; - - if (clk->ops && clk->ops->round_rate) - return clk->ops->round_rate(clk, rate); - - return 0; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - if (unlikely(IS_ERR_OR_NULL(clk))) - return -EINVAL; - - if (!clk->ops || !clk->ops->set_rate) - return -EINVAL; - - return clk->ops->set_rate(clk, rate); -} -EXPORT_SYMBOL(clk_set_rate); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - int ret; - unsigned long flags; - - if (unlikely(IS_ERR_OR_NULL(clk))) - return -EINVAL; - - if (!clk->ops || !clk->ops->set_parent) - return -EINVAL; - - spin_lock_irqsave(&clocks_lock, flags); - ret = clk->ops->set_parent(clk, parent); - if (!ret) { - parent->usage += clk->usage; - clk->parent->usage -= clk->usage; - BUG_ON(clk->parent->usage < 0); - clk->parent = parent; - } - spin_unlock_irqrestore(&clocks_lock, flags); - return ret; -} -EXPORT_SYMBOL(clk_set_parent); - -struct clk *clk_get_parent(struct clk *clk) -{ - unsigned long flags; - - if (unlikely(IS_ERR_OR_NULL(clk))) - return NULL; - - if (!clk->ops || !clk->ops->get_parent) - return clk->parent; - - spin_lock_irqsave(&clocks_lock, flags); - clk->parent = clk->ops->get_parent(clk); - spin_unlock_irqrestore(&clocks_lock, flags); - return clk->parent; -} -EXPORT_SYMBOL(clk_get_parent); - -static void __init sirfsoc_clk_init(void) -{ - clkdev_add_table(onchip_clks, ARRAY_SIZE(onchip_clks)); -} - -static struct of_device_id clkc_ids[] = { - { .compatible = "sirf,prima2-clkc" }, - {}, -}; - -void __init sirfsoc_of_clk_init(void) -{ - struct device_node *np; - struct resource res; - struct map_desc sirfsoc_clkc_iodesc = { - .virtual = SIRFSOC_CLOCK_VA_BASE, - .type = MT_DEVICE, - }; - - np = of_find_matching_node(NULL, clkc_ids); - if (!np) - panic("unable to find compatible clkc node in dtb\n"); - - if (of_address_to_resource(np, 0, &res)) - panic("unable to find clkc range in dtb"); - of_node_put(np); - - sirfsoc_clkc_iodesc.pfn = __phys_to_pfn(res.start); - sirfsoc_clkc_iodesc.length = 1 + res.end - res.start; - - iotable_init(&sirfsoc_clkc_iodesc, 1); - - sirfsoc_clk_init(); -} diff --git a/arch/arm/mach-prima2/prima2.c b/arch/arm/mach-prima2/prima2.c index 8f0429d4b79..e9a17aebe0d 100644 --- a/arch/arm/mach-prima2/prima2.c +++ b/arch/arm/mach-prima2/prima2.c @@ -38,7 +38,6 @@ static const char *prima2cb_dt_match[] __initdata = { MACHINE_START(PRIMA2_EVB, "prima2cb") /* Maintainer: Barry Song <baohua.song@csr.com> */ .atag_offset = 0x100, - .init_early = sirfsoc_of_clk_init, .map_io = sirfsoc_map_lluart, .init_irq = sirfsoc_of_irq_init, .timer = &sirfsoc_timer, diff --git a/arch/arm/mach-prima2/timer.c b/arch/arm/mach-prima2/timer.c index f224107de7b..d95bf252f69 100644 --- a/arch/arm/mach-prima2/timer.c +++ b/arch/arm/mach-prima2/timer.c @@ -21,6 +21,8 @@ #include <asm/sched_clock.h> #include <asm/mach/time.h> +#include "common.h" + #define SIRFSOC_TIMER_COUNTER_LO 0x0000 #define SIRFSOC_TIMER_COUNTER_HI 0x0004 #define SIRFSOC_TIMER_MATCH_0 0x0008 @@ -188,9 +190,13 @@ static void __init sirfsoc_clockevent_init(void) static void __init sirfsoc_timer_init(void) { unsigned long rate; + struct clk *clk; + + /* initialize clocking early, we want to set the OS timer */ + sirfsoc_of_clk_init(); /* timer's input clock is io clock */ - struct clk *clk = clk_get_sys("io", NULL); + clk = clk_get_sys("io", NULL); BUG_ON(IS_ERR(clk)); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 45868bb43cb..ff007d15e0e 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -30,7 +30,6 @@ #include <linux/ata_platform.h> #include <linux/amba/mmci.h> #include <linux/gfp.h> -#include <linux/clkdev.h> #include <linux/mtd/physmap.h> #include <mach/hardware.h> @@ -226,115 +225,10 @@ struct mmci_platform_data realview_mmc1_plat_data = { .cd_invert = true, }; -/* - * Clock handling - */ -static const struct icst_params realview_oscvco_params = { - .ref = 24000000, - .vco_max = ICST307_VCO_MAX, - .vco_min = ICST307_VCO_MIN, - .vd_min = 4 + 8, - .vd_max = 511 + 8, - .rd_min = 1 + 2, - .rd_max = 127 + 2, - .s2div = icst307_s2div, - .idx2s = icst307_idx2s, -}; - -static void realview_oscvco_set(struct clk *clk, struct icst_vco vco) -{ - void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET; - u32 val; - - val = readl(clk->vcoreg) & ~0x7ffff; - val |= vco.v | (vco.r << 9) | (vco.s << 16); - - writel(0xa05f, sys_lock); - writel(val, clk->vcoreg); - writel(0, sys_lock); -} - -static const struct clk_ops oscvco_clk_ops = { - .round = icst_clk_round, - .set = icst_clk_set, - .setvco = realview_oscvco_set, -}; - -static struct clk oscvco_clk = { - .ops = &oscvco_clk_ops, - .params = &realview_oscvco_params, -}; - -/* - * These are fixed clocks. - */ -static struct clk ref24_clk = { - .rate = 24000000, -}; - -static struct clk sp804_clk = { - .rate = 1000000, -}; - -static struct clk dummy_apb_pclk; - -static struct clk_lookup lookups[] = { - { /* Bus clock */ - .con_id = "apb_pclk", - .clk = &dummy_apb_pclk, - }, { /* UART0 */ - .dev_id = "dev:uart0", - .clk = &ref24_clk, - }, { /* UART1 */ - .dev_id = "dev:uart1", - .clk = &ref24_clk, - }, { /* UART2 */ - .dev_id = "dev:uart2", - .clk = &ref24_clk, - }, { /* UART3 */ - .dev_id = "fpga:uart3", - .clk = &ref24_clk, - }, { /* UART3 is on the dev chip in PB1176 */ - .dev_id = "dev:uart3", - .clk = &ref24_clk, - }, { /* UART4 only exists in PB1176 */ - .dev_id = "fpga:uart4", - .clk = &ref24_clk, - }, { /* KMI0 */ - .dev_id = "fpga:kmi0", - .clk = &ref24_clk, - }, { /* KMI1 */ - .dev_id = "fpga:kmi1", - .clk = &ref24_clk, - }, { /* MMC0 */ - .dev_id = "fpga:mmc0", - .clk = &ref24_clk, - }, { /* CLCD is in the PB1176 and EB DevChip */ - .dev_id = "dev:clcd", - .clk = &oscvco_clk, - }, { /* PB:CLCD */ - .dev_id = "issp:clcd", - .clk = &oscvco_clk, - }, { /* SSP */ - .dev_id = "dev:ssp0", - .clk = &ref24_clk, - }, { /* SP804 timers */ - .dev_id = "sp804", - .clk = &sp804_clk, - }, -}; - void __init realview_init_early(void) { void __iomem *sys = __io_address(REALVIEW_SYS_BASE); - if (machine_is_realview_pb1176()) - oscvco_clk.vcoreg = sys + REALVIEW_SYS_OSC0_OFFSET; - else - oscvco_clk.vcoreg = sys + REALVIEW_SYS_OSC4_OFFSET; - - clkdev_add_table(lookups, ARRAY_SIZE(lookups)); - versatile_sched_clock_init(sys + REALVIEW_SYS_24MHz_OFFSET, 24000000); } diff --git a/arch/arm/mach-realview/include/mach/clkdev.h b/arch/arm/mach-realview/include/mach/clkdev.h deleted file mode 100644 index e58d0771b64..00000000000 --- a/arch/arm/mach-realview/include/mach/clkdev.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_MACH_CLKDEV_H -#define __ASM_MACH_CLKDEV_H - -#include <plat/clock.h> - -struct clk { - unsigned long rate; - const struct clk_ops *ops; - const struct icst_params *params; - void __iomem *vcoreg; -}; - -#define __clk_get(clk) ({ 1; }) -#define __clk_put(clk) do { } while (0) - -#endif diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index baf382c5e77..a33e33b7673 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -27,6 +27,7 @@ #include <linux/amba/mmci.h> #include <linux/amba/pl022.h> #include <linux/io.h> +#include <linux/platform_data/clk-realview.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -414,6 +415,7 @@ static void __init realview_eb_timer_init(void) else timer_irq = IRQ_EB_TIMER0_1; + realview_clk_init(__io_address(REALVIEW_SYS_BASE), false); realview_timer_init(timer_irq); realview_eb_twd_init(); } diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index b1d7cafa1a6..f0298cbc203 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -29,6 +29,7 @@ #include <linux/mtd/physmap.h> #include <linux/mtd/partitions.h> #include <linux/io.h> +#include <linux/platform_data/clk-realview.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -326,6 +327,7 @@ static void __init realview_pb1176_timer_init(void) timer2_va_base = __io_address(REALVIEW_PB1176_TIMER2_3_BASE); timer3_va_base = __io_address(REALVIEW_PB1176_TIMER2_3_BASE) + 0x20; + realview_clk_init(__io_address(REALVIEW_SYS_BASE), true); realview_timer_init(IRQ_DC1176_TIMER0); } diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index a98c536e332..1f019f76f7b 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -27,6 +27,7 @@ #include <linux/amba/mmci.h> #include <linux/amba/pl022.h> #include <linux/io.h> +#include <linux/platform_data/clk-realview.h> #include <mach/hardware.h> #include <asm/irq.h> @@ -312,6 +313,7 @@ static void __init realview_pb11mp_timer_init(void) timer2_va_base = __io_address(REALVIEW_PB11MP_TIMER2_3_BASE); timer3_va_base = __io_address(REALVIEW_PB11MP_TIMER2_3_BASE) + 0x20; + realview_clk_init(__io_address(REALVIEW_SYS_BASE), false); realview_timer_init(IRQ_TC11MP_TIMER0_1); realview_pb11mp_twd_init(); } diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index 59650174e6e..5032775dbfe 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -27,6 +27,7 @@ #include <linux/amba/mmci.h> #include <linux/amba/pl022.h> #include <linux/io.h> +#include <linux/platform_data/clk-realview.h> #include <asm/irq.h> #include <asm/leds.h> @@ -261,6 +262,7 @@ static void __init realview_pba8_timer_init(void) timer2_va_base = __io_address(REALVIEW_PBA8_TIMER2_3_BASE); timer3_va_base = __io_address(REALVIEW_PBA8_TIMER2_3_BASE) + 0x20; + realview_clk_init(__io_address(REALVIEW_SYS_BASE), false); realview_timer_init(IRQ_PBA8_TIMER0_1); } diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index 3f2f605624e..de64ba0ddb9 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -26,6 +26,7 @@ #include <linux/amba/mmci.h> #include <linux/amba/pl022.h> #include <linux/io.h> +#include <linux/platform_data/clk-realview.h> #include <asm/irq.h> #include <asm/leds.h> @@ -320,6 +321,7 @@ static void __init realview_pbx_timer_init(void) timer2_va_base = __io_address(REALVIEW_PBX_TIMER2_3_BASE); timer3_va_base = __io_address(REALVIEW_PBX_TIMER2_3_BASE) + 0x20; + realview_clk_init(__io_address(REALVIEW_SYS_BASE), false); realview_timer_init(IRQ_PBX_TIMER0_1); realview_pbx_twd_init(); } diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 53b7ea92c32..3b8a0171c3c 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -346,11 +346,11 @@ static struct resource sh_mmcif_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = gic_spi(141), + .start = gic_spi(140), .flags = IORESOURCE_IRQ, }, [2] = { - .start = gic_spi(140), + .start = gic_spi(141), .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c index b7344beec10..94486e7e9df 100644 --- a/arch/arm/mach-tegra/board-harmony-power.c +++ b/arch/arm/mach-tegra/board-harmony-power.c @@ -67,6 +67,13 @@ static struct regulator_init_data ldo0_data = { }, \ } +static struct regulator_init_data sys_data = { + .supply_regulator = "vdd_5v0", + .constraints = { + .name = "vdd_sys", + }, +}; + HARMONY_REGULATOR_INIT(sm0, "vdd_sm0", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm1, "vdd_sm1", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm2, "vdd_sm2", "vdd_sys", 3000, 4550, 1); @@ -74,7 +81,7 @@ HARMONY_REGULATOR_INIT(ldo1, "vdd_ldo1", "vdd_sm2", 725, 1500, 1); HARMONY_REGULATOR_INIT(ldo2, "vdd_ldo2", "vdd_sm2", 725, 1500, 0); HARMONY_REGULATOR_INIT(ldo3, "vdd_ldo3", "vdd_sm2", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo4, "vdd_ldo4", "vdd_sm2", 1700, 2475, 1); -HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", NULL, 1250, 3300, 1); +HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", "vdd_sys", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo6, "vdd_ldo6", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo7, "vdd_ldo7", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo8, "vdd_ldo8", "vdd_sm2", 1250, 3300, 0); @@ -88,6 +95,7 @@ HARMONY_REGULATOR_INIT(ldo9, "vdd_ldo9", "vdd_sm2", 1250, 3300, 1); } static struct tps6586x_subdev_info tps_devs[] = { + TPS_REG(SYS, &sys_data), TPS_REG(SM_0, &sm0_data), TPS_REG(SM_1, &sm1_data), TPS_REG(SM_2, &sm2_data), @@ -120,7 +128,7 @@ static struct i2c_board_info __initdata harmony_regulators[] = { int __init harmony_regulator_init(void) { - regulator_register_always_on(0, "vdd_sys", + regulator_register_always_on(0, "vdd_5v0", NULL, 0, 5000000); if (machine_is_harmony()) { diff --git a/arch/arm/mach-tegra/pcie.c b/arch/arm/mach-tegra/pcie.c index d3ad5150d66..c25a2a4f2e3 100644 --- a/arch/arm/mach-tegra/pcie.c +++ b/arch/arm/mach-tegra/pcie.c @@ -367,17 +367,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class); /* Tegra PCIE requires relaxed ordering */ static void __devinit tegra_pcie_relax_enable(struct pci_dev *dev) { - u16 val16; - int pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - - if (pos <= 0) { - dev_err(&dev->dev, "skipping relaxed ordering fixup\n"); - return; - } - - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &val16); - val16 |= PCI_EXP_DEVCTL_RELAX_EN; - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, val16); + pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); } DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable); diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 53d3d46dec1..a258996d954 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -11,6 +11,7 @@ config UX500_SOC_COMMON select CACHE_L2X0 select PINCTRL select PINCTRL_NOMADIK + select COMMON_CLK config UX500_SOC_DB8500 bool diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index 026086ff9e6..5691ef679d0 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel, U8500 machine. # -obj-y := clock.o cpu.o devices.o devices-common.o \ +obj-y := cpu.o devices.o devices-common.o \ id.o usb.o timer.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index a534d8880de..1d2e3c6f8b5 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -524,33 +524,12 @@ static struct stedma40_chan_cfg uart2_dma_cfg_tx = { }; #endif -#define PRCC_K_SOFTRST_SET 0x18 -#define PRCC_K_SOFTRST_CLEAR 0x1C -static void ux500_uart0_reset(void) -{ - void __iomem *prcc_rst_set, *prcc_rst_clr; - - prcc_rst_set = (void __iomem *)IO_ADDRESS(U8500_CLKRST1_BASE + - PRCC_K_SOFTRST_SET); - prcc_rst_clr = (void __iomem *)IO_ADDRESS(U8500_CLKRST1_BASE + - PRCC_K_SOFTRST_CLEAR); - - /* Activate soft reset PRCC_K_SOFTRST_CLEAR */ - writel((readl(prcc_rst_clr) | 0x1), prcc_rst_clr); - udelay(1); - - /* Release soft reset PRCC_K_SOFTRST_SET */ - writel((readl(prcc_rst_set) | 0x1), prcc_rst_set); - udelay(1); -} - static struct amba_pl011_data uart0_plat = { #ifdef CONFIG_STE_DMA40 .dma_filter = stedma40_filter, .dma_rx_param = &uart0_dma_cfg_rx, .dma_tx_param = &uart0_dma_cfg_tx, #endif - .reset = ux500_uart0_reset, }; static struct amba_pl011_data uart1_plat = { diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c deleted file mode 100644 index 8d73b066a18..00000000000 --- a/arch/arm/mach-ux500/clock.c +++ /dev/null @@ -1,715 +0,0 @@ -/* - * Copyright (C) 2009 ST-Ericsson - * Copyright (C) 2009 STMicroelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <linux/clk.h> -#include <linux/io.h> -#include <linux/clkdev.h> -#include <linux/cpufreq.h> - -#include <plat/mtu.h> -#include <mach/hardware.h> -#include "clock.h" - -#ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> -#include <linux/uaccess.h> /* for copy_from_user */ -static LIST_HEAD(clk_list); -#endif - -#define PRCC_PCKEN 0x00 -#define PRCC_PCKDIS 0x04 -#define PRCC_KCKEN 0x08 -#define PRCC_KCKDIS 0x0C - -#define PRCM_YYCLKEN0_MGT_SET 0x510 -#define PRCM_YYCLKEN1_MGT_SET 0x514 -#define PRCM_YYCLKEN0_MGT_CLR 0x518 -#define PRCM_YYCLKEN1_MGT_CLR 0x51C -#define PRCM_YYCLKEN0_MGT_VAL 0x520 -#define PRCM_YYCLKEN1_MGT_VAL 0x524 - -#define PRCM_SVAMMDSPCLK_MGT 0x008 -#define PRCM_SIAMMDSPCLK_MGT 0x00C -#define PRCM_SGACLK_MGT 0x014 -#define PRCM_UARTCLK_MGT 0x018 -#define PRCM_MSP02CLK_MGT 0x01C -#define PRCM_MSP1CLK_MGT 0x288 -#define PRCM_I2CCLK_MGT 0x020 -#define PRCM_SDMMCCLK_MGT 0x024 -#define PRCM_SLIMCLK_MGT 0x028 -#define PRCM_PER1CLK_MGT 0x02C -#define PRCM_PER2CLK_MGT 0x030 -#define PRCM_PER3CLK_MGT 0x034 -#define PRCM_PER5CLK_MGT 0x038 -#define PRCM_PER6CLK_MGT 0x03C -#define PRCM_PER7CLK_MGT 0x040 -#define PRCM_LCDCLK_MGT 0x044 -#define PRCM_BMLCLK_MGT 0x04C -#define PRCM_HSITXCLK_MGT 0x050 -#define PRCM_HSIRXCLK_MGT 0x054 -#define PRCM_HDMICLK_MGT 0x058 -#define PRCM_APEATCLK_MGT 0x05C -#define PRCM_APETRACECLK_MGT 0x060 -#define PRCM_MCDECLK_MGT 0x064 -#define PRCM_IPI2CCLK_MGT 0x068 -#define PRCM_DSIALTCLK_MGT 0x06C -#define PRCM_DMACLK_MGT 0x074 -#define PRCM_B2R2CLK_MGT 0x078 -#define PRCM_TVCLK_MGT 0x07C -#define PRCM_TCR 0x1C8 -#define PRCM_TCR_STOPPED (1 << 16) -#define PRCM_TCR_DOZE_MODE (1 << 17) -#define PRCM_UNIPROCLK_MGT 0x278 -#define PRCM_SSPCLK_MGT 0x280 -#define PRCM_RNGCLK_MGT 0x284 -#define PRCM_UICCCLK_MGT 0x27C - -#define PRCM_MGT_ENABLE (1 << 8) - -static DEFINE_SPINLOCK(clocks_lock); - -static void __clk_enable(struct clk *clk) -{ - if (clk->enabled++ == 0) { - if (clk->parent_cluster) - __clk_enable(clk->parent_cluster); - - if (clk->parent_periph) - __clk_enable(clk->parent_periph); - - if (clk->ops && clk->ops->enable) - clk->ops->enable(clk); - } -} - -int clk_enable(struct clk *clk) -{ - unsigned long flags; - - spin_lock_irqsave(&clocks_lock, flags); - __clk_enable(clk); - spin_unlock_irqrestore(&clocks_lock, flags); - - return 0; -} -EXPORT_SYMBOL(clk_enable); - -static void __clk_disable(struct clk *clk) -{ - if (--clk->enabled == 0) { - if (clk->ops && clk->ops->disable) - clk->ops->disable(clk); - - if (clk->parent_periph) - __clk_disable(clk->parent_periph); - - if (clk->parent_cluster) - __clk_disable(clk->parent_cluster); - } -} - -void clk_disable(struct clk *clk) -{ - unsigned long flags; - - WARN_ON(!clk->enabled); - - spin_lock_irqsave(&clocks_lock, flags); - __clk_disable(clk); - spin_unlock_irqrestore(&clocks_lock, flags); -} -EXPORT_SYMBOL(clk_disable); - -/* - * The MTU has a separate, rather complex muxing setup - * with alternative parents (peripheral cluster or - * ULP or fixed 32768 Hz) depending on settings - */ -static unsigned long clk_mtu_get_rate(struct clk *clk) -{ - void __iomem *addr; - u32 tcr; - int mtu = (int) clk->data; - /* - * One of these is selected eventually - * TODO: Replace the constant with a reference - * to the ULP source once this is modeled. - */ - unsigned long clk32k = 32768; - unsigned long mturate; - unsigned long retclk; - - if (cpu_is_u8500_family()) - addr = __io_address(U8500_PRCMU_BASE); - else - ux500_unknown_soc(); - - /* - * On a startup, always conifgure the TCR to the doze mode; - * bootloaders do it for us. Do this in the kernel too. - */ - writel(PRCM_TCR_DOZE_MODE, addr + PRCM_TCR); - - tcr = readl(addr + PRCM_TCR); - - /* Get the rate from the parent as a default */ - if (clk->parent_periph) - mturate = clk_get_rate(clk->parent_periph); - else if (clk->parent_cluster) - mturate = clk_get_rate(clk->parent_cluster); - else - /* We need to be connected SOMEWHERE */ - BUG(); - - /* Return the clock selected for this MTU */ - if (tcr & (1 << mtu)) - retclk = clk32k; - else - retclk = mturate; - - pr_info("MTU%d clock rate: %lu Hz\n", mtu, retclk); - return retclk; -} - -unsigned long clk_get_rate(struct clk *clk) -{ - unsigned long rate; - - /* - * If there is a custom getrate callback for this clock, - * it will take precedence. - */ - if (clk->get_rate) - return clk->get_rate(clk); - - if (clk->ops && clk->ops->get_rate) - return clk->ops->get_rate(clk); - - rate = clk->rate; - if (!rate) { - if (clk->parent_periph) - rate = clk_get_rate(clk->parent_periph); - else if (clk->parent_cluster) - rate = clk_get_rate(clk->parent_cluster); - } - - return rate; -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - /*TODO*/ - return rate; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - clk->rate = rate; - return 0; -} -EXPORT_SYMBOL(clk_set_rate); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - /*TODO*/ - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_parent); - -static void clk_prcmu_enable(struct clk *clk) -{ - void __iomem *cg_set_reg = __io_address(U8500_PRCMU_BASE) - + PRCM_YYCLKEN0_MGT_SET + clk->prcmu_cg_off; - - writel(1 << clk->prcmu_cg_bit, cg_set_reg); -} - -static void clk_prcmu_disable(struct clk *clk) -{ - void __iomem *cg_clr_reg = __io_address(U8500_PRCMU_BASE) - + PRCM_YYCLKEN0_MGT_CLR + clk->prcmu_cg_off; - - writel(1 << clk->prcmu_cg_bit, cg_clr_reg); -} - -static struct clkops clk_prcmu_ops = { - .enable = clk_prcmu_enable, - .disable = clk_prcmu_disable, -}; - -static unsigned int clkrst_base[] = { - [1] = U8500_CLKRST1_BASE, - [2] = U8500_CLKRST2_BASE, - [3] = U8500_CLKRST3_BASE, - [5] = U8500_CLKRST5_BASE, - [6] = U8500_CLKRST6_BASE, -}; - -static void clk_prcc_enable(struct clk *clk) -{ - void __iomem *addr = __io_address(clkrst_base[clk->cluster]); - - if (clk->prcc_kernel != -1) - writel(1 << clk->prcc_kernel, addr + PRCC_KCKEN); - - if (clk->prcc_bus != -1) - writel(1 << clk->prcc_bus, addr + PRCC_PCKEN); -} - -static void clk_prcc_disable(struct clk *clk) -{ - void __iomem *addr = __io_address(clkrst_base[clk->cluster]); - - if (clk->prcc_bus != -1) - writel(1 << clk->prcc_bus, addr + PRCC_PCKDIS); - - if (clk->prcc_kernel != -1) - writel(1 << clk->prcc_kernel, addr + PRCC_KCKDIS); -} - -static struct clkops clk_prcc_ops = { - .enable = clk_prcc_enable, - .disable = clk_prcc_disable, -}; - -static struct clk clk_32khz = { - .name = "clk_32khz", - .rate = 32000, -}; - -/* - * PRCMU level clock gating - */ - -/* Bank 0 */ -static DEFINE_PRCMU_CLK(svaclk, 0x0, 2, SVAMMDSPCLK); -static DEFINE_PRCMU_CLK(siaclk, 0x0, 3, SIAMMDSPCLK); -static DEFINE_PRCMU_CLK(sgaclk, 0x0, 4, SGACLK); -static DEFINE_PRCMU_CLK_RATE(uartclk, 0x0, 5, UARTCLK, 38400000); -static DEFINE_PRCMU_CLK(msp02clk, 0x0, 6, MSP02CLK); -static DEFINE_PRCMU_CLK(msp1clk, 0x0, 7, MSP1CLK); /* v1 */ -static DEFINE_PRCMU_CLK_RATE(i2cclk, 0x0, 8, I2CCLK, 48000000); -static DEFINE_PRCMU_CLK_RATE(sdmmcclk, 0x0, 9, SDMMCCLK, 100000000); -static DEFINE_PRCMU_CLK(slimclk, 0x0, 10, SLIMCLK); -static DEFINE_PRCMU_CLK(per1clk, 0x0, 11, PER1CLK); -static DEFINE_PRCMU_CLK(per2clk, 0x0, 12, PER2CLK); -static DEFINE_PRCMU_CLK(per3clk, 0x0, 13, PER3CLK); -static DEFINE_PRCMU_CLK(per5clk, 0x0, 14, PER5CLK); -static DEFINE_PRCMU_CLK_RATE(per6clk, 0x0, 15, PER6CLK, 133330000); -static DEFINE_PRCMU_CLK(lcdclk, 0x0, 17, LCDCLK); -static DEFINE_PRCMU_CLK(bmlclk, 0x0, 18, BMLCLK); -static DEFINE_PRCMU_CLK(hsitxclk, 0x0, 19, HSITXCLK); -static DEFINE_PRCMU_CLK(hsirxclk, 0x0, 20, HSIRXCLK); -static DEFINE_PRCMU_CLK(hdmiclk, 0x0, 21, HDMICLK); -static DEFINE_PRCMU_CLK(apeatclk, 0x0, 22, APEATCLK); -static DEFINE_PRCMU_CLK(apetraceclk, 0x0, 23, APETRACECLK); -static DEFINE_PRCMU_CLK(mcdeclk, 0x0, 24, MCDECLK); -static DEFINE_PRCMU_CLK(ipi2clk, 0x0, 25, IPI2CCLK); -static DEFINE_PRCMU_CLK(dsialtclk, 0x0, 26, DSIALTCLK); /* v1 */ -static DEFINE_PRCMU_CLK(dmaclk, 0x0, 27, DMACLK); -static DEFINE_PRCMU_CLK(b2r2clk, 0x0, 28, B2R2CLK); -static DEFINE_PRCMU_CLK(tvclk, 0x0, 29, TVCLK); -static DEFINE_PRCMU_CLK(uniproclk, 0x0, 30, UNIPROCLK); /* v1 */ -static DEFINE_PRCMU_CLK_RATE(sspclk, 0x0, 31, SSPCLK, 48000000); /* v1 */ - -/* Bank 1 */ -static DEFINE_PRCMU_CLK(rngclk, 0x4, 0, RNGCLK); /* v1 */ -static DEFINE_PRCMU_CLK(uiccclk, 0x4, 1, UICCCLK); /* v1 */ - -/* - * PRCC level clock gating - * Format: per#, clk, PCKEN bit, KCKEN bit, parent - */ - -/* Peripheral Cluster #1 */ -static DEFINE_PRCC_CLK(1, msp3, 11, 10, &clk_msp1clk); -static DEFINE_PRCC_CLK(1, i2c4, 10, 9, &clk_i2cclk); -static DEFINE_PRCC_CLK(1, gpio0, 9, -1, NULL); -static DEFINE_PRCC_CLK(1, slimbus0, 8, 8, &clk_slimclk); -static DEFINE_PRCC_CLK(1, spi3, 7, -1, NULL); -static DEFINE_PRCC_CLK(1, i2c2, 6, 6, &clk_i2cclk); -static DEFINE_PRCC_CLK(1, sdi0, 5, 5, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(1, msp1, 4, 4, &clk_msp1clk); -static DEFINE_PRCC_CLK(1, msp0, 3, 3, &clk_msp02clk); -static DEFINE_PRCC_CLK(1, i2c1, 2, 2, &clk_i2cclk); -static DEFINE_PRCC_CLK(1, uart1, 1, 1, &clk_uartclk); -static DEFINE_PRCC_CLK(1, uart0, 0, 0, &clk_uartclk); - -/* Peripheral Cluster #2 */ -static DEFINE_PRCC_CLK(2, gpio1, 11, -1, NULL); -static DEFINE_PRCC_CLK(2, ssitx, 10, 7, NULL); -static DEFINE_PRCC_CLK(2, ssirx, 9, 6, NULL); -static DEFINE_PRCC_CLK(2, spi0, 8, -1, NULL); -static DEFINE_PRCC_CLK(2, sdi3, 7, 5, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(2, sdi1, 6, 4, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(2, msp2, 5, 3, &clk_msp02clk); -static DEFINE_PRCC_CLK(2, sdi4, 4, 2, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(2, pwl, 3, 1, NULL); -static DEFINE_PRCC_CLK(2, spi1, 2, -1, NULL); -static DEFINE_PRCC_CLK(2, spi2, 1, -1, NULL); -static DEFINE_PRCC_CLK(2, i2c3, 0, 0, &clk_i2cclk); - -/* Peripheral Cluster #3 */ -static DEFINE_PRCC_CLK(3, gpio2, 8, -1, NULL); -static DEFINE_PRCC_CLK(3, sdi5, 7, 7, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(3, uart2, 6, 6, &clk_uartclk); -static DEFINE_PRCC_CLK(3, ske, 5, 5, &clk_32khz); -static DEFINE_PRCC_CLK(3, sdi2, 4, 4, &clk_sdmmcclk); -static DEFINE_PRCC_CLK(3, i2c0, 3, 3, &clk_i2cclk); -static DEFINE_PRCC_CLK(3, ssp1, 2, 2, &clk_sspclk); -static DEFINE_PRCC_CLK(3, ssp0, 1, 1, &clk_sspclk); -static DEFINE_PRCC_CLK(3, fsmc, 0, -1, NULL); - -/* Peripheral Cluster #4 is in the always on domain */ - -/* Peripheral Cluster #5 */ -static DEFINE_PRCC_CLK(5, gpio3, 1, -1, NULL); -static DEFINE_PRCC_CLK(5, usb, 0, 0, NULL); - -/* Peripheral Cluster #6 */ - -/* MTU ID in data */ -static DEFINE_PRCC_CLK_CUSTOM(6, mtu1, 9, -1, NULL, clk_mtu_get_rate, 1); -static DEFINE_PRCC_CLK_CUSTOM(6, mtu0, 8, -1, NULL, clk_mtu_get_rate, 0); -static DEFINE_PRCC_CLK(6, cfgreg, 7, 7, NULL); -static DEFINE_PRCC_CLK(6, hash1, 6, -1, NULL); -static DEFINE_PRCC_CLK(6, unipro, 5, 1, &clk_uniproclk); -static DEFINE_PRCC_CLK(6, pka, 4, -1, NULL); -static DEFINE_PRCC_CLK(6, hash0, 3, -1, NULL); -static DEFINE_PRCC_CLK(6, cryp0, 2, -1, NULL); -static DEFINE_PRCC_CLK(6, cryp1, 1, -1, NULL); -static DEFINE_PRCC_CLK(6, rng, 0, 0, &clk_rngclk); - -static struct clk clk_dummy_apb_pclk = { - .name = "apb_pclk", -}; - -static struct clk_lookup u8500_clks[] = { - CLK(dummy_apb_pclk, NULL, "apb_pclk"), - - /* Peripheral Cluster #1 */ - CLK(gpio0, "gpio.0", NULL), - CLK(gpio0, "gpio.1", NULL), - CLK(slimbus0, "slimbus0", NULL), - CLK(i2c2, "nmk-i2c.2", NULL), - CLK(sdi0, "sdi0", NULL), - CLK(msp0, "ux500-msp-i2s.0", NULL), - CLK(i2c1, "nmk-i2c.1", NULL), - CLK(uart1, "uart1", NULL), - CLK(uart0, "uart0", NULL), - - /* Peripheral Cluster #3 */ - CLK(gpio2, "gpio.2", NULL), - CLK(gpio2, "gpio.3", NULL), - CLK(gpio2, "gpio.4", NULL), - CLK(gpio2, "gpio.5", NULL), - CLK(sdi5, "sdi5", NULL), - CLK(uart2, "uart2", NULL), - CLK(ske, "ske", NULL), - CLK(ske, "nmk-ske-keypad", NULL), - CLK(sdi2, "sdi2", NULL), - CLK(i2c0, "nmk-i2c.0", NULL), - CLK(fsmc, "fsmc", NULL), - - /* Peripheral Cluster #5 */ - CLK(gpio3, "gpio.8", NULL), - - /* Peripheral Cluster #6 */ - CLK(hash1, "hash1", NULL), - CLK(pka, "pka", NULL), - CLK(hash0, "hash0", NULL), - CLK(cryp0, "cryp0", NULL), - CLK(cryp1, "cryp1", NULL), - - /* PRCMU level clock gating */ - - /* Bank 0 */ - CLK(svaclk, "sva", NULL), - CLK(siaclk, "sia", NULL), - CLK(sgaclk, "sga", NULL), - CLK(slimclk, "slim", NULL), - CLK(lcdclk, "lcd", NULL), - CLK(bmlclk, "bml", NULL), - CLK(hsitxclk, "stm-hsi.0", NULL), - CLK(hsirxclk, "stm-hsi.1", NULL), - CLK(hdmiclk, "hdmi", NULL), - CLK(apeatclk, "apeat", NULL), - CLK(apetraceclk, "apetrace", NULL), - CLK(mcdeclk, "mcde", NULL), - CLK(ipi2clk, "ipi2", NULL), - CLK(dmaclk, "dma40.0", NULL), - CLK(b2r2clk, "b2r2", NULL), - CLK(tvclk, "tv", NULL), - - /* Peripheral Cluster #1 */ - CLK(i2c4, "nmk-i2c.4", NULL), - CLK(spi3, "spi3", NULL), - CLK(msp1, "ux500-msp-i2s.1", NULL), - CLK(msp3, "ux500-msp-i2s.3", NULL), - - /* Peripheral Cluster #2 */ - CLK(gpio1, "gpio.6", NULL), - CLK(gpio1, "gpio.7", NULL), - CLK(ssitx, "ssitx", NULL), - CLK(ssirx, "ssirx", NULL), - CLK(spi0, "spi0", NULL), - CLK(sdi3, "sdi3", NULL), - CLK(sdi1, "sdi1", NULL), - CLK(msp2, "ux500-msp-i2s.2", NULL), - CLK(sdi4, "sdi4", NULL), - CLK(pwl, "pwl", NULL), - CLK(spi1, "spi1", NULL), - CLK(spi2, "spi2", NULL), - CLK(i2c3, "nmk-i2c.3", NULL), - - /* Peripheral Cluster #3 */ - CLK(ssp1, "ssp1", NULL), - CLK(ssp0, "ssp0", NULL), - - /* Peripheral Cluster #5 */ - CLK(usb, "musb-ux500.0", "usb"), - - /* Peripheral Cluster #6 */ - CLK(mtu1, "mtu1", NULL), - CLK(mtu0, "mtu0", NULL), - CLK(cfgreg, "cfgreg", NULL), - CLK(hash1, "hash1", NULL), - CLK(unipro, "unipro", NULL), - CLK(rng, "rng", NULL), - - /* PRCMU level clock gating */ - - /* Bank 0 */ - CLK(uniproclk, "uniproclk", NULL), - CLK(dsialtclk, "dsialt", NULL), - - /* Bank 1 */ - CLK(rngclk, "rng", NULL), - CLK(uiccclk, "uicc", NULL), -}; - -#ifdef CONFIG_DEBUG_FS -/* - * debugfs support to trace clock tree hierarchy and attributes with - * powerdebug - */ -static struct dentry *clk_debugfs_root; - -void __init clk_debugfs_add_table(struct clk_lookup *cl, size_t num) -{ - while (num--) { - /* Check that the clock has not been already registered */ - if (!(cl->clk->list.prev != cl->clk->list.next)) - list_add_tail(&cl->clk->list, &clk_list); - - cl++; - } -} - -static ssize_t usecount_dbg_read(struct file *file, char __user *buf, - size_t size, loff_t *off) -{ - struct clk *clk = file->f_dentry->d_inode->i_private; - char cusecount[128]; - unsigned int len; - - len = sprintf(cusecount, "%u\n", clk->enabled); - return simple_read_from_buffer(buf, size, off, cusecount, len); -} - -static ssize_t rate_dbg_read(struct file *file, char __user *buf, - size_t size, loff_t *off) -{ - struct clk *clk = file->f_dentry->d_inode->i_private; - char crate[128]; - unsigned int rate; - unsigned int len; - - rate = clk_get_rate(clk); - len = sprintf(crate, "%u\n", rate); - return simple_read_from_buffer(buf, size, off, crate, len); -} - -static const struct file_operations usecount_fops = { - .read = usecount_dbg_read, -}; - -static const struct file_operations set_rate_fops = { - .read = rate_dbg_read, -}; - -static struct dentry *clk_debugfs_register_dir(struct clk *c, - struct dentry *p_dentry) -{ - struct dentry *d, *clk_d; - const char *p = c->name; - - if (!p) - p = "BUG"; - - clk_d = debugfs_create_dir(p, p_dentry); - if (!clk_d) - return NULL; - - d = debugfs_create_file("usecount", S_IRUGO, - clk_d, c, &usecount_fops); - if (!d) - goto err_out; - d = debugfs_create_file("rate", S_IRUGO, - clk_d, c, &set_rate_fops); - if (!d) - goto err_out; - /* - * TODO : not currently available in ux500 - * d = debugfs_create_x32("flags", S_IRUGO, clk_d, (u32 *)&c->flags); - * if (!d) - * goto err_out; - */ - - return clk_d; - -err_out: - debugfs_remove_recursive(clk_d); - return NULL; -} - -static int clk_debugfs_register_one(struct clk *c) -{ - struct clk *pa = c->parent_periph; - struct clk *bpa = c->parent_cluster; - - if (!(bpa && !pa)) { - c->dent = clk_debugfs_register_dir(c, - pa ? pa->dent : clk_debugfs_root); - if (!c->dent) - return -ENOMEM; - } - - if (bpa) { - c->dent_bus = clk_debugfs_register_dir(c, - bpa->dent_bus ? bpa->dent_bus : bpa->dent); - if ((!c->dent_bus) && (c->dent)) { - debugfs_remove_recursive(c->dent); - c->dent = NULL; - return -ENOMEM; - } - } - return 0; -} - -static int clk_debugfs_register(struct clk *c) -{ - int err; - struct clk *pa = c->parent_periph; - struct clk *bpa = c->parent_cluster; - - if (pa && (!pa->dent && !pa->dent_bus)) { - err = clk_debugfs_register(pa); - if (err) - return err; - } - - if (bpa && (!bpa->dent && !bpa->dent_bus)) { - err = clk_debugfs_register(bpa); - if (err) - return err; - } - - if ((!c->dent) && (!c->dent_bus)) { - err = clk_debugfs_register_one(c); - if (err) - return err; - } - return 0; -} - -int __init clk_debugfs_init(void) -{ - struct clk *c; - struct dentry *d; - int err; - - d = debugfs_create_dir("clock", NULL); - if (!d) - return -ENOMEM; - clk_debugfs_root = d; - - list_for_each_entry(c, &clk_list, list) { - err = clk_debugfs_register(c); - if (err) - goto err_out; - } - return 0; -err_out: - debugfs_remove_recursive(clk_debugfs_root); - return err; -} - -#endif /* defined(CONFIG_DEBUG_FS) */ - -unsigned long clk_smp_twd_rate = 500000000; - -unsigned long clk_smp_twd_get_rate(struct clk *clk) -{ - return clk_smp_twd_rate; -} - -static struct clk clk_smp_twd = { - .get_rate = clk_smp_twd_get_rate, - .name = "smp_twd", -}; - -static struct clk_lookup clk_smp_twd_lookup = { - .dev_id = "smp_twd", - .clk = &clk_smp_twd, -}; - -#ifdef CONFIG_CPU_FREQ - -static int clk_twd_cpufreq_transition(struct notifier_block *nb, - unsigned long state, void *data) -{ - struct cpufreq_freqs *f = data; - - if (state == CPUFREQ_PRECHANGE) { - /* Save frequency in simple Hz */ - clk_smp_twd_rate = (f->new * 1000) / 2; - } - - return NOTIFY_OK; -} - -static struct notifier_block clk_twd_cpufreq_nb = { - .notifier_call = clk_twd_cpufreq_transition, -}; - -int clk_init_smp_twd_cpufreq(void) -{ - return cpufreq_register_notifier(&clk_twd_cpufreq_nb, - CPUFREQ_TRANSITION_NOTIFIER); -} - -#endif - -int __init clk_init(void) -{ - clkdev_add_table(u8500_clks, ARRAY_SIZE(u8500_clks)); - clkdev_add(&clk_smp_twd_lookup); - -#ifdef CONFIG_DEBUG_FS - clk_debugfs_add_table(u8500_clks, ARRAY_SIZE(u8500_clks)); -#endif - return 0; -} diff --git a/arch/arm/mach-ux500/clock.h b/arch/arm/mach-ux500/clock.h deleted file mode 100644 index 65d27a13f46..00000000000 --- a/arch/arm/mach-ux500/clock.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (C) 2010 ST-Ericsson - * Copyright (C) 2009 STMicroelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/** - * struct clkops - ux500 clock operations - * @enable: function to enable the clock - * @disable: function to disable the clock - * @get_rate: function to get the current clock rate - * - * This structure contains function pointers to functions that will be used to - * control the clock. All of these functions are optional. If get_rate is - * NULL, the rate in the struct clk will be used. - */ -struct clkops { - void (*enable) (struct clk *); - void (*disable) (struct clk *); - unsigned long (*get_rate) (struct clk *); - int (*set_parent)(struct clk *, struct clk *); -}; - -/** - * struct clk - ux500 clock structure - * @ops: pointer to clkops struct used to control this clock - * @name: name, for debugging - * @enabled: refcount. positive if enabled, zero if disabled - * @get_rate: custom callback for getting the clock rate - * @data: custom per-clock data for example for the get_rate - * callback - * @rate: fixed rate for clocks which don't implement - * ops->getrate - * @prcmu_cg_off: address offset of the combined enable/disable register - * (used on u8500v1) - * @prcmu_cg_bit: bit in the combined enable/disable register (used on - * u8500v1) - * @prcmu_cg_mgt: address of the enable/disable register (used on - * u8500ed) - * @cluster: peripheral cluster number - * @prcc_bus: bit for the bus clock in the peripheral's CLKRST - * @prcc_kernel: bit for the kernel clock in the peripheral's CLKRST. - * -1 if no kernel clock exists. - * @parent_cluster: pointer to parent's cluster clk struct - * @parent_periph: pointer to parent's peripheral clk struct - * - * Peripherals are organised into clusters, and each cluster has an associated - * bus clock. Some peripherals also have a parent peripheral clock. - * - * In order to enable a clock for a peripheral, we need to enable: - * (1) the parent cluster (bus) clock at the PRCMU level - * (2) the parent peripheral clock (if any) at the PRCMU level - * (3) the peripheral's bus & kernel clock at the PRCC level - * - * (1) and (2) are handled by defining clk structs (DEFINE_PRCMU_CLK) for each - * of the cluster and peripheral clocks, and hooking these as the parents of - * the individual peripheral clocks. - * - * (3) is handled by specifying the bits in the PRCC control registers required - * to enable these clocks and modifying them in the ->enable and - * ->disable callbacks of the peripheral clocks (DEFINE_PRCC_CLK). - * - * This structure describes both the PRCMU-level clocks and PRCC-level clocks. - * The prcmu_* fields are only used for the PRCMU clocks, and the cluster, - * prcc, and parent pointers are only used for the PRCC-level clocks. - */ -struct clk { - const struct clkops *ops; - const char *name; - unsigned int enabled; - unsigned long (*get_rate)(struct clk *); - void *data; - - unsigned long rate; - struct list_head list; - - /* These three are only for PRCMU clks */ - - unsigned int prcmu_cg_off; - unsigned int prcmu_cg_bit; - unsigned int prcmu_cg_mgt; - - /* The rest are only for PRCC clks */ - - int cluster; - unsigned int prcc_bus; - unsigned int prcc_kernel; - - struct clk *parent_cluster; - struct clk *parent_periph; -#if defined(CONFIG_DEBUG_FS) - struct dentry *dent; /* For visible tree hierarchy */ - struct dentry *dent_bus; /* For visible tree hierarchy */ -#endif -}; - -#define DEFINE_PRCMU_CLK(_name, _cg_off, _cg_bit, _reg) \ -struct clk clk_##_name = { \ - .name = #_name, \ - .ops = &clk_prcmu_ops, \ - .prcmu_cg_off = _cg_off, \ - .prcmu_cg_bit = _cg_bit, \ - .prcmu_cg_mgt = PRCM_##_reg##_MGT \ - } - -#define DEFINE_PRCMU_CLK_RATE(_name, _cg_off, _cg_bit, _reg, _rate) \ -struct clk clk_##_name = { \ - .name = #_name, \ - .ops = &clk_prcmu_ops, \ - .prcmu_cg_off = _cg_off, \ - .prcmu_cg_bit = _cg_bit, \ - .rate = _rate, \ - .prcmu_cg_mgt = PRCM_##_reg##_MGT \ - } - -#define DEFINE_PRCC_CLK(_pclust, _name, _bus_en, _kernel_en, _kernclk) \ -struct clk clk_##_name = { \ - .name = #_name, \ - .ops = &clk_prcc_ops, \ - .cluster = _pclust, \ - .prcc_bus = _bus_en, \ - .prcc_kernel = _kernel_en, \ - .parent_cluster = &clk_per##_pclust##clk, \ - .parent_periph = _kernclk \ - } - -#define DEFINE_PRCC_CLK_CUSTOM(_pclust, _name, _bus_en, _kernel_en, _kernclk, _callback, _data) \ -struct clk clk_##_name = { \ - .name = #_name, \ - .ops = &clk_prcc_ops, \ - .cluster = _pclust, \ - .prcc_bus = _bus_en, \ - .prcc_kernel = _kernel_en, \ - .parent_cluster = &clk_per##_pclust##clk, \ - .parent_periph = _kernclk, \ - .get_rate = _callback, \ - .data = (void *) _data \ - } - - -#define CLK(_clk, _devname, _conname) \ - { \ - .clk = &clk_##_clk, \ - .dev_id = _devname, \ - .con_id = _conname, \ - } - -int __init clk_db8500_ed_fixup(void); -int __init clk_init(void); - -#ifdef CONFIG_DEBUG_FS -int clk_debugfs_init(void); -#else -static inline int clk_debugfs_init(void) { return 0; } -#endif - -#ifdef CONFIG_CPU_FREQ -int clk_init_smp_twd_cpufreq(void); -#else -static inline int clk_init_smp_twd_cpufreq(void) { return 0; } -#endif diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index e2360e7c770..17a78ec516f 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -8,7 +8,6 @@ #include <linux/platform_device.h> #include <linux/io.h> -#include <linux/clk.h> #include <linux/mfd/db8500-prcmu.h> #include <linux/clksrc-dbx500-prcmu.h> #include <linux/sys_soc.h> @@ -17,6 +16,7 @@ #include <linux/stat.h> #include <linux/of.h> #include <linux/of_irq.h> +#include <linux/platform_data/clk-ux500.h> #include <asm/hardware/gic.h> #include <asm/mach/map.h> @@ -25,8 +25,6 @@ #include <mach/setup.h> #include <mach/devices.h> -#include "clock.h" - void __iomem *_PRCMU_BASE; /* @@ -70,13 +68,17 @@ void __init ux500_init_irq(void) */ if (cpu_is_u8500_family()) db8500_prcmu_early_init(); - clk_init(); + + if (cpu_is_u8500_family()) + u8500_clk_init(); + else if (cpu_is_u9540()) + u9540_clk_init(); + else if (cpu_is_u8540()) + u8540_clk_init(); } void __init ux500_init_late(void) { - clk_debugfs_init(); - clk_init_smp_twd_cpufreq(); } static const char * __init ux500_get_machine(void) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e59c4ab71bc..13f555d6249 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -346,6 +346,8 @@ static int __init atomic_pool_init(void) (unsigned)pool->size / 1024); return 0; } + + kfree(pages); no_pages: kfree(bitmap); no_bitmap: diff --git a/arch/arm/plat-mxc/include/mach/mx25.h b/arch/arm/plat-mxc/include/mach/mx25.h index 627d94f1b01..ec466400a20 100644 --- a/arch/arm/plat-mxc/include/mach/mx25.h +++ b/arch/arm/plat-mxc/include/mach/mx25.h @@ -98,6 +98,7 @@ #define MX25_INT_UART1 (NR_IRQS_LEGACY + 45) #define MX25_INT_GPIO2 (NR_IRQS_LEGACY + 51) #define MX25_INT_GPIO1 (NR_IRQS_LEGACY + 52) +#define MX25_INT_GPT1 (NR_IRQS_LEGACY + 54) #define MX25_INT_FEC (NR_IRQS_LEGACY + 57) #define MX25_DMA_REQ_SSI2_RX1 22 diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h index 1a52725ffcf..a531149823b 100644 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/arch/arm/plat-omap/include/plat/omap-serial.h @@ -18,7 +18,7 @@ #define __OMAP_SERIAL_H__ #include <linux/serial_core.h> -#include <linux/platform_device.h> +#include <linux/device.h> #include <linux/pm_qos.h> #include <plat/mux.h> @@ -42,10 +42,10 @@ #define OMAP_UART_WER_MOD_WKUP 0X7F /* Enable XON/XOFF flow control on output */ -#define OMAP_UART_SW_TX 0x04 +#define OMAP_UART_SW_TX 0x8 /* Enable XON/XOFF flow control on input */ -#define OMAP_UART_SW_RX 0x04 +#define OMAP_UART_SW_RX 0x2 #define OMAP_UART_SYSC_RESET 0X07 #define OMAP_UART_TCR_TRIG 0X0F @@ -69,11 +69,14 @@ struct omap_uart_port_info { unsigned int dma_rx_timeout; unsigned int autosuspend_timeout; unsigned int dma_rx_poll_rate; + int DTR_gpio; + int DTR_inverted; + int DTR_present; int (*get_context_loss_count)(struct device *); - void (*set_forceidle)(struct platform_device *); - void (*set_noidle)(struct platform_device *); - void (*enable_wakeup)(struct platform_device *, bool); + void (*set_forceidle)(struct device *); + void (*set_noidle)(struct device *); + void (*enable_wakeup)(struct device *, bool); }; struct uart_omap_dma { @@ -102,39 +105,4 @@ struct uart_omap_dma { unsigned int rx_timeout; }; -struct uart_omap_port { - struct uart_port port; - struct uart_omap_dma uart_dma; - struct platform_device *pdev; - - unsigned char ier; - unsigned char lcr; - unsigned char mcr; - unsigned char fcr; - unsigned char efr; - unsigned char dll; - unsigned char dlh; - unsigned char mdr1; - unsigned char scr; - - int use_dma; - /* - * Some bits in registers are cleared on a read, so they must - * be saved whenever the register is read but the bits will not - * be immediately processed. - */ - unsigned int lsr_break_flag; - unsigned char msr_saved_flags; - char name[20]; - unsigned long port_activity; - u32 context_loss_cnt; - u32 errata; - u8 wakeups_enabled; - - struct pm_qos_request pm_qos_request; - u32 latency; - u32 calc_latency; - struct work_struct qos_work; -}; - #endif /* __OMAP_SERIAL_H__ */ diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 65c5eca475e..d1116e2dfbe 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -144,6 +144,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate) int clk_set_rate(struct clk *clk, unsigned long rate) { + unsigned long flags; int ret; if (IS_ERR(clk)) @@ -159,9 +160,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) if (clk->ops == NULL || clk->ops->set_rate == NULL) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); ret = (clk->ops->set_rate)(clk, rate); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } @@ -173,17 +174,18 @@ struct clk *clk_get_parent(struct clk *clk) int clk_set_parent(struct clk *clk, struct clk *parent) { + unsigned long flags; int ret = 0; if (IS_ERR(clk)) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); if (clk->ops && clk->ops->set_parent) ret = (clk->ops->set_parent)(clk, parent); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig new file mode 100644 index 00000000000..767ba568545 --- /dev/null +++ b/arch/arm64/Kconfig @@ -0,0 +1,222 @@ +config ARM64 + def_bool y + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select GENERIC_CLOCKEVENTS + select GENERIC_HARDIRQS_NO_DEPRECATED + select GENERIC_IOMAP + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_SMP_IDLE_THREAD + select GENERIC_TIME_VSYSCALL + select HARDIRQS_SW_RESEND + select HAVE_ARCH_TRACEHOOK + select HAVE_DMA_API_DEBUG + select HAVE_DMA_ATTRS + select HAVE_GENERIC_DMA_COHERENT + select HAVE_GENERIC_HARDIRQS + select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_WORK + select HAVE_MEMBLOCK + select HAVE_PERF_EVENTS + select HAVE_SPARSE_IRQ + select IRQ_DOMAIN + select NO_BOOTMEM + select OF + select OF_EARLY_FLATTREE + select PERF_USE_VMALLOC + select RTC_LIB + select SPARSE_IRQ + help + ARM 64-bit (AArch64) Linux support. + +config 64BIT + def_bool y + +config ARCH_PHYS_ADDR_T_64BIT + def_bool y + +config MMU + def_bool y + +config NO_IOPORT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +config GENERIC_LOCKBREAK + def_bool y + depends on SMP && PREEMPT + +config RWSEM_GENERIC_SPINLOCK + def_bool y + +config GENERIC_HWEIGHT + def_bool y + +config GENERIC_CSUM + def_bool y + +config GENERIC_CALIBRATE_DELAY + def_bool y + +config ZONE_DMA32 + def_bool y + +config ARCH_DMA_ADDR_T_64BIT + def_bool y + +config NEED_DMA_MAP_STATE + def_bool y + +config NEED_SG_DMA_LENGTH + def_bool y + +config SWIOTLB + def_bool y + +config IOMMU_HELPER + def_bool SWIOTLB + +source "init/Kconfig" + +source "kernel/Kconfig.freezer" + +menu "System Type" + +endmenu + +menu "Bus support" + +config ARM_AMBA + bool + +endmenu + +menu "Kernel Features" + +source "kernel/time/Kconfig" + +config ARM64_64K_PAGES + bool "Enable 64KB pages support" + help + This feature enables 64KB pages support (4KB by default) + allowing only two levels of page tables and faster TLB + look-up. AArch32 emulation is not available when this feature + is enabled. + +config SMP + bool "Symmetric Multi-Processing" + select USE_GENERIC_SMP_HELPERS + help + This enables support for systems with more than one CPU. If + you say N here, the kernel will run on single and + multiprocessor machines, but will use only one CPU of a + multiprocessor machine. If you say Y here, the kernel will run + on many, but not all, single processor machines. On a single + processor machine, the kernel will run faster if you say N + here. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "4" + +source kernel/Kconfig.preempt + +config HZ + int + default 100 + +config ARCH_HAS_HOLES_MEMORYMODEL + def_bool y if SPARSEMEM + +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + +config ARCH_SPARSEMEM_DEFAULT + def_bool ARCH_SPARSEMEM_ENABLE + +config ARCH_SELECT_MEMORY_MODEL + def_bool ARCH_SPARSEMEM_ENABLE + +config HAVE_ARCH_PFN_VALID + def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + +config HW_PERF_EVENTS + bool "Enable hardware performance counter support for perf events" + depends on PERF_EVENTS + default y + help + Enable hardware performance counter support for perf events. If + disabled, perf events will use software events only. + +source "mm/Kconfig" + +endmenu + +menu "Boot options" + +config CMDLINE + string "Default kernel command string" + default "" + help + Provide a set of default command-line options at build time by + entering them here. As a minimum, you should specify the the + root device (e.g. root=/dev/nfs). + +config CMDLINE_FORCE + bool "Always use the default kernel command string" + help + Always use the default kernel command string, even if the boot + loader passes other arguments to the kernel. + This is useful if you cannot or don't want to change the + command-line options your boot loader passes to the kernel. + +endmenu + +menu "Userspace binary formats" + +source "fs/Kconfig.binfmt" + +config COMPAT + bool "Kernel support for 32-bit EL0" + depends on !ARM64_64K_PAGES + select COMPAT_BINFMT_ELF + help + This option enables support for a 32-bit EL0 running under a 64-bit + kernel at EL1. AArch32-specific components such as system calls, + the user helper functions, VFP support and the ptrace interface are + handled appropriately by the kernel. + + If you want to execute 32-bit userspace applications, say Y. + +config SYSVIPC_COMPAT + def_bool y + depends on COMPAT && SYSVIPC + +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/arm64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug new file mode 100644 index 00000000000..d7553f2bda6 --- /dev/null +++ b/arch/arm64/Kconfig.debug @@ -0,0 +1,27 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config FRAME_POINTER + bool + default y + +config DEBUG_ERRORS + bool "Verbose kernel error messages" + depends on DEBUG_KERNEL + help + This option controls verbose debugging information which can be + printed when the kernel detects an internal error. This debugging + information is useful to kernel hackers when tracking down problems, + but mostly meaningless to other people. It's safe to say Y unless + you are concerned with the code size or don't want to see these + messages. + +config DEBUG_STACK_USAGE + bool "Enable stack utilization instrumentation" + depends on DEBUG_KERNEL + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T output. + +endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile new file mode 100644 index 00000000000..364191f3be4 --- /dev/null +++ b/arch/arm64/Makefile @@ -0,0 +1,71 @@ +# +# arch/arm64/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995-2001 by Russell King + +LDFLAGS_vmlinux :=-p --no-undefined -X +CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) +OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +GZFLAGS :=-9 + +LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) + +KBUILD_DEFCONFIG := defconfig + +KBUILD_CFLAGS += -mgeneral-regs-only +KBUILD_CPPFLAGS += -mlittle-endian +AS += -EL +LD += -EL + +comma = , + +CHECKFLAGS += -D__aarch64__ + +# Default value +head-y := arch/arm64/kernel/head.o + +# The byte offset of the kernel image in RAM from the start of RAM. +TEXT_OFFSET := 0x00080000 + +export TEXT_OFFSET GZFLAGS + +core-y += arch/arm64/kernel/ arch/arm64/mm/ +libs-y := arch/arm64/lib/ $(libs-y) +libs-y += $(LIBGCC) + +# Default target when executing plain make +KBUILD_IMAGE := Image.gz + +all: $(KBUILD_IMAGE) + +boot := arch/arm64/boot + +Image Image.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + +zinstall install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ + +%.dtb: + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + +# We use MRPROPER_FILES and CLEAN_FILES now +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +define archhelp + echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' + echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' + echo ' install - Install uncompressed kernel' + echo ' zinstall - Install compressed kernel' + echo ' Install using (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' +endef diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore new file mode 100644 index 00000000000..8dab0bb6ae6 --- /dev/null +++ b/arch/arm64/boot/.gitignore @@ -0,0 +1,2 @@ +Image +Image.gz diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile new file mode 100644 index 00000000000..eca209b2b0b --- /dev/null +++ b/arch/arm64/boot/Makefile @@ -0,0 +1,36 @@ +# +# arch/arm64/boot/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2012, ARM Ltd. +# Author: Will Deacon <will.deacon@arm.com> +# +# Based on the ia64 boot/Makefile. +# + +targets := Image Image.gz + +$(obj)/Image: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/Image.gz: $(obj)/Image FORCE + $(call if_changed,gzip) + +$(obj)/%.dtb: $(src)/dts/%.dts + $(call cmd,dtc) + +install: $(obj)/Image + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image System.map "$(INSTALL_PATH)" + +zinstall: $(obj)/Image.gz + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image.gz System.map "$(INSTALL_PATH)" + +clean-files += *.dtb diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh new file mode 100644 index 00000000000..12ed78aa6f0 --- /dev/null +++ b/arch/arm64/boot/install.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# +# arch/arm64/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# Adapted from code in arch/i386/boot/install.sh by Russell King +# +# "make install" script for the AArch64 Linux port +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +# User may have a custom install script +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +if [ "$(basename $2)" = "Image.gz" ]; then +# Compressed install + echo "Installing compressed kernel" + base=vmlinuz +else +# Normal install + echo "Installing normal kernel" + base=vmlinux +fi + +if [ -f $4/$base-$1 ]; then + mv $4/$base-$1 $4/$base-$1.old +fi +cat $2 > $4/$base-$1 + +# Install system map file +if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System.map-$1.old +fi +cp $3 $4/System.map-$1 diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig new file mode 100644 index 00000000000..9212c7880da --- /dev/null +++ b/arch/arm64/configs/defconfig @@ -0,0 +1,85 @@ +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_SMP=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_INET_LRO is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_BLK_DEV is not set +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_NETDEVICES=y +CONFIG_MII=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +CONFIG_LEGACY_PTY_COUNT=16 +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_ERRORS=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild new file mode 100644 index 00000000000..35924a542d4 --- /dev/null +++ b/arch/arm64/include/asm/Kbuild @@ -0,0 +1,51 @@ +include include/asm-generic/Kbuild.asm + +header-y += hwcap.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += checksum.h +generic-y += cputime.h +generic-y += current.h +generic-y += delay.h +generic-y += div64.h +generic-y += dma.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += ftrace.h +generic-y += hw_irq.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += kmap_types.h +generic-y += linkage.h +generic-y += local.h +generic-y += local64.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += pci.h +generic-y += percpu.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += segment.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += sizes.h +generic-y += socket.h +generic-y += sockios.h +generic-y += string.h +generic-y += switch_to.h +generic-y += swab.h +generic-y += termbits.h +generic-y += termios.h +generic-y += topology.h +generic-y += types.h +generic-y += unaligned.h +generic-y += user.h diff --git a/arch/arm64/include/asm/arm_generic.h b/arch/arm64/include/asm/arm_generic.h new file mode 100644 index 00000000000..e4cec9d30f2 --- /dev/null +++ b/arch/arm64/include/asm/arm_generic.h @@ -0,0 +1,100 @@ +/* + * arch/arm64/include/asm/arm_generic.h + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARM_GENERIC_H +#define __ASM_ARM_GENERIC_H + +#include <linux/clocksource.h> + +#define ARCH_TIMER_CTRL_ENABLE (1 << 0) +#define ARCH_TIMER_CTRL_IMASK (1 << 1) +#define ARCH_TIMER_CTRL_ISTATUS (1 << 2) + +#define ARCH_TIMER_REG_CTRL 0 +#define ARCH_TIMER_REG_FREQ 1 +#define ARCH_TIMER_REG_TVAL 2 + +static inline void arch_timer_reg_write(int reg, u32 val) +{ + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); + break; + default: + BUILD_BUG(); + } + + isb(); +} + +static inline u32 arch_timer_reg_read(int reg) +{ + u32 val; + + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); + break; + case ARCH_TIMER_REG_FREQ: + asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); + break; + default: + BUILD_BUG(); + } + + return val; +} + +static inline void __cpuinit arch_counter_enable_user_access(void) +{ + u32 cntkctl; + + /* Disable user access to the timers and the virtual counter. */ + asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); + cntkctl &= ~((3 << 8) | (1 << 1)); + + /* Enable user access to the physical counter and frequency. */ + cntkctl |= 1; + asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); +} + +static inline cycle_t arch_counter_get_cntpct(void) +{ + cycle_t cval; + + asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); + + return cval; +} + +static inline cycle_t arch_counter_get_cntvct(void) +{ + cycle_t cval; + + asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); + + return cval; +} + +#endif diff --git a/arch/arm64/include/asm/asm-offsets.h b/arch/arm64/include/asm/asm-offsets.h new file mode 100644 index 00000000000..d370ee36a18 --- /dev/null +++ b/arch/arm64/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h new file mode 100644 index 00000000000..da2a13e8f1e --- /dev/null +++ b/arch/arm64/include/asm/assembler.h @@ -0,0 +1,109 @@ +/* + * Based on arch/arm/include/asm/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASSEMBLY__ +#error "Only include this from assembly code" +#endif + +#include <asm/ptrace.h> + +/* + * Stack pushing/popping (register pairs only). Equivalent to store decrement + * before, load increment after. + */ + .macro push, xreg1, xreg2 + stp \xreg1, \xreg2, [sp, #-16]! + .endm + + .macro pop, xreg1, xreg2 + ldp \xreg1, \xreg2, [sp], #16 + .endm + +/* + * Enable and disable interrupts. + */ + .macro disable_irq + msr daifset, #2 + .endm + + .macro enable_irq + msr daifclr, #2 + .endm + +/* + * Save/disable and restore interrupts. + */ + .macro save_and_disable_irqs, olddaif + mrs \olddaif, daif + disable_irq + .endm + + .macro restore_irqs, olddaif + msr daif, \olddaif + .endm + +/* + * Enable and disable debug exceptions. + */ + .macro disable_dbg + msr daifset, #8 + .endm + + .macro enable_dbg + msr daifclr, #8 + .endm + + .macro disable_step, tmp + mrs \tmp, mdscr_el1 + bic \tmp, \tmp, #1 + msr mdscr_el1, \tmp + .endm + + .macro enable_step, tmp + mrs \tmp, mdscr_el1 + orr \tmp, \tmp, #1 + msr mdscr_el1, \tmp + .endm + + .macro enable_dbg_if_not_stepping, tmp + mrs \tmp, mdscr_el1 + tbnz \tmp, #1, 9990f + enable_dbg +9990: + .endm + +/* + * SMP data memory barrier + */ + .macro smp_dmb, opt +#ifdef CONFIG_SMP + dmb \opt +#endif + .endm + +#define USER(l, x...) \ +9999: x; \ + .section __ex_table,"a"; \ + .align 3; \ + .quad 9999b,l; \ + .previous + +/* + * Register aliases. + */ +lr .req x30 // link register diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h new file mode 100644 index 00000000000..407717ba060 --- /dev/null +++ b/arch/arm64/include/asm/atomic.h @@ -0,0 +1,305 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * + * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ATOMIC_H +#define __ASM_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/barrier.h> +#include <asm/cmpxchg.h> + +#define ATOMIC_INIT(i) { (i) } + +#ifdef __KERNEL__ + +/* + * On ARM, ordinary assignment (str instruction) doesn't clear the local + * strex/ldrex monitor on some implementations. The reason we can use it for + * atomic_set() is the clrex or dummy strex done on every exception return. + */ +#define atomic_read(v) (*(volatile int *)&(v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) + +/* + * AArch64 UP and SMP safe atomic ops. We use load exclusive and + * store exclusive to ensure that these are atomic. We may loop + * to ensure that the update happens. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_add\n" +"1: ldxr %w0, [%3]\n" +" add %w0, %w0, %w4\n" +" stxr %w1, %w0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_add_return\n" +"1: ldaxr %w0, [%3]\n" +" add %w0, %w0, %w4\n" +" stlxr %w1, %w0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); + + return result; +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_sub\n" +"1: ldxr %w0, [%3]\n" +" sub %w0, %w0, %w4\n" +" stxr %w1, %w0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_sub_return\n" +"1: ldaxr %w0, [%3]\n" +" sub %w0, %w0, %w4\n" +" stlxr %w1, %w0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); + + return result; +} + +static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) +{ + unsigned long tmp; + int oldval; + + asm volatile("// atomic_cmpxchg\n" +"1: ldaxr %w1, [%3]\n" +" cmp %w1, %w4\n" +" b.ne 2f\n" +" stlxr %w0, %w5, [%3]\n" +" cbnz %w0, 1b\n" +"2:" + : "=&r" (tmp), "=&r" (oldval), "+o" (ptr->counter) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : "cc"); + + return oldval; +} + +static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) +{ + unsigned long tmp, tmp2; + + asm volatile("// atomic_clear_mask\n" +"1: ldxr %0, [%3]\n" +" bic %0, %0, %4\n" +" stxr %w1, %0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (tmp), "=&r" (tmp2), "+o" (*addr) + : "r" (addr), "Ir" (mask) + : "cc"); +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + + c = atomic_read(v); + while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c) + c = old; + return c; +} + +#define atomic_inc(v) atomic_add(1, v) +#define atomic_dec(v) atomic_sub(1, v) + +#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) +#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) + +#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) + +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +/* + * 64-bit atomic operations. + */ +#define ATOMIC64_INIT(i) { (i) } + +#define atomic64_read(v) (*(volatile long long *)&(v)->counter) +#define atomic64_set(v,i) (((v)->counter) = (i)) + +static inline void atomic64_add(u64 i, atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_add\n" +"1: ldxr %0, [%3]\n" +" add %0, %0, %4\n" +" stxr %w1, %0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_add_return\n" +"1: ldaxr %0, [%3]\n" +" add %0, %0, %4\n" +" stlxr %w1, %0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); + + return result; +} + +static inline void atomic64_sub(u64 i, atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_sub\n" +"1: ldxr %0, [%3]\n" +" sub %0, %0, %4\n" +" stxr %w1, %0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline long atomic64_sub_return(long i, atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_sub_return\n" +"1: ldaxr %0, [%3]\n" +" sub %0, %0, %4\n" +" stlxr %w1, %0, [%3]\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); + + return result; +} + +static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) +{ + long oldval; + unsigned long res; + + asm volatile("// atomic64_cmpxchg\n" +"1: ldaxr %1, [%3]\n" +" cmp %1, %4\n" +" b.ne 2f\n" +" stlxr %w0, %5, [%3]\n" +" cbnz %w0, 1b\n" +"2:" + : "=&r" (res), "=&r" (oldval), "+o" (ptr->counter) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : "cc"); + + return oldval; +} + +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_dec_if_positive\n" +"1: ldaxr %0, [%3]\n" +" subs %0, %0, #1\n" +" b.mi 2f\n" +" stlxr %w1, %0, [%3]\n" +" cbnz %w1, 1b\n" +"2:" + : "=&r" (result), "=&r" (tmp), "+o" (v->counter) + : "r" (&v->counter) + : "cc"); + + return result; +} + +static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + + c = atomic64_read(v); + while (c != u && (old = atomic64_cmpxchg((v), c, c + a)) != c) + c = old; + + return c != u; +} + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) +#define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) +#define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) + +#endif +#endif diff --git a/arch/arm64/include/asm/auxvec.h b/arch/arm64/include/asm/auxvec.h new file mode 100644 index 00000000000..22d6d888585 --- /dev/null +++ b/arch/arm64/include/asm/auxvec.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_AUXVEC_H +#define __ASM_AUXVEC_H + +/* vDSO location */ +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h new file mode 100644 index 00000000000..d4a63338a53 --- /dev/null +++ b/arch/arm64/include/asm/barrier.h @@ -0,0 +1,52 @@ +/* + * Based on arch/arm/include/asm/barrier.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifndef __ASSEMBLY__ + +#define sev() asm volatile("sev" : : : "memory") +#define wfe() asm volatile("wfe" : : : "memory") +#define wfi() asm volatile("wfi" : : : "memory") + +#define isb() asm volatile("isb" : : : "memory") +#define dsb() asm volatile("dsb sy" : : : "memory") + +#define mb() dsb() +#define rmb() asm volatile("dsb ld" : : : "memory") +#define wmb() asm volatile("dsb st" : : : "memory") + +#ifndef CONFIG_SMP +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#else +#define smp_mb() asm volatile("dmb ish" : : : "memory") +#define smp_rmb() asm volatile("dmb ishld" : : : "memory") +#define smp_wmb() asm volatile("dmb ishst" : : : "memory") +#endif + +#define read_barrier_depends() do { } while(0) +#define smp_read_barrier_depends() do { } while(0) + +#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define nop() asm volatile("nop"); + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h new file mode 100644 index 00000000000..5e693073b03 --- /dev/null +++ b/arch/arm64/include/asm/bitops.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_BITOPS_H +#define __ASM_BITOPS_H + +#include <linux/compiler.h> + +#include <asm/barrier.h> + +/* + * clear_bit may not imply a memory barrier + */ +#ifndef smp_mb__before_clear_bit +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() +#endif + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <asm-generic/bitops/builtin-__ffs.h> +#include <asm-generic/bitops/builtin-ffs.h> +#include <asm-generic/bitops/builtin-__fls.h> +#include <asm-generic/bitops/builtin-fls.h> + +#include <asm-generic/bitops/ffz.h> +#include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/find.h> + +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/hweight.h> +#include <asm-generic/bitops/lock.h> + +#include <asm-generic/bitops/atomic.h> +#include <asm-generic/bitops/non-atomic.h> +#include <asm-generic/bitops/le.h> +#include <asm-generic/bitops/ext2-atomic.h> + +#endif /* __ASM_BITOPS_H */ diff --git a/arch/arm64/include/asm/bitsperlong.h b/arch/arm64/include/asm/bitsperlong.h new file mode 100644 index 00000000000..fce9c2924fa --- /dev/null +++ b/arch/arm64/include/asm/bitsperlong.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_BITSPERLONG_H +#define __ASM_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_BITSPERLONG_H */ diff --git a/arch/arm64/include/asm/byteorder.h b/arch/arm64/include/asm/byteorder.h new file mode 100644 index 00000000000..2b92046aafc --- /dev/null +++ b/arch/arm64/include/asm/byteorder.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_BYTEORDER_H +#define __ASM_BYTEORDER_H + +#include <linux/byteorder/little_endian.h> + +#endif /* __ASM_BYTEORDER_H */ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h new file mode 100644 index 00000000000..390308a67f0 --- /dev/null +++ b/arch/arm64/include/asm/cache.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHE_H +#define __ASM_CACHE_H + +#define L1_CACHE_SHIFT 6 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +/* + * Memory returned by kmalloc() may be used for DMA, so we must make + * sure that all such allocations are cache aligned. Otherwise, + * unrelated code may cause parts of the buffer to be read into the + * cache before the transfer is done, causing old data to be seen by + * the CPU. + */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_SLAB_MINALIGN 8 + +#endif diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h new file mode 100644 index 00000000000..aa3132ab7f2 --- /dev/null +++ b/arch/arm64/include/asm/cacheflush.h @@ -0,0 +1,148 @@ +/* + * Based on arch/arm/include/asm/cacheflush.h + * + * Copyright (C) 1999-2002 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHEFLUSH_H +#define __ASM_CACHEFLUSH_H + +#include <linux/mm.h> + +/* + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. + */ +#define PG_dcache_clean PG_arch_1 + +/* + * MM Cache Management + * =================== + * + * The arch/arm64/mm/cache.S implements these methods. + * + * Start addresses are inclusive and end addresses are exclusive; start + * addresses should be rounded down, end addresses up. + * + * See Documentation/cachetlb.txt for more information. Please note that + * the implementation assumes non-aliasing VIPT D-cache and (aliasing) + * VIPT or ASID-tagged VIVT I-cache. + * + * flush_cache_all() + * + * Unconditionally clean and invalidate the entire cache. + * + * flush_cache_mm(mm) + * + * Clean and invalidate all user space cache entries + * before a change of page tables. + * + * flush_icache_range(start, end) + * + * Ensure coherency between the I-cache and the D-cache in the + * region described by start, end. + * - start - virtual start address + * - end - virtual end address + * + * __flush_cache_user_range(start, end) + * + * Ensure coherency between the I-cache and the D-cache in the + * region described by start, end. + * - start - virtual start address + * - end - virtual end address + * + * __flush_dcache_area(kaddr, size) + * + * Ensure that the data held in page is written back. + * - kaddr - page address + * - size - region size + */ +extern void flush_cache_all(void); +extern void flush_cache_mm(struct mm_struct *mm); +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn); +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void __flush_dcache_area(void *addr, size_t len); +extern void __flush_cache_user_range(unsigned long start, unsigned long end); + +/* + * Copy user data from/to a page which is mapped into a different + * processes address space. Really, we want to allow our "user + * space" model to handle this. + */ +extern void copy_to_user_page(struct vm_area_struct *, struct page *, + unsigned long, void *, const void *, unsigned long); +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + memcpy(dst, src, len); \ + } while (0) + +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +/* + * flush_dcache_page is used when the kernel has written to the page + * cache page at virtual address page->virtual. + * + * If this page isn't mapped (ie, page_mapping == NULL), or it might + * have userspace mappings, then we _must_ always clean + invalidate + * the dcache entries associated with the kernel mapping. + * + * Otherwise we can defer the operation, and clean the cache when we are + * about to change to user space. This is the same method as used on SPARC64. + * See update_mmu_cache for the user space part. + */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +extern void flush_dcache_page(struct page *); + +static inline void __flush_icache_all(void) +{ + asm("ic ialluis"); +} + +#define flush_dcache_mmap_lock(mapping) \ + spin_lock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_unlock(mapping) \ + spin_unlock_irq(&(mapping)->tree_lock) + +#define flush_icache_user_range(vma,page,addr,len) \ + flush_dcache_page(page) + +/* + * We don't appear to need to do anything here. In fact, if we did, we'd + * duplicate cache flushing elsewhere performed by flush_dcache_page(). + */ +#define flush_icache_page(vma,page) do { } while (0) + +/* + * flush_cache_vmap() is used when creating mappings (eg, via vmap, + * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT + * caches, since the direct-mappings of these pages may contain cached + * data, we need to do a full cache flush to ensure that writebacks + * don't corrupt data placed into these pages via the new mappings. + */ +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + /* + * set_pte_at() called from vmap_pte_range() does not + * have a DSB after cleaning the cache line. + */ + dsb(); +} + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ +} + +#endif diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h new file mode 100644 index 00000000000..85f5f511352 --- /dev/null +++ b/arch/arm64/include/asm/cachetype.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CACHETYPE_H +#define __ASM_CACHETYPE_H + +#include <asm/cputype.h> + +#define CTR_L1IP_SHIFT 14 +#define CTR_L1IP_MASK 3 + +#define ICACHE_POLICY_RESERVED 0 +#define ICACHE_POLICY_AIVIVT 1 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 + +static inline u32 icache_policy(void) +{ + return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; +} + +/* + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is + * permitted in the I-cache. + */ +static inline int icache_is_aliasing(void) +{ + return icache_policy() != ICACHE_POLICY_PIPT; +} + +static inline int icache_is_aivivt(void) +{ + return icache_policy() == ICACHE_POLICY_AIVIVT; +} + +#endif /* __ASM_CACHETYPE_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h new file mode 100644 index 00000000000..e0e65b069d9 --- /dev/null +++ b/arch/arm64/include/asm/cmpxchg.h @@ -0,0 +1,173 @@ +/* + * Based on arch/arm/include/asm/cmpxchg.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include <linux/bug.h> + +#include <asm/barrier.h> + +static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned long ret, tmp; + + switch (size) { + case 1: + asm volatile("// __xchg1\n" + "1: ldaxrb %w0, [%3]\n" + " stlxrb %w1, %w2, [%3]\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 2: + asm volatile("// __xchg2\n" + "1: ldaxrh %w0, [%3]\n" + " stlxrh %w1, %w2, [%3]\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 4: + asm volatile("// __xchg4\n" + "1: ldaxr %w0, [%3]\n" + " stlxr %w1, %w2, [%3]\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 8: + asm volatile("// __xchg8\n" + "1: ldaxr %0, [%3]\n" + " stlxr %w1, %2, [%3]\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + default: + BUILD_BUG(); + } + + return ret; +} + +#define xchg(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long oldval = 0, res; + + switch (size) { + case 1: + do { + asm volatile("// __cmpxchg1\n" + " ldxrb %w1, [%2]\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxrb %w0, %w4, [%2]\n" + "1:\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; + + case 2: + do { + asm volatile("// __cmpxchg2\n" + " ldxrh %w1, [%2]\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxrh %w0, %w4, [%2]\n" + "1:\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; + + case 4: + do { + asm volatile("// __cmpxchg4\n" + " ldxr %w1, [%2]\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxr %w0, %w4, [%2]\n" + "1:\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; + + case 8: + do { + asm volatile("// __cmpxchg8\n" + " ldxr %1, [%2]\n" + " mov %w0, #0\n" + " cmp %1, %3\n" + " b.ne 1f\n" + " stxr %w0, %4, [%2]\n" + "1:\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; + + default: + BUILD_BUG(); + } + + return oldval; +} + +static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + smp_mb(); + ret = __cmpxchg(ptr, old, new, size); + smp_mb(); + + return ret; +} + +#define cmpxchg(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +#define cmpxchg_local(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h new file mode 100644 index 00000000000..a670a33ad73 --- /dev/null +++ b/arch/arm64/include/asm/compat.h @@ -0,0 +1,242 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_COMPAT_H +#define __ASM_COMPAT_H +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT + +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> + +#define COMPAT_USER_HZ 100 +#define COMPAT_UTS_MACHINE "armv8l\0\0" + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u32 compat_mode_t; +typedef u32 compat_ino_t; +typedef u32 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef s16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_key_t; +typedef s32 compat_timer_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 compat_u64; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; + compat_dev_t st_rdev; + compat_off_t st_size; + compat_off_t st_blksize; + compat_off_t st_blocks; + compat_time_t st_atime; + u32 st_atime_nsec; + compat_time_t st_mtime; + u32 st_mtime_nsec; + compat_time_t st_ctime; + u32 st_ctime_nsec; + u32 __unused4[2]; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +}; + +struct compat_statfs { + int f_type; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + compat_fsid_t f_fsid; + int f_namelen; /* SunOS ignores this field. */ + int f_frsize; + int f_flags; + int f_spare[4]; +}; + +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +typedef u32 compat_uptr_t; + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)uptr; +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +static inline void __user *arch_compat_alloc_user_space(long len) +{ + struct pt_regs *regs = task_pt_regs(current); + return (void __user *)regs->compat_sp - len; +} + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + unsigned short mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + compat_ulong_t unused1; + compat_ulong_t unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __unused1; + compat_time_t sem_ctime; + compat_ulong_t __unused2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused3; + compat_ulong_t __unused4; +}; + +struct compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __unused1; + compat_time_t msg_rtime; + compat_ulong_t __unused2; + compat_time_t msg_ctime; + compat_ulong_t __unused3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __unused1; + compat_time_t shm_dtime; + compat_ulong_t __unused2; + compat_time_t shm_ctime; + compat_ulong_t __unused3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused4; + compat_ulong_t __unused5; +}; + +static inline int is_compat_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + +static inline int is_compat_thread(struct thread_info *thread) +{ + return test_ti_thread_flag(thread, TIF_32BIT); +} + +#else /* !CONFIG_COMPAT */ + +static inline int is_compat_task(void) +{ + return 0; +} + +static inline int is_compat_thread(struct thread_info *thread) +{ + return 0; +} + +#endif /* CONFIG_COMPAT */ +#endif /* __KERNEL__ */ +#endif /* __ASM_COMPAT_H */ diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h new file mode 100644 index 00000000000..ee35fd0f223 --- /dev/null +++ b/arch/arm64/include/asm/compiler.h @@ -0,0 +1,30 @@ +/* + * Based on arch/arm/include/asm/compiler.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_COMPILER_H +#define __ASM_COMPILER_H + +/* + * This is used to ensure the compiler did actually allocate the register we + * asked it for some inline assembly sequences. Apparently we can't trust the + * compiler from one version to another so a bit of paranoia won't hurt. This + * string is meant to be concatenated with the inline asm string and will + * cause compilation to stop on mismatch. (for details, see gcc PR 15089) + */ +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" + +#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h new file mode 100644 index 00000000000..e3bd983d366 --- /dev/null +++ b/arch/arm64/include/asm/cputable.h @@ -0,0 +1,30 @@ +/* + * arch/arm64/include/asm/cputable.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CPUTABLE_H +#define __ASM_CPUTABLE_H + +struct cpu_info { + unsigned int cpu_id_val; + unsigned int cpu_id_mask; + const char *cpu_name; + unsigned long (*cpu_setup)(void); +}; + +extern struct cpu_info *lookup_processor_type(unsigned int); + +#endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h new file mode 100644 index 00000000000..ef54125e6c1 --- /dev/null +++ b/arch/arm64/include/asm/cputype.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CPUTYPE_H +#define __ASM_CPUTYPE_H + +#define ID_MIDR_EL1 "midr_el1" +#define ID_CTR_EL0 "ctr_el0" + +#define ID_AA64PFR0_EL1 "id_aa64pfr0_el1" +#define ID_AA64DFR0_EL1 "id_aa64dfr0_el1" +#define ID_AA64AFR0_EL1 "id_aa64afr0_el1" +#define ID_AA64ISAR0_EL1 "id_aa64isar0_el1" +#define ID_AA64MMFR0_EL1 "id_aa64mmfr0_el1" + +#define read_cpuid(reg) ({ \ + u64 __val; \ + asm("mrs %0, " reg : "=r" (__val)); \ + __val; \ +}) + +/* + * The CPU ID never changes at run time, so we might as well tell the + * compiler that it's constant. Use this function to read the CPU ID + * rather than directly reading processor_id or read_cpuid() directly. + */ +static inline u32 __attribute_const__ read_cpuid_id(void) +{ + return read_cpuid(ID_MIDR_EL1); +} + +static inline u32 __attribute_const__ read_cpuid_cachetype(void) +{ + return read_cpuid(ID_CTR_EL0); +} + +#endif diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h new file mode 100644 index 00000000000..7eaa0b30249 --- /dev/null +++ b/arch/arm64/include/asm/debug-monitors.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_DEBUG_MONITORS_H +#define __ASM_DEBUG_MONITORS_H + +#ifdef __KERNEL__ + +#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) + +/* AArch64 */ +#define DBG_ESR_EVT_HWBP 0x0 +#define DBG_ESR_EVT_HWSS 0x1 +#define DBG_ESR_EVT_HWWP 0x2 +#define DBG_ESR_EVT_BRK 0x6 + +enum debug_el { + DBG_ACTIVE_EL0 = 0, + DBG_ACTIVE_EL1, +}; + +/* AArch32 */ +#define DBG_ESR_EVT_BKPT 0x4 +#define DBG_ESR_EVT_VECC 0x5 + +#define AARCH32_BREAK_ARM 0x07f001f0 +#define AARCH32_BREAK_THUMB 0xde01 +#define AARCH32_BREAK_THUMB2_LO 0xf7f0 +#define AARCH32_BREAK_THUMB2_HI 0xa000 + +#ifndef __ASSEMBLY__ +struct task_struct; + +#define local_dbg_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "mrs %0, daif // local_dbg_save\n" \ + "msr daifset, #8" \ + : "=r" (flags) : : "memory"); \ + } while (0) + +#define local_dbg_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "msr daif, %0 // local_dbg_restore\n" \ + : : "r" (flags) : "memory"); \ + } while (0) + +#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ + +u8 debug_monitors_arch(void); + +void enable_debug_monitors(enum debug_el el); +void disable_debug_monitors(enum debug_el el); + +void user_rewind_single_step(struct task_struct *task); +void user_fastforward_single_step(struct task_struct *task); + +void kernel_enable_single_step(struct pt_regs *regs); +void kernel_disable_single_step(void); +int kernel_active_single_step(void); + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +int reinstall_suspended_bps(struct pt_regs *regs); +#else +static inline int reinstall_suspended_bps(struct pt_regs *regs) +{ + return -ENODEV; +} +#endif + +#endif /* __ASSEMBLY */ +#endif /* __KERNEL__ */ +#endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h new file mode 100644 index 00000000000..0d8453c755a --- /dev/null +++ b/arch/arm64/include/asm/device.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_DEVICE_H +#define __ASM_DEVICE_H + +struct dev_archdata { + struct dma_map_ops *dma_ops; +}; + +struct pdev_archdata { +}; + +#endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h new file mode 100644 index 00000000000..538f4b44db5 --- /dev/null +++ b/arch/arm64/include/asm/dma-mapping.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_DMA_MAPPING_H +#define __ASM_DMA_MAPPING_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <linux/vmalloc.h> + +#include <asm-generic/dma-coherent.h> + +#define ARCH_HAS_DMA_GET_REQUIRED_MASK + +extern struct dma_map_ops *dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +} + +#include <asm-generic/dma-mapping-common.h> + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return (dma_addr_t)paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + return (phys_addr_t)dev_addr; +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + return ops->mapping_error(dev, dev_addr); +} + +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + return ops->dma_supported(dev, mask); +} + +static inline int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + *dev->dma_mask = mask; + + return 0; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline void dma_mark_clean(void *addr, size_t size) +{ +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *vaddr; + + if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) + return vaddr; + + vaddr = ops->alloc(dev, size, dma_handle, flags, NULL); + debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); + return vaddr; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dev_addr) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + debug_dma_free_coherent(dev, size, vaddr, dev_addr); + ops->free(dev, size, vaddr, dev_addr, NULL); +} + +/* + * There is no dma_cache_sync() implementation, so just return NULL here. + */ +static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t flags) +{ + return NULL; +} + +static inline void dma_free_noncoherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle) +{ +} + +#endif /* __KERNEL__ */ +#endif /* __ASM_DMA_MAPPING_H */ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h new file mode 100644 index 00000000000..cf284649dfc --- /dev/null +++ b/arch/arm64/include/asm/elf.h @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ELF_H +#define __ASM_ELF_H + +#include <asm/hwcap.h> + +/* + * ELF register definitions.. + */ +#include <asm/ptrace.h> +#include <asm/user.h> + +typedef unsigned long elf_greg_t; +typedef unsigned long elf_freg_t[3]; + +#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_fp elf_fpregset_t; + +#define EM_AARCH64 183 + +/* + * AArch64 static relocation types. + */ + +/* Miscellaneous. */ +#define R_ARM_NONE 0 +#define R_AARCH64_NONE 256 + +/* Data. */ +#define R_AARCH64_ABS64 257 +#define R_AARCH64_ABS32 258 +#define R_AARCH64_ABS16 259 +#define R_AARCH64_PREL64 260 +#define R_AARCH64_PREL32 261 +#define R_AARCH64_PREL16 262 + +/* Instructions. */ +#define R_AARCH64_MOVW_UABS_G0 263 +#define R_AARCH64_MOVW_UABS_G0_NC 264 +#define R_AARCH64_MOVW_UABS_G1 265 +#define R_AARCH64_MOVW_UABS_G1_NC 266 +#define R_AARCH64_MOVW_UABS_G2 267 +#define R_AARCH64_MOVW_UABS_G2_NC 268 +#define R_AARCH64_MOVW_UABS_G3 269 + +#define R_AARCH64_MOVW_SABS_G0 270 +#define R_AARCH64_MOVW_SABS_G1 271 +#define R_AARCH64_MOVW_SABS_G2 272 + +#define R_AARCH64_LD_PREL_LO19 273 +#define R_AARCH64_ADR_PREL_LO21 274 +#define R_AARCH64_ADR_PREL_PG_HI21 275 +#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 +#define R_AARCH64_ADD_ABS_LO12_NC 277 +#define R_AARCH64_LDST8_ABS_LO12_NC 278 + +#define R_AARCH64_TSTBR14 279 +#define R_AARCH64_CONDBR19 280 +#define R_AARCH64_JUMP26 282 +#define R_AARCH64_CALL26 283 +#define R_AARCH64_LDST16_ABS_LO12_NC 284 +#define R_AARCH64_LDST32_ABS_LO12_NC 285 +#define R_AARCH64_LDST64_ABS_LO12_NC 286 +#define R_AARCH64_LDST128_ABS_LO12_NC 299 + +#define R_AARCH64_MOVW_PREL_G0 287 +#define R_AARCH64_MOVW_PREL_G0_NC 288 +#define R_AARCH64_MOVW_PREL_G1 289 +#define R_AARCH64_MOVW_PREL_G1_NC 290 +#define R_AARCH64_MOVW_PREL_G2 291 +#define R_AARCH64_MOVW_PREL_G2_NC 292 +#define R_AARCH64_MOVW_PREL_G3 293 + + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_AARCH64 + +#define ELF_PLATFORM_SIZE 16 +#define ELF_PLATFORM ("aarch64") + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_AARCH64) + +#define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X) + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ +extern unsigned long randomize_et_dyn(unsigned long base); +#define ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_64 / 3)) + +/* + * When the program starts, a1 contains a pointer to a function to be + * registered with atexit, as per the SVR4 ABI. A value of 0 means we have no + * such handler. + */ +#define ELF_PLAT_INIT(_r, load_addr) (_r)->regs[0] = 0 + +#define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT); + +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (elf_addr_t)current->mm->context.vdso); \ +} while (0) + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +/* 1GB of VA */ +#ifdef CONFIG_COMPAT +#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ + 0x7ff >> (PAGE_SHIFT - 12) : \ + 0x3ffff >> (PAGE_SHIFT - 12)) +#else +#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#endif + +struct mm_struct; +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + +#ifdef CONFIG_COMPAT +#define EM_ARM 40 +#define COMPAT_ELF_PLATFORM ("v8l") + +#define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) + +/* AArch32 registers. */ +#define COMPAT_ELF_NGREG 18 +typedef unsigned int compat_elf_greg_t; +typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; + +/* AArch32 EABI. */ +#define EF_ARM_EABI_MASK 0xff000000 +#define compat_elf_check_arch(x) (((x)->e_machine == EM_ARM) && \ + ((x)->e_flags & EF_ARM_EABI_MASK)) + +#define compat_start_thread compat_start_thread +#define COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT); +#define COMPAT_ARCH_DLINFO +extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, + int uses_interp); +#define compat_arch_setup_additional_pages \ + aarch32_setup_vectors_page + +#endif /* CONFIG_COMPAT */ + +#endif diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h new file mode 100644 index 00000000000..ac63519b7b9 --- /dev/null +++ b/arch/arm64/include/asm/exception.h @@ -0,0 +1,23 @@ +/* + * Based on arch/arm/include/asm/exception.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_EXCEPTION_H +#define __ASM_EXCEPTION_H + +#define __exception __attribute__((section(".exception.text"))) + +#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h new file mode 100644 index 00000000000..db0563c2348 --- /dev/null +++ b/arch/arm64/include/asm/exec.h @@ -0,0 +1,23 @@ +/* + * Based on arch/arm/include/asm/exec.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_EXEC_H +#define __ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* __ASM_EXEC_H */ diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h new file mode 100644 index 00000000000..adb88a64b2f --- /dev/null +++ b/arch/arm64/include/asm/fb.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_FB_H_ +#define __ASM_FB_H_ + +#include <linux/fb.h> +#include <linux/fs.h> +#include <asm/page.h> + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* __ASM_FB_H_ */ diff --git a/arch/arm64/include/asm/fcntl.h b/arch/arm64/include/asm/fcntl.h new file mode 100644 index 00000000000..cd2e630c235 --- /dev/null +++ b/arch/arm64/include/asm/fcntl.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_FCNTL_H +#define __ASM_FCNTL_H + +/* + * Using our own definitions for AArch32 (compat) support. + */ +#define O_DIRECTORY 040000 /* must be a directory */ +#define O_NOFOLLOW 0100000 /* don't follow links */ +#define O_DIRECT 0200000 /* direct disk access hint - currently ignored */ +#define O_LARGEFILE 0400000 + +#include <asm-generic/fcntl.h> + +#endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h new file mode 100644 index 00000000000..b42fab9f62a --- /dev/null +++ b/arch/arm64/include/asm/fpsimd.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_FP_H +#define __ASM_FP_H + +#include <asm/ptrace.h> + +#ifndef __ASSEMBLY__ + +/* + * FP/SIMD storage area has: + * - FPSR and FPCR + * - 32 128-bit data registers + * + * Note that user_fp forms a prefix of this structure, which is relied + * upon in the ptrace FP/SIMD accessors. struct user_fpsimd_state must + * form a prefix of struct fpsimd_state. + */ +struct fpsimd_state { + union { + struct user_fpsimd_state user_fpsimd; + struct { + __uint128_t vregs[32]; + u32 fpsr; + u32 fpcr; + }; + }; +}; + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +/* Masks for extracting the FPSR and FPCR from the FPSCR */ +#define VFP_FPSCR_STAT_MASK 0xf800009f +#define VFP_FPSCR_CTRL_MASK 0x07f79f00 +/* + * The VFP state has 32x64-bit registers and a single 32-bit + * control/status register. + */ +#define VFP_STATE_SIZE ((32 * 8) + 4) +#endif + +struct task_struct; + +extern void fpsimd_save_state(struct fpsimd_state *state); +extern void fpsimd_load_state(struct fpsimd_state *state); + +extern void fpsimd_thread_switch(struct task_struct *next); +extern void fpsimd_flush_thread(void); + +#endif + +#endif diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h new file mode 100644 index 00000000000..3468ae8439f --- /dev/null +++ b/arch/arm64/include/asm/futex.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_FUTEX_H +#define __ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + +#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ + asm volatile( \ +"1: ldaxr %w1, %2\n" \ + insn "\n" \ +"2: stlxr %w3, %w0, %2\n" \ +" cbnz %w3, 1b\n" \ +"3:\n" \ +" .pushsection .fixup,\"ax\"\n" \ +"4: mov %w0, %w5\n" \ +" b 3b\n" \ +" .popsection\n" \ +" .pushsection __ex_table,\"a\"\n" \ +" .align 3\n" \ +" .quad 1b, 4b, 2b, 4b\n" \ +" .popsection\n" \ + : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ + : "r" (oparg), "Ir" (-EFAULT) \ + : "cc") + +static inline int +futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tmp; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); /* implies preempt_disable() */ + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %w0, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("add %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("orr %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("and %w0, %w1, %w4", + ret, oldval, uaddr, tmp, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("eor %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); /* subsumes preempt_enable() */ + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 val, tmp; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + asm volatile("// futex_atomic_cmpxchg_inatomic\n" +"1: ldaxr %w1, %2\n" +" sub %w3, %w1, %w4\n" +" cbnz %w3, 3f\n" +"2: stlxr %w3, %w5, %2\n" +" cbnz %w3, 1b\n" +"3:\n" +" .pushsection .fixup,\"ax\"\n" +"4: mov %w0, %w6\n" +" b 3b\n" +" .popsection\n" +" .pushsection __ex_table,\"a\"\n" +" .align 3\n" +" .quad 1b, 4b, 2b, 4b\n" +" .popsection\n" + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + : "cc", "memory"); + + *uval = val; + return ret; +} + +#endif /* __KERNEL__ */ +#endif /* __ASM_FUTEX_H */ diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h new file mode 100644 index 00000000000..507546353d6 --- /dev/null +++ b/arch/arm64/include/asm/hardirq.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include <linux/cache.h> +#include <linux/threads.h> +#include <asm/irq.h> + +#define NR_IPI 4 + +typedef struct { + unsigned int __softirq_pending; +#ifdef CONFIG_SMP + unsigned int ipi_irqs[NR_IPI]; +#endif +} ____cacheline_aligned irq_cpustat_t; + +#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ + +#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ +#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) + +#ifdef CONFIG_SMP +u64 smp_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu smp_irq_stat_cpu +#endif + +#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 + +static inline void ack_bad_irq(unsigned int irq) +{ + extern unsigned long irq_err_count; + irq_err_count++; +} + +extern void handle_IRQ(unsigned int, struct pt_regs *); + +#endif /* __ASM_HARDIRQ_H */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h new file mode 100644 index 00000000000..d064047612b --- /dev/null +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_HW_BREAKPOINT_H +#define __ASM_HW_BREAKPOINT_H + +#ifdef __KERNEL__ + +struct arch_hw_breakpoint_ctrl { + u32 __reserved : 19, + len : 8, + type : 2, + privilege : 2, + enabled : 1; +}; + +struct arch_hw_breakpoint { + u64 address; + u64 trigger; + struct arch_hw_breakpoint_ctrl ctrl; +}; + +static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) +{ + return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) | + ctrl.enabled; +} + +static inline void decode_ctrl_reg(u32 reg, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + ctrl->enabled = reg & 0x1; + reg >>= 1; + ctrl->privilege = reg & 0x3; + reg >>= 2; + ctrl->type = reg & 0x3; + reg >>= 2; + ctrl->len = reg & 0xff; +} + +/* Breakpoint */ +#define ARM_BREAKPOINT_EXECUTE 0 + +/* Watchpoints */ +#define ARM_BREAKPOINT_LOAD 1 +#define ARM_BREAKPOINT_STORE 2 +#define AARCH64_ESR_ACCESS_MASK (1 << 6) + +/* Privilege Levels */ +#define AARCH64_BREAKPOINT_EL1 1 +#define AARCH64_BREAKPOINT_EL0 2 + +/* Lengths */ +#define ARM_BREAKPOINT_LEN_1 0x1 +#define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_8 0xff + +/* Kernel stepping */ +#define ARM_KERNEL_STEP_NONE 0 +#define ARM_KERNEL_STEP_ACTIVE 1 +#define ARM_KERNEL_STEP_SUSPEND 2 + +/* + * Limits. + * Changing these will require modifications to the register accessors. + */ +#define ARM_MAX_BRP 16 +#define ARM_MAX_WRP 16 +#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP) + +/* Virtual debug register bases. */ +#define AARCH64_DBG_REG_BVR 0 +#define AARCH64_DBG_REG_BCR (AARCH64_DBG_REG_BVR + ARM_MAX_BRP) +#define AARCH64_DBG_REG_WVR (AARCH64_DBG_REG_BCR + ARM_MAX_BRP) +#define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP) + +/* Debug register names. */ +#define AARCH64_DBG_REG_NAME_BVR "bvr" +#define AARCH64_DBG_REG_NAME_BCR "bcr" +#define AARCH64_DBG_REG_NAME_WVR "wvr" +#define AARCH64_DBG_REG_NAME_WCR "wcr" + +/* Accessor macros for the debug registers. */ +#define AARCH64_DBG_READ(N, REG, VAL) do {\ + asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\ +} while (0) + +#define AARCH64_DBG_WRITE(N, REG, VAL) do {\ + asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\ +} while (0) + +struct task_struct; +struct notifier_block; +struct perf_event; +struct pmu; + +extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + +extern int arch_install_hw_breakpoint(struct perf_event *bp); +extern void arch_uninstall_hw_breakpoint(struct perf_event *bp); +extern void hw_breakpoint_pmu_read(struct perf_event *bp); +extern int hw_breakpoint_slots(int type); + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern void hw_breakpoint_thread_switch(struct task_struct *next); +extern void ptrace_hw_copy_thread(struct task_struct *task); +#else +static inline void hw_breakpoint_thread_switch(struct task_struct *next) +{ +} +static inline void ptrace_hw_copy_thread(struct task_struct *task) +{ +} +#endif + +extern struct pmu perf_ops_bp; + +#endif /* __KERNEL__ */ +#endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h new file mode 100644 index 00000000000..f8190ba45a3 --- /dev/null +++ b/arch/arm64/include/asm/hwcap.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_HWCAP_H +#define __ASM_HWCAP_H + +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#define HWCAP_FP (1 << 0) +#define HWCAP_ASIMD (1 << 1) + +#define COMPAT_HWCAP_HALF (1 << 1) +#define COMPAT_HWCAP_THUMB (1 << 2) +#define COMPAT_HWCAP_FAST_MULT (1 << 4) +#define COMPAT_HWCAP_VFP (1 << 6) +#define COMPAT_HWCAP_EDSP (1 << 7) +#define COMPAT_HWCAP_NEON (1 << 12) +#define COMPAT_HWCAP_VFPv3 (1 << 13) +#define COMPAT_HWCAP_TLS (1 << 15) +#define COMPAT_HWCAP_VFPv4 (1 << 16) +#define COMPAT_HWCAP_IDIVA (1 << 17) +#define COMPAT_HWCAP_IDIVT (1 << 18) +#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +/* + * This yields a mask that user programs can use to figure out what + * instruction set this cpu supports. + */ +#define ELF_HWCAP (elf_hwcap) +#define COMPAT_ELF_HWCAP (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) + +extern unsigned int elf_hwcap; +#endif + +#endif diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h new file mode 100644 index 00000000000..74a2a7d304a --- /dev/null +++ b/arch/arm64/include/asm/io.h @@ -0,0 +1,258 @@ +/* + * Based on arch/arm/include/asm/io.h + * + * Copyright (C) 1996-2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_IO_H +#define __ASM_IO_H + +#ifdef __KERNEL__ + +#include <linux/types.h> + +#include <asm/byteorder.h> +#include <asm/barrier.h> +#include <asm/pgtable.h> + +/* + * Generic IO read/write. These perform native-endian accesses. + */ +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +{ + asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writew(u16 val, volatile void __iomem *addr) +{ + asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +{ + asm volatile("str %0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + u8 val; + asm volatile("ldrb %w0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 val; + asm volatile("ldrh %w0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 val; + asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 val; + asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + +/* IO barriers */ +#define __iormb() rmb() +#define __iowmb() wmb() + +#define mmiowb() do { } while (0) + +/* + * Relaxed I/O memory access primitives. These follow the Device memory + * ordering rules but do not guarantee any ordering relative to Normal memory + * accesses. + */ +#define readb_relaxed(c) ({ u8 __v = __raw_readb(c); __v; }) +#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; }) +#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; }) + +#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c))) +#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c))) +#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) + +/* + * I/O memory access primitives. Reads are ordered relative to any + * following Normal memory access. Writes are ordered relative to any prior + * Normal memory access. + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); }) + +/* + * I/O port access primitives. + */ +#define IO_SPACE_LIMIT 0xffff +#define PCI_IOBASE ((void __iomem *)0xffffffbbfffe0000UL) + +static inline u8 inb(unsigned long addr) +{ + return readb(addr + PCI_IOBASE); +} + +static inline u16 inw(unsigned long addr) +{ + return readw(addr + PCI_IOBASE); +} + +static inline u32 inl(unsigned long addr) +{ + return readl(addr + PCI_IOBASE); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, addr + PCI_IOBASE); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, addr + PCI_IOBASE); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, addr + PCI_IOBASE); +} + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) + +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + u8 *buf = buffer; + while (count--) + *buf++ = __raw_readb(addr + PCI_IOBASE); +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + u16 *buf = buffer; + while (count--) + *buf++ = __raw_readw(addr + PCI_IOBASE); +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + u32 *buf = buffer; + while (count--) + *buf++ = __raw_readl(addr + PCI_IOBASE); +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + const u8 *buf = buffer; + while (count--) + __raw_writeb(*buf++, addr + PCI_IOBASE); +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + const u16 *buf = buffer; + while (count--) + __raw_writew(*buf++, addr + PCI_IOBASE); +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + const u32 *buf = buffer; + while (count--) + __raw_writel(*buf++, addr + PCI_IOBASE); +} + +#define insb_p(port,to,len) insb(port,to,len) +#define insw_p(port,to,len) insw(port,to,len) +#define insl_p(port,to,len) insl(port,to,len) + +#define outsb_p(port,from,len) outsb(port,from,len) +#define outsw_p(port,from,len) outsw(port,from,len) +#define outsl_p(port,from,len) outsl(port,from,len) + +/* + * String version of I/O memory access operations. + */ +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); +extern void __memset_io(volatile void __iomem *, int, size_t); + +#define memset_io(c,v,l) __memset_io((c),(v),(l)) +#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) +#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) + +/* + * I/O memory mapping functions. + */ +extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); +extern void __iounmap(volatile void __iomem *addr); + +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_XN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC)) + +#define ioremap(addr, size) __ioremap((addr), (size), PROT_DEVICE_nGnRE) +#define ioremap_nocache(addr, size) __ioremap((addr), (size), PROT_DEVICE_nGnRE) +#define ioremap_wc(addr, size) __ioremap((addr), (size), PROT_NORMAL_NC) +#define iounmap __iounmap + +#define ARCH_HAS_IOREMAP_WC +#include <asm-generic/iomap.h> + +/* + * More restrictive address range checking than the default implementation + * (PHYS_OFFSET and PHYS_MASK taken into account). + */ +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(unsigned long addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + +extern int devmem_is_allowed(unsigned long pfn); + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif /* __KERNEL__ */ +#endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h new file mode 100644 index 00000000000..a4e1cad3202 --- /dev/null +++ b/arch/arm64/include/asm/irq.h @@ -0,0 +1,8 @@ +#ifndef __ASM_IRQ_H +#define __ASM_IRQ_H + +#include <asm-generic/irq.h> + +extern void (*handle_arch_irq)(struct pt_regs *); + +#endif diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h new file mode 100644 index 00000000000..aa11943b850 --- /dev/null +++ b/arch/arm64/include/asm/irqflags.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_IRQFLAGS_H +#define __ASM_IRQFLAGS_H + +#ifdef __KERNEL__ + +#include <asm/ptrace.h> + +/* + * CPU interrupt mask handling. + */ +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags; + asm volatile( + "mrs %0, daif // arch_local_irq_save\n" + "msr daifset, #2" + : "=r" (flags) + : + : "memory"); + return flags; +} + +static inline void arch_local_irq_enable(void) +{ + asm volatile( + "msr daifclr, #2 // arch_local_irq_enable" + : + : + : "memory"); +} + +static inline void arch_local_irq_disable(void) +{ + asm volatile( + "msr daifset, #2 // arch_local_irq_disable" + : + : + : "memory"); +} + +#define local_fiq_enable() asm("msr daifclr, #1" : : : "memory") +#define local_fiq_disable() asm("msr daifset, #1" : : : "memory") + +/* + * Save the current interrupt enable state. + */ +static inline unsigned long arch_local_save_flags(void) +{ + unsigned long flags; + asm volatile( + "mrs %0, daif // arch_local_save_flags" + : "=r" (flags) + : + : "memory"); + return flags; +} + +/* + * restore saved IRQ state + */ +static inline void arch_local_irq_restore(unsigned long flags) +{ + asm volatile( + "msr daif, %0 // arch_local_irq_restore" + : + : "r" (flags) + : "memory"); +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return flags & PSR_I_BIT; +} + +#endif +#endif diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h new file mode 100644 index 00000000000..6afeed2467f --- /dev/null +++ b/arch/arm64/include/asm/memblock.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_MEMBLOCK_H +#define __ASM_MEMBLOCK_H + +extern void arm64_memblock_init(void); + +#endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h new file mode 100644 index 00000000000..1cac16a001c --- /dev/null +++ b/arch/arm64/include/asm/memory.h @@ -0,0 +1,144 @@ +/* + * Based on arch/arm/include/asm/memory.h + * + * Copyright (C) 2000-2002 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Note: this file should not be included by non-asm/.h files + */ +#ifndef __ASM_MEMORY_H +#define __ASM_MEMORY_H + +#include <linux/compiler.h> +#include <linux/const.h> +#include <linux/types.h> +#include <asm/sizes.h> + +/* + * Allow for constants defined here to be used from assembly code + * by prepending the UL suffix only with actual C code compilation. + */ +#define UL(x) _AC(x, UL) + +/* + * PAGE_OFFSET - the virtual address of the start of the kernel image. + * VA_BITS - the maximum number of bits for virtual addresses. + * TASK_SIZE - the maximum size of a user space task. + * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. + * The module space lives between the addresses given by TASK_SIZE + * and PAGE_OFFSET - it must be within 128MB of the kernel text. + */ +#define PAGE_OFFSET UL(0xffffffc000000000) +#define MODULES_END (PAGE_OFFSET) +#define MODULES_VADDR (MODULES_END - SZ_64M) +#define VA_BITS (39) +#define TASK_SIZE_64 (UL(1) << VA_BITS) + +#ifdef CONFIG_COMPAT +#define TASK_SIZE_32 UL(0x100000000) +#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ + TASK_SIZE_32 : TASK_SIZE_64) +#else +#define TASK_SIZE TASK_SIZE_64 +#endif /* CONFIG_COMPAT */ + +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) + +#if TASK_SIZE_64 > MODULES_VADDR +#error Top of 64-bit user space clashes with start of module space +#endif + +/* + * Physical vs virtual RAM address space conversion. These are + * private definitions which should NOT be used outside memory.h + * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. + */ +#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET)) +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) + +/* + * Convert a physical address to a Page Frame Number and back + */ +#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) +#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) + +/* + * Convert a page to/from a physical address + */ +#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) +#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) + +/* + * Memory types available. + */ +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 + +#ifndef __ASSEMBLY__ + +extern phys_addr_t memstart_addr; +/* PHYS_OFFSET - the physical address of the start of memory. */ +#define PHYS_OFFSET ({ memstart_addr; }) + +/* + * PFNs are used to describe any physical page; this means + * PFN 0 == physical address 0. + * + * This is the PFN of the first RAM page in the kernel + * direct-mapped view. We assume this is the first page + * of RAM in the mem_map as well. + */ +#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) + +/* + * Note: Drivers should NOT use these. They are the wrong + * translation for translating DMA addresses. Use the driver + * DMA support - see dma-mapping.h. + */ +static inline phys_addr_t virt_to_phys(const volatile void *x) +{ + return __virt_to_phys((unsigned long)(x)); +} + +static inline void *phys_to_virt(phys_addr_t x) +{ + return (void *)(__phys_to_virt(x)); +} + +/* + * Drivers should NOT use these either. + */ +#define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +/* + * virt_to_page(k) convert a _valid_ virtual address to struct page * + * virt_addr_valid(k) indicates whether a virtual address is valid + */ +#define ARCH_PFN_OFFSET PHYS_PFN_OFFSET + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \ + ((void *)(kaddr) < (void *)high_memory)) + +#endif + +#include <asm-generic/memory_model.h> + +#endif diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h new file mode 100644 index 00000000000..d4f7fd5b9e3 --- /dev/null +++ b/arch/arm64/include/asm/mmu.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_MMU_H +#define __ASM_MMU_H + +typedef struct { + unsigned int id; + raw_spinlock_t id_lock; + void *vdso; +} mm_context_t; + +#define ASID(mm) ((mm)->context.id & 0xffff) + +extern void paging_init(void); +extern void setup_mm_for_reboot(void); + +#endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h new file mode 100644 index 00000000000..f68465dee02 --- /dev/null +++ b/arch/arm64/include/asm/mmu_context.h @@ -0,0 +1,152 @@ +/* + * Based on arch/arm/include/asm/mmu_context.h + * + * Copyright (C) 1996 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_MMU_CONTEXT_H +#define __ASM_MMU_CONTEXT_H + +#include <linux/compiler.h> +#include <linux/sched.h> + +#include <asm/cacheflush.h> +#include <asm/proc-fns.h> +#include <asm-generic/mm_hooks.h> +#include <asm/cputype.h> +#include <asm/pgtable.h> + +#define MAX_ASID_BITS 16 + +extern unsigned int cpu_last_asid; + +void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void __new_context(struct mm_struct *mm); + +/* + * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. + */ +static inline void cpu_set_reserved_ttbr0(void) +{ + unsigned long ttbr = page_to_phys(empty_zero_page); + + asm( + " msr ttbr0_el1, %0 // set TTBR0\n" + " isb" + : + : "r" (ttbr)); +} + +static inline void switch_new_context(struct mm_struct *mm) +{ + unsigned long flags; + + __new_context(mm); + + local_irq_save(flags); + cpu_switch_mm(mm->pgd, mm); + local_irq_restore(flags); +} + +static inline void check_and_switch_context(struct mm_struct *mm, + struct task_struct *tsk) +{ + /* + * Required during context switch to avoid speculative page table + * walking with the wrong TTBR. + */ + cpu_set_reserved_ttbr0(); + + if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) + /* + * The ASID is from the current generation, just switch to the + * new pgd. This condition is only true for calls from + * context_switch() and interrupts are already disabled. + */ + cpu_switch_mm(mm->pgd, mm); + else if (irqs_disabled()) + /* + * Defer the new ASID allocation until after the context + * switch critical region since __new_context() cannot be + * called with interrupts disabled. + */ + set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + else + /* + * That is a direct call to switch_mm() or activate_mm() with + * interrupts enabled and a new context. + */ + switch_new_context(mm); +} + +#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) +#define destroy_context(mm) do { } while(0) + +#define finish_arch_post_lock_switch \ + finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) +{ + if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { + struct mm_struct *mm = current->mm; + unsigned long flags; + + __new_context(mm); + + local_irq_save(flags); + cpu_switch_mm(mm->pgd, mm); + local_irq_restore(flags); + } +} + +/* + * This is called when "tsk" is about to enter lazy TLB mode. + * + * mm: describes the currently active mm context + * tsk: task which is entering lazy tlb + * cpu: cpu number which is entering lazy tlb + * + * tsk->mm will be NULL + */ +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + +/* + * This is the actual mm switch as far as the scheduler + * is concerned. No registers are touched. We avoid + * calling the CPU specific function when the mm hasn't + * actually changed. + */ +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned int cpu = smp_processor_id(); + +#ifdef CONFIG_SMP + /* check for possible thread migration */ + if (!cpumask_empty(mm_cpumask(next)) && + !cpumask_test_cpu(cpu, mm_cpumask(next))) + __flush_icache_all(); +#endif + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) + check_and_switch_context(next, tsk); +} + +#define deactivate_mm(tsk,mm) do { } while (0) +#define activate_mm(prev,next) switch_mm(prev, next, NULL) + +#endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h new file mode 100644 index 00000000000..e80e232b730 --- /dev/null +++ b/arch/arm64/include/asm/module.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_MODULE_H +#define __ASM_MODULE_H + +#include <asm-generic/module.h> + +#define MODULE_ARCH_VERMAGIC "aarch64" + +#endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h new file mode 100644 index 00000000000..46bf66628b6 --- /dev/null +++ b/arch/arm64/include/asm/page.h @@ -0,0 +1,67 @@ +/* + * Based on arch/arm/include/asm/page.h + * + * Copyright (C) 1995-2003 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PAGE_H +#define __ASM_PAGE_H + +/* PAGE_SHIFT determines the page size */ +#ifdef CONFIG_ARM64_64K_PAGES +#define PAGE_SHIFT 16 +#else +#define PAGE_SHIFT 12 +#endif +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ +#define __HAVE_ARCH_GATE_AREA 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_ARM64_64K_PAGES +#include <asm/pgtable-2level-types.h> +#else +#include <asm/pgtable-3level-types.h> +#endif + +extern void __cpu_clear_user_page(void *p, unsigned long user); +extern void __cpu_copy_user_page(void *to, const void *from, + unsigned long user); +extern void copy_page(void *to, const void *from); +extern void clear_page(void *to); + +#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) +#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) + +typedef struct page *pgtable_t; + +#ifdef CONFIG_HAVE_ARCH_PFN_VALID +extern int pfn_valid(unsigned long); +#endif + +#include <asm/memory.h> + +#endif /* !__ASSEMBLY__ */ + +#define VM_DATA_DEFAULT_FLAGS \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/getorder.h> + +#endif diff --git a/arch/arm64/include/asm/param.h b/arch/arm64/include/asm/param.h new file mode 100644 index 00000000000..8e3a281d448 --- /dev/null +++ b/arch/arm64/include/asm/param.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PARAM_H +#define __ASM_PARAM_H + +#define EXEC_PAGESIZE 65536 + +#include <asm-generic/param.h> + +#endif diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h new file mode 100644 index 00000000000..a6fffd511c5 --- /dev/null +++ b/arch/arm64/include/asm/perf_event.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_PERF_EVENT_H +#define __ASM_PERF_EVENT_H + +/* It's quiet around here... */ + +#endif diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h new file mode 100644 index 00000000000..f214069ec5d --- /dev/null +++ b/arch/arm64/include/asm/pgalloc.h @@ -0,0 +1,113 @@ +/* + * Based on arch/arm/include/asm/pgalloc.h + * + * Copyright (C) 2000-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGALLOC_H +#define __ASM_PGALLOC_H + +#include <asm/pgtable-hwdef.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + +#define check_pgt_cache() do { } while (0) + +#ifndef CONFIG_ARM64_64K_PAGES + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + free_page((unsigned long)pmd); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); +} + +#endif /* CONFIG_ARM64_64K_PAGES */ + +extern pgd_t *pgd_alloc(struct mm_struct *mm); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + +static inline pte_t * +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) +{ + return (pte_t *)__get_free_page(PGALLOC_GFP); +} + +static inline pgtable_t +pte_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + struct page *pte; + + pte = alloc_pages(PGALLOC_GFP, 0); + if (pte) + pgtable_page_ctor(pte); + + return pte; +} + +/* + * Free a PTE table. + */ +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + if (pte) + free_page((unsigned long)pte); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + pgtable_page_dtor(pte); + __free_page(pte); +} + +static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, + pmdval_t prot) +{ + set_pmd(pmdp, __pmd(pte | prot)); +} + +/* + * Populate the pmdp entry with a pointer to the pte. This pmd is part + * of the mm address space. + */ +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) +{ + /* + * The pmd must be loaded with the physical address of the PTE table + */ + __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE); +} + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) +{ + __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +#endif diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h new file mode 100644 index 00000000000..0a8ed3f94e9 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_2LEVEL_HWDEF_H +#define __ASM_PGTABLE_2LEVEL_HWDEF_H + +/* + * With LPAE and 64KB pages, there are 2 levels of page tables. Each level has + * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not + * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each + * entry representing 512MB. The user and kernel address spaces are limited to + * 512GB and therefore we only use 1024 entries in the PGD. + */ +#define PTRS_PER_PTE 8192 +#define PTRS_PER_PGD 1024 + +/* + * PGDIR_SHIFT determines the size a top-level page table entry can map. + */ +#define PGDIR_SHIFT 29 +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * section address mask and size definitions. + */ +#define SECTION_SHIFT 29 +#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + +#endif diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h new file mode 100644 index 00000000000..3c3ca7d361e --- /dev/null +++ b/arch/arm64/include/asm/pgtable-2level-types.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_2LEVEL_TYPES_H +#define __ASM_PGTABLE_2LEVEL_TYPES_H + +typedef u64 pteval_t; +typedef u64 pgdval_t; +typedef pgdval_t pmdval_t; + +#undef STRICT_MM_TYPECHECKS + +#ifdef STRICT_MM_TYPECHECKS + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { pteval_t pte; } pte_t; +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pteval_t pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#else /* !STRICT_MM_TYPECHECKS */ + +typedef pteval_t pte_t; +typedef pgdval_t pgd_t; +typedef pteval_t pgprot_t; + +#define pte_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif /* STRICT_MM_TYPECHECKS */ + +#include <asm-generic/pgtable-nopmd.h> + +#endif /* __ASM_PGTABLE_2LEVEL_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable-3level-hwdef.h b/arch/arm64/include/asm/pgtable-3level-hwdef.h new file mode 100644 index 00000000000..3dbf941d776 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-3level-hwdef.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_3LEVEL_HWDEF_H +#define __ASM_PGTABLE_3LEVEL_HWDEF_H + +/* + * With LPAE and 4KB pages, there are 3 levels of page tables. Each level has + * 512 entries of 8 bytes each, occupying a 4K page. The first level table + * covers a range of 512GB, each entry representing 1GB. The user and kernel + * address spaces are limited to 512GB each. + */ +#define PTRS_PER_PTE 512 +#define PTRS_PER_PMD 512 +#define PTRS_PER_PGD 512 + +/* + * PGDIR_SHIFT determines the size a top-level page table entry can map. + */ +#define PGDIR_SHIFT 30 +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * PMD_SHIFT determines the size a middle-level page table entry can map. + */ +#define PMD_SHIFT 21 +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* + * section address mask and size definitions. + */ +#define SECTION_SHIFT 21 +#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + +#endif diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h new file mode 100644 index 00000000000..4489615f14a --- /dev/null +++ b/arch/arm64/include/asm/pgtable-3level-types.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H +#define __ASM_PGTABLE_3LEVEL_TYPES_H + +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pgdval_t; + +#undef STRICT_MM_TYPECHECKS + +#ifdef STRICT_MM_TYPECHECKS + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { pteval_t pte; } pte_t; +typedef struct { pmdval_t pmd; } pmd_t; +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pteval_t pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#else /* !STRICT_MM_TYPECHECKS */ + +typedef pteval_t pte_t; +typedef pmdval_t pmd_t; +typedef pgdval_t pgd_t; +typedef pteval_t pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pmd(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif /* STRICT_MM_TYPECHECKS */ + +#include <asm-generic/pgtable-nopud.h> + +#endif /* __ASM_PGTABLE_3LEVEL_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h new file mode 100644 index 00000000000..0f3b4581d92 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_HWDEF_H +#define __ASM_PGTABLE_HWDEF_H + +#ifdef CONFIG_ARM64_64K_PAGES +#include <asm/pgtable-2level-hwdef.h> +#else +#include <asm/pgtable-3level-hwdef.h> +#endif + +/* + * Hardware page table definitions. + * + * Level 2 descriptor (PMD). + */ +#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) +#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) +#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) +#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) + +/* + * Section + */ +#define PMD_SECT_S (_AT(pmdval_t, 3) << 8) +#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) +#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) +#define PMD_SECT_XN (_AT(pmdval_t, 1) << 54) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PMD_ATTRINDX(t) (_AT(pmdval_t, (t)) << 2) +#define PMD_ATTRINDX_MASK (_AT(pmdval_t, 7) << 2) + +/* + * Level 3 descriptor (PTE). + */ +#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) +#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) +#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ +#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ +#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ +#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) (_AT(pteval_t, (t)) << 2) +#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) + +/* + * 40-bit physical address supported. + */ +#define PHYS_MASK_SHIFT (40) +#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) + +/* + * TCR flags. + */ +#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0)) +#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) +#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) +#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) +#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) +#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) +#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) +#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) +#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) +#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) +#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) +#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) +#define TCR_TG0_64K (UL(1) << 14) +#define TCR_TG1_64K (UL(1) << 30) +#define TCR_IPS_40BIT (UL(2) << 32) +#define TCR_ASID16 (UL(1) << 36) + +#endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h new file mode 100644 index 00000000000..8960239be72 --- /dev/null +++ b/arch/arm64/include/asm/pgtable.h @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PGTABLE_H +#define __ASM_PGTABLE_H + +#include <asm/proc-fns.h> + +#include <asm/memory.h> +#include <asm/pgtable-hwdef.h> + +/* + * Software defined PTE bits definition. + */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) /* pte_present() check */ +#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_DIRTY (_AT(pteval_t, 1) << 55) +#define PTE_SPECIAL (_AT(pteval_t, 1) << 56) + +/* + * VMALLOC and SPARSEMEM_VMEMMAP ranges. + */ +#define VMALLOC_START UL(0xffffff8000000000) +#define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) + +#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) + +#define FIRST_USER_ADDRESS 0 + +#ifndef __ASSEMBLY__ +extern void __pte_error(const char *file, int line, unsigned long val); +extern void __pmd_error(const char *file, int line, unsigned long val); +extern void __pgd_error(const char *file, int line, unsigned long val); + +#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#ifndef CONFIG_ARM64_64K_PAGES +#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#endif +#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) + +/* + * The pgprot_* and protection_map entries will be fixed up at runtime to + * include the cachable and bufferable bits based on memory policy, as well as + * any architecture dependent bits like global/ASID and SMP shared mapping + * bits. + */ +#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF + +extern pgprot_t pgprot_default; + +#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) + +#define PAGE_NONE _MOD_PROT(pgprot_default, PTE_NG | PTE_XN | PTE_RDONLY) +#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_XN) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG) +#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_XN | PTE_RDONLY) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_RDONLY) +#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_XN | PTE_RDONLY) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_RDONLY) +#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_XN | PTE_DIRTY) +#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_DIRTY) + +#define __PAGE_NONE __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_XN | PTE_RDONLY) +#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_XN) +#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG) +#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_XN | PTE_RDONLY) +#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_RDONLY) +#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_XN | PTE_RDONLY) +#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_RDONLY) + +#endif /* __ASSEMBLY__ */ + +#define __P000 __PAGE_NONE +#define __P001 __PAGE_READONLY +#define __P010 __PAGE_COPY +#define __P011 __PAGE_COPY +#define __P100 __PAGE_READONLY_EXEC +#define __P101 __PAGE_READONLY_EXEC +#define __P110 __PAGE_COPY_EXEC +#define __P111 __PAGE_COPY_EXEC + +#define __S000 __PAGE_NONE +#define __S001 __PAGE_READONLY +#define __S010 __PAGE_SHARED +#define __S011 __PAGE_SHARED +#define __S100 __PAGE_READONLY_EXEC +#define __S101 __PAGE_READONLY_EXEC +#define __S110 __PAGE_SHARED_EXEC +#define __S111 __PAGE_SHARED_EXEC + +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) + +#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT) + +#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +#define pte_none(pte) (!pte_val(pte)) +#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) +#define pte_page(pte) (pfn_to_page(pte_pfn(pte))) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +/* + * The following only work if pte_present(). Undefined behaviour otherwise. + */ +#define pte_present(pte) (pte_val(pte) & PTE_VALID) +#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) +#define pte_young(pte) (pte_val(pte) & PTE_AF) +#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) +#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_exec(pte) (!(pte_val(pte) & PTE_XN)) + +#define pte_present_exec_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_XN)) == \ + (PTE_VALID | PTE_USER)) + +#define PTE_BIT_FUNC(fn,op) \ +static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } + +PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); +PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); +PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); +PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); +PTE_BIT_FUNC(mkold, &= ~PTE_AF); +PTE_BIT_FUNC(mkyoung, |= PTE_AF); +PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + *ptep = pte; +} + +extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present_exec_user(pte)) + __sync_icache_dcache(pte, addr); + set_pte(ptep, pte); +} + +/* + * Huge pte definitions. + */ +#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) +#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) + +#define __pgprot_modify(prot,mask,bits) \ + __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) + +#define __HAVE_ARCH_PTE_SPECIAL + +/* + * Mark the prot value as uncacheable and unbufferable. + */ +#define pgprot_noncached(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define pgprot_writecombine(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_GRE)) +#define pgprot_dmacoherent(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_present(pmd) (pmd_val(pmd)) + +#define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; + dsb(); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd(0)); +} + +static inline pte_t *pmd_page_vaddr(pmd_t pmd) +{ + return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); +} + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) + +#ifndef CONFIG_ARM64_64K_PAGES + +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) (!(pud_val(pud) & 2)) +#define pud_present(pud) (pud_val(pud)) + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + *pudp = pud; + dsb(); +} + +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud(0)); +} + +static inline pmd_t *pud_page_vaddr(pud_t pud) +{ + return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); +} + +#endif /* CONFIG_ARM64_64K_PAGES */ + +/* to find an entry in a page-table-directory */ +#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + +#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) + +/* Find an entry in the second-level page table.. */ +#ifndef CONFIG_ARM64_64K_PAGES +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); +} +#endif + +/* Find an entry in the third-level page table.. */ +#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + const pteval_t mask = PTE_USER | PTE_XN | PTE_RDONLY; + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + return pte; +} + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; + +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) + +/* + * Encode and decode a swap entry: + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-8: swap type + * bits 9-63: swap offset + */ +#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) +#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) + +#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) +#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) + +/* + * Ensure that there are not more swap files than can be encoded in the kernel + * the PTEs. + */ +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) + +/* + * Encode and decode a file entry: + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-63: file offset / PAGE_SIZE + */ +#define pte_file(pte) (pte_val(pte) & PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) + +#define PTE_FILE_MAX_BITS 61 + +extern int kern_addr_valid(unsigned long addr); + +#include <asm-generic/pgtable.h> + +/* + * remap a physical page `pfn' of size `size' with page protection `prot' + * into virtual address `from' + */ +#define io_remap_pfn_range(vma,from,pfn,size,prot) \ + remap_pfn_range(vma, from, pfn, size, prot) + +#define pgtable_cache_init() do { } while (0) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h new file mode 100644 index 00000000000..e6f087806aa --- /dev/null +++ b/arch/arm64/include/asm/pmu.h @@ -0,0 +1,82 @@ +/* + * Based on arch/arm/include/asm/pmu.h + * + * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PMU_H +#define __ASM_PMU_H + +#ifdef CONFIG_HW_PERF_EVENTS + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event **events; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long *used_mask; + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; +}; + +struct arm_pmu { + struct pmu pmu; + cpumask_t active_irqs; + const char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct hw_perf_event *evt, int idx); + void (*disable)(struct hw_perf_event *evt, int idx); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct hw_perf_event *hwc); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(int idx); + void (*write_counter)(int idx, u32 val); + void (*start)(void); + void (*stop)(void); + void (*reset)(void *); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + struct mutex reserve_mutex; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events *(*get_hw_events)(void); +}; + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type); + +u64 armpmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx); + +int armpmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx); + +#endif /* CONFIG_HW_PERF_EVENTS */ +#endif /* __ASM_PMU_H */ diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h new file mode 100644 index 00000000000..7cdf466fd0c --- /dev/null +++ b/arch/arm64/include/asm/proc-fns.h @@ -0,0 +1,50 @@ +/* + * Based on arch/arm/include/asm/proc-fns.h + * + * Copyright (C) 1997-1999 Russell King + * Copyright (C) 2000 Deep Blue Solutions Ltd + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PROCFNS_H +#define __ASM_PROCFNS_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include <asm/page.h> + +struct mm_struct; + +extern void cpu_cache_off(void); +extern void cpu_do_idle(void); +extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); +extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); + +#include <asm/memory.h> + +#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) + +#define cpu_get_pgd() \ +({ \ + unsigned long pg; \ + asm("mrs %0, ttbr0_el1\n" \ + : "=r" (pg)); \ + pg &= ~0xffff000000003ffful; \ + (pgd_t *)phys_to_virt(pg); \ +}) + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h new file mode 100644 index 00000000000..39a208a392f --- /dev/null +++ b/arch/arm64/include/asm/processor.h @@ -0,0 +1,175 @@ +/* + * Based on arch/arm/include/asm/processor.h + * + * Copyright (C) 1995-1999 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PROCESSOR_H +#define __ASM_PROCESSOR_H + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ __label__ _l; _l: &&_l;}) + +#ifdef __KERNEL__ + +#include <linux/string.h> + +#include <asm/fpsimd.h> +#include <asm/hw_breakpoint.h> +#include <asm/ptrace.h> +#include <asm/types.h> + +#ifdef __KERNEL__ +#define STACK_TOP_MAX TASK_SIZE_64 +#ifdef CONFIG_COMPAT +#define AARCH32_VECTORS_BASE 0xffff0000 +#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ + AARCH32_VECTORS_BASE : STACK_TOP_MAX) +#else +#define STACK_TOP STACK_TOP_MAX +#endif /* CONFIG_COMPAT */ +#endif /* __KERNEL__ */ + +struct debug_info { + /* Have we suspended stepping by a debugger? */ + int suspended_step; + /* Allow breakpoints and watchpoints to be disabled for this thread. */ + int bps_disabled; + int wps_disabled; + /* Hardware breakpoints pinned to this task. */ + struct perf_event *hbp_break[ARM_MAX_BRP]; + struct perf_event *hbp_watch[ARM_MAX_WRP]; +}; + +struct cpu_context { + unsigned long x19; + unsigned long x20; + unsigned long x21; + unsigned long x22; + unsigned long x23; + unsigned long x24; + unsigned long x25; + unsigned long x26; + unsigned long x27; + unsigned long x28; + unsigned long fp; + unsigned long sp; + unsigned long pc; +}; + +struct thread_struct { + struct cpu_context cpu_context; /* cpu context */ + unsigned long tp_value; + struct fpsimd_state fpsimd_state; + unsigned long fault_address; /* fault info */ + struct debug_info debug; /* debugging */ +}; + +#define INIT_THREAD { } + +static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) +{ + memset(regs, 0, sizeof(*regs)); + regs->syscallno = ~0UL; + regs->pc = pc; +} + +static inline void start_thread(struct pt_regs *regs, unsigned long pc, + unsigned long sp) +{ + unsigned long *stack = (unsigned long *)sp; + + start_thread_common(regs, pc); + regs->pstate = PSR_MODE_EL0t; + regs->sp = sp; + regs->regs[2] = stack[2]; /* x2 (envp) */ + regs->regs[1] = stack[1]; /* x1 (argv) */ + regs->regs[0] = stack[0]; /* x0 (argc) */ +} + +#ifdef CONFIG_COMPAT +static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, + unsigned long sp) +{ + unsigned int *stack = (unsigned int *)sp; + + start_thread_common(regs, pc); + regs->pstate = COMPAT_PSR_MODE_USR; + if (pc & 1) + regs->pstate |= COMPAT_PSR_T_BIT; + regs->compat_sp = sp; + regs->regs[2] = stack[2]; /* x2 (envp) */ + regs->regs[1] = stack[1]; /* x1 (argv) */ + regs->regs[0] = stack[0]; /* x0 (argc) */ +} +#endif + +/* Forward declaration, a strange C thing */ +struct task_struct; + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while (0) + +unsigned long get_wchan(struct task_struct *p); + +#define cpu_relax() barrier() + +/* Thread switching */ +extern struct task_struct *cpu_switch_to(struct task_struct *prev, + struct task_struct *next); + +/* + * Create a new kernel thread + */ +extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +#define task_pt_regs(p) \ + ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) + +#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc +#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp + +/* + * Prefetching support + */ +#define ARCH_HAS_PREFETCH +static inline void prefetch(const void *ptr) +{ + asm volatile("prfm pldl1keep, %a0\n" : : "p" (ptr)); +} + +#define ARCH_HAS_PREFETCHW +static inline void prefetchw(const void *ptr) +{ + asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr)); +} + +#define ARCH_HAS_SPINLOCK_PREFETCH +static inline void spin_lock_prefetch(const void *x) +{ + prefetchw(x); +} + +#define HAVE_ARCH_PICK_MMAP_LAYOUT + +#endif + +#endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/prom.h b/arch/arm64/include/asm/prom.h new file mode 100644 index 00000000000..68b90e68295 --- /dev/null +++ b/arch/arm64/include/asm/prom.h @@ -0,0 +1 @@ +/* Empty for now */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h new file mode 100644 index 00000000000..0fa5d6c9ef7 --- /dev/null +++ b/arch/arm64/include/asm/ptrace.h @@ -0,0 +1,207 @@ +/* + * Based on arch/arm/include/asm/ptrace.h + * + * Copyright (C) 1996-2003 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PTRACE_H +#define __ASM_PTRACE_H + +#include <linux/types.h> + +#include <asm/hwcap.h> + +/* AArch32-specific ptrace requests */ +#define COMPAT_PTRACE_GETREGS 12 +#define COMPAT_PTRACE_SETREGS 13 +#define COMPAT_PTRACE_GET_THREAD_AREA 22 +#define COMPAT_PTRACE_SET_SYSCALL 23 +#define COMPAT_PTRACE_GETVFPREGS 27 +#define COMPAT_PTRACE_SETVFPREGS 28 +#define COMPAT_PTRACE_GETHBPREGS 29 +#define COMPAT_PTRACE_SETHBPREGS 30 + +/* + * PSR bits + */ +#define PSR_MODE_EL0t 0x00000000 +#define PSR_MODE_EL1t 0x00000004 +#define PSR_MODE_EL1h 0x00000005 +#define PSR_MODE_EL2t 0x00000008 +#define PSR_MODE_EL2h 0x00000009 +#define PSR_MODE_EL3t 0x0000000c +#define PSR_MODE_EL3h 0x0000000d +#define PSR_MODE_MASK 0x0000000f + +/* AArch32 CPSR bits */ +#define PSR_MODE32_BIT 0x00000010 +#define COMPAT_PSR_MODE_USR 0x00000010 +#define COMPAT_PSR_T_BIT 0x00000020 +#define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ + +/* AArch64 SPSR bits */ +#define PSR_F_BIT 0x00000040 +#define PSR_I_BIT 0x00000080 +#define PSR_A_BIT 0x00000100 +#define PSR_D_BIT 0x00000200 +#define PSR_Q_BIT 0x08000000 +#define PSR_V_BIT 0x10000000 +#define PSR_C_BIT 0x20000000 +#define PSR_Z_BIT 0x40000000 +#define PSR_N_BIT 0x80000000 + +/* + * Groups of PSR bits + */ +#define PSR_f 0xff000000 /* Flags */ +#define PSR_s 0x00ff0000 /* Status */ +#define PSR_x 0x0000ff00 /* Extension */ +#define PSR_c 0x000000ff /* Control */ + +/* + * These are 'magic' values for PTRACE_PEEKUSR that return info about where a + * process is located in memory. + */ +#define PT_TEXT_ADDR 0x10000 +#define PT_DATA_ADDR 0x10004 +#define PT_TEXT_END_ADDR 0x10008 + +#ifndef __ASSEMBLY__ + +/* + * User structures for general purpose, floating point and debug registers. + */ +struct user_pt_regs { + __u64 regs[31]; + __u64 sp; + __u64 pc; + __u64 pstate; +}; + +struct user_fpsimd_state { + __uint128_t vregs[32]; + __u32 fpsr; + __u32 fpcr; +}; + +struct user_hwdebug_state { + __u32 dbg_info; + struct { + __u64 addr; + __u32 ctrl; + } dbg_regs[16]; +}; + +#ifdef __KERNEL__ + +/* sizeof(struct user) for AArch32 */ +#define COMPAT_USER_SZ 296 +/* AArch32 uses x13 as the stack pointer... */ +#define compat_sp regs[13] +/* ... and x14 as the link register. */ +#define compat_lr regs[14] + +/* + * This struct defines the way the registers are stored on the stack during an + * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for + * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs. + */ +struct pt_regs { + union { + struct user_pt_regs user_regs; + struct { + u64 regs[31]; + u64 sp; + u64 pc; + u64 pstate; + }; + }; + u64 orig_x0; + u64 syscallno; +}; + +#define arch_has_single_step() (1) + +#ifdef CONFIG_COMPAT +#define compat_thumb_mode(regs) \ + (((regs)->pstate & COMPAT_PSR_T_BIT)) +#else +#define compat_thumb_mode(regs) (0) +#endif + +#define user_mode(regs) \ + (((regs)->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t) + +#define compat_user_mode(regs) \ + (((regs)->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) == \ + (PSR_MODE32_BIT | PSR_MODE_EL0t)) + +#define processor_mode(regs) \ + ((regs)->pstate & PSR_MODE_MASK) + +#define interrupts_enabled(regs) \ + (!((regs)->pstate & PSR_I_BIT)) + +#define fast_interrupts_enabled(regs) \ + (!((regs)->pstate & PSR_F_BIT)) + +#define user_stack_pointer(regs) \ + ((regs)->sp) + +/* + * Are the current registers suitable for user mode? (used to maintain + * security in signal handlers) + */ +static inline int valid_user_regs(struct user_pt_regs *regs) +{ + if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) { + regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT); + + /* The T bit is reserved for AArch64 */ + if (!(regs->pstate & PSR_MODE32_BIT)) + regs->pstate &= ~COMPAT_PSR_T_BIT; + + return 1; + } + + /* + * Force PSR to something logical... + */ + regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \ + COMPAT_PSR_T_BIT | PSR_MODE32_BIT; + + if (!(regs->pstate & PSR_MODE32_BIT)) { + regs->pstate &= ~COMPAT_PSR_T_BIT; + regs->pstate |= PSR_MODE_EL0t; + } + + return 0; +} + +#define instruction_pointer(regs) (regs)->pc + +#ifdef CONFIG_SMP +extern unsigned long profile_pc(struct pt_regs *regs); +#else +#define profile_pc(regs) instruction_pointer(regs) +#endif + +extern int aarch32_break_trap(struct pt_regs *regs); + +#endif /* __KERNEL__ */ + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h new file mode 100644 index 00000000000..9cf2e46fbbd --- /dev/null +++ b/arch/arm64/include/asm/setup.h @@ -0,0 +1,26 @@ +/* + * Based on arch/arm/include/asm/setup.h + * + * Copyright (C) 1997-1999 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SETUP_H +#define __ASM_SETUP_H + +#include <linux/types.h> + +#define COMMAND_LINE_SIZE 2048 + +#endif diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h new file mode 100644 index 00000000000..4df608a8459 --- /dev/null +++ b/arch/arm64/include/asm/shmparam.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SHMPARAM_H +#define __ASM_SHMPARAM_H + +/* + * For IPC syscalls from compat tasks, we need to use the legacy 16k + * alignment value. Since we don't have aliasing D-caches, the rest of + * the time we can safely use PAGE_SIZE. + */ +#define COMPAT_SHMLBA 0x4000 + +#include <asm-generic/shmparam.h> + +#endif /* __ASM_SHMPARAM_H */ diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h new file mode 100644 index 00000000000..573cec77881 --- /dev/null +++ b/arch/arm64/include/asm/sigcontext.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SIGCONTEXT_H +#define __ASM_SIGCONTEXT_H + +#include <linux/types.h> + +/* + * Signal context structure - contains all info to do with the state + * before the signal handler was invoked. + */ +struct sigcontext { + __u64 fault_address; + /* AArch64 registers */ + __u64 regs[31]; + __u64 sp; + __u64 pc; + __u64 pstate; + /* 4K reserved for FP/SIMD state and future expansion */ + __u8 __reserved[4096] __attribute__((__aligned__(16))); +}; + +/* + * Header to be used at the beginning of structures extending the user + * context. Such structures must be placed after the rt_sigframe on the stack + * and be 16-byte aligned. The last structure must be a dummy one with the + * magic and size set to 0. + */ +struct _aarch64_ctx { + __u32 magic; + __u32 size; +}; + +#define FPSIMD_MAGIC 0x46508001 + +struct fpsimd_context { + struct _aarch64_ctx head; + __u32 fpsr; + __u32 fpcr; + __uint128_t vregs[32]; +}; + +#ifdef __KERNEL__ +/* + * Auxiliary context saved in the sigcontext.__reserved array. Not exported to + * user space as it will change with the addition of new context. User space + * should check the magic/size information. + */ +struct aux_context { + struct fpsimd_context fpsimd; + /* additional context to be added before "end" */ + struct _aarch64_ctx end; +}; +#endif + +#endif diff --git a/arch/arm64/include/asm/siginfo.h b/arch/arm64/include/asm/siginfo.h new file mode 100644 index 00000000000..5a74a0853db --- /dev/null +++ b/arch/arm64/include/asm/siginfo.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SIGINFO_H +#define __ASM_SIGINFO_H + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h new file mode 100644 index 00000000000..8d1e7236431 --- /dev/null +++ b/arch/arm64/include/asm/signal.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SIGNAL_H +#define __ASM_SIGNAL_H + +/* Required for AArch32 compatibility. */ +#define SA_RESTORER 0x04000000 + +#include <asm-generic/signal.h> + +#endif diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h new file mode 100644 index 00000000000..7c275e3b640 --- /dev/null +++ b/arch/arm64/include/asm/signal32.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SIGNAL32_H +#define __ASM_SIGNAL32_H + +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 + +extern const compat_ulong_t aarch32_sigret_code[6]; + +int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs); +int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs); + +void compat_setup_restart_syscall(struct pt_regs *regs); +#else + +static inline int compat_setup_frame(int usid, struct k_sigaction *ka, + sigset_t *set, struct pt_regs *regs) +{ + return -ENOSYS; +} + +static inline int compat_setup_rt_frame(int usig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *set, + struct pt_regs *regs) +{ + return -ENOSYS; +} + +static inline void compat_setup_restart_syscall(struct pt_regs *regs) +{ +} +#endif /* CONFIG_COMPAT */ +#endif /* __KERNEL__ */ +#endif /* __ASM_SIGNAL32_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h new file mode 100644 index 00000000000..7e34295f78e --- /dev/null +++ b/arch/arm64/include/asm/smp.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/thread_info.h> + +#ifndef CONFIG_SMP +# error "<asm/smp.h> included in non-SMP build" +#endif + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +struct seq_file; + +/* + * generate IPI list text + */ +extern void show_ipi_list(struct seq_file *p, int prec); + +/* + * Called from C code, this handles an IPI. + */ +extern void handle_IPI(int ipinr, struct pt_regs *regs); + +/* + * Setup the set of possible CPUs (via set_cpu_possible) + */ +extern void smp_init_cpus(void); + +/* + * Provide a function to raise an IPI cross call on CPUs in callmap. + */ +extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); + +/* + * Called from the secondary holding pen, this is the secondary CPU entry point. + */ +asmlinkage void secondary_start_kernel(void); + +/* + * Initial data for bringing up a secondary CPU. + */ +struct secondary_data { + void *stack; +}; +extern struct secondary_data secondary_data; +extern void secondary_holding_pen(void); +extern volatile unsigned long secondary_holding_pen_release; + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h new file mode 100644 index 00000000000..1be62bcb9d4 --- /dev/null +++ b/arch/arm64/include/asm/sparsemem.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SPARSEMEM_H +#define __ASM_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM +#define MAX_PHYSMEM_BITS 40 +#define SECTION_SIZE_BITS 30 +#endif + +#endif diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h new file mode 100644 index 00000000000..41112fe2f8b --- /dev/null +++ b/arch/arm64/include/asm/spinlock.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include <asm/spinlock_types.h> +#include <asm/processor.h> + +/* + * Spinlock implementation. + * + * The old value is read exclusively and the new one, if unlocked, is written + * exclusively. In case of failure, the loop is restarted. + * + * The memory barriers are implicit with the load-acquire and store-release + * instructions. + * + * Unlocked value: 0 + * Locked value: 1 + */ + +#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int tmp; + + asm volatile( + " sevl\n" + "1: wfe\n" + "2: ldaxr %w0, [%1]\n" + " cbnz %w0, 1b\n" + " stxr %w0, %w2, [%1]\n" + " cbnz %w0, 2b\n" + : "=&r" (tmp) + : "r" (&lock->lock), "r" (1) + : "memory"); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int tmp; + + asm volatile( + " ldaxr %w0, [%1]\n" + " cbnz %w0, 1f\n" + " stxr %w0, %w2, [%1]\n" + "1:\n" + : "=&r" (tmp) + : "r" (&lock->lock), "r" (1) + : "memory"); + + return !tmp; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + asm volatile( + " stlr %w1, [%0]\n" + : : "r" (&lock->lock), "r" (0) : "memory"); +} + +/* + * Write lock implementation. + * + * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is + * exclusively held. + * + * The memory barriers are implicit with the load-acquire and store-release + * instructions. + */ + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int tmp; + + asm volatile( + " sevl\n" + "1: wfe\n" + "2: ldaxr %w0, [%1]\n" + " cbnz %w0, 1b\n" + " stxr %w0, %w2, [%1]\n" + " cbnz %w0, 2b\n" + : "=&r" (tmp) + : "r" (&rw->lock), "r" (0x80000000) + : "memory"); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int tmp; + + asm volatile( + " ldaxr %w0, [%1]\n" + " cbnz %w0, 1f\n" + " stxr %w0, %w2, [%1]\n" + "1:\n" + : "=&r" (tmp) + : "r" (&rw->lock), "r" (0x80000000) + : "memory"); + + return !tmp; +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + asm volatile( + " stlr %w1, [%0]\n" + : : "r" (&rw->lock), "r" (0) : "memory"); +} + +/* write_can_lock - would write_trylock() succeed? */ +#define arch_write_can_lock(x) ((x)->lock == 0) + +/* + * Read lock implementation. + * + * It exclusively loads the lock value, increments it and stores the new value + * back if positive and the CPU still exclusively owns the location. If the + * value is negative, the lock is already held. + * + * During unlocking there may be multiple active read locks but no write lock. + * + * The memory barriers are implicit with the load-acquire and store-release + * instructions. + */ +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int tmp, tmp2; + + asm volatile( + " sevl\n" + "1: wfe\n" + "2: ldaxr %w0, [%2]\n" + " add %w0, %w0, #1\n" + " tbnz %w0, #31, 1b\n" + " stxr %w1, %w0, [%2]\n" + " cbnz %w1, 2b\n" + : "=&r" (tmp), "=&r" (tmp2) + : "r" (&rw->lock) + : "memory"); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int tmp, tmp2; + + asm volatile( + "1: ldxr %w0, [%2]\n" + " sub %w0, %w0, #1\n" + " stlxr %w1, %w0, [%2]\n" + " cbnz %w1, 1b\n" + : "=&r" (tmp), "=&r" (tmp2) + : "r" (&rw->lock) + : "memory"); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int tmp, tmp2 = 1; + + asm volatile( + " ldaxr %w0, [%2]\n" + " add %w0, %w0, #1\n" + " tbnz %w0, #31, 1f\n" + " stxr %w1, %w0, [%2]\n" + "1:\n" + : "=&r" (tmp), "+r" (tmp2) + : "r" (&rw->lock) + : "memory"); + + return !tmp2; +} + +/* read_can_lock - would read_trylock() succeed? */ +#define arch_read_can_lock(x) ((x)->lock < 0x80000000) + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h new file mode 100644 index 00000000000..9a494346efe --- /dev/null +++ b/arch/arm64/include/asm/spinlock_types.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SPINLOCK_TYPES_H +#define __ASM_SPINLOCK_TYPES_H + +#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H) +# error "please don't include this file directly" +#endif + +/* We only require natural alignment for exclusive accesses. */ +#define __lock_aligned + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h new file mode 100644 index 00000000000..7318f6d54aa --- /dev/null +++ b/arch/arm64/include/asm/stacktrace.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_STACKTRACE_H +#define __ASM_STACKTRACE_H + +struct stackframe { + unsigned long fp; + unsigned long sp; + unsigned long pc; +}; + +extern int unwind_frame(struct stackframe *frame); +extern void walk_stackframe(struct stackframe *frame, + int (*fn)(struct stackframe *, void *), void *data); + +#endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h new file mode 100644 index 00000000000..d87225cbead --- /dev/null +++ b/arch/arm64/include/asm/stat.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_STAT_H +#define __ASM_STAT_H + +#include <asm-generic/stat.h> + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) + +#include <asm/compat.h> + +/* + * struct stat64 is needed for compat tasks only. Its definition is different + * from the generic struct stat64. + */ +struct stat64 { + compat_u64 st_dev; + unsigned char __pad0[4]; + +#define STAT64_HAS_BROKEN_ST_INO 1 + compat_ulong_t __st_ino; + compat_uint_t st_mode; + compat_uint_t st_nlink; + + compat_ulong_t st_uid; + compat_ulong_t st_gid; + + compat_u64 st_rdev; + unsigned char __pad3[4]; + + compat_s64 st_size; + compat_ulong_t st_blksize; + compat_u64 st_blocks; /* Number of 512-byte blocks allocated. */ + + compat_ulong_t st_atime; + compat_ulong_t st_atime_nsec; + + compat_ulong_t st_mtime; + compat_ulong_t st_mtime_nsec; + + compat_ulong_t st_ctime; + compat_ulong_t st_ctime_nsec; + + compat_u64 st_ino; +}; + +#endif + +#endif diff --git a/arch/arm64/include/asm/statfs.h b/arch/arm64/include/asm/statfs.h new file mode 100644 index 00000000000..6f621905097 --- /dev/null +++ b/arch/arm64/include/asm/statfs.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_STATFS_H +#define __ASM_STATFS_H + +#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4))) + +#include <asm-generic/statfs.h> + +#endif diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h new file mode 100644 index 00000000000..89c047f9a97 --- /dev/null +++ b/arch/arm64/include/asm/syscall.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SYSCALL_H +#define __ASM_SYSCALL_H + +#include <linux/err.h> + + +static inline int syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->syscallno; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->regs[0] = regs->orig_x0; +} + + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->regs[0]; + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[0]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->regs[0] = (long) error ? error : val; +} + +#define SYSCALL_MAX_ARGS 6 + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + if (i + n > SYSCALL_MAX_ARGS) { + unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; + unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; + pr_warning("%s called with max args %d, handling only %d\n", + __func__, i + n, SYSCALL_MAX_ARGS); + memset(args_bad, 0, n_bad * sizeof(args[0])); + } + + if (i == 0) { + args[0] = regs->orig_x0; + args++; + i++; + n--; + } + + memcpy(args, ®s->regs[i], n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + if (i + n > SYSCALL_MAX_ARGS) { + pr_warning("%s called with max args %d, handling only %d\n", + __func__, i + n, SYSCALL_MAX_ARGS); + n = SYSCALL_MAX_ARGS - i; + } + + if (i == 0) { + regs->orig_x0 = args[0]; + args++; + i++; + n--; + } + + memcpy(®s->regs[i], args, n * sizeof(args[0])); +} + +#endif /* __ASM_SYSCALL_H */ diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h new file mode 100644 index 00000000000..09ff33572aa --- /dev/null +++ b/arch/arm64/include/asm/syscalls.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SYSCALLS_H +#define __ASM_SYSCALLS_H + +#include <linux/linkage.h> +#include <linux/compiler.h> +#include <linux/signal.h> + +/* + * System call wrappers implemented in kernel/entry.S. + */ +asmlinkage long sys_execve_wrapper(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp); +asmlinkage long sys_clone_wrapper(unsigned long clone_flags, + unsigned long newsp, + void __user *parent_tid, + unsigned long tls_val, + void __user *child_tid); +asmlinkage long sys_rt_sigreturn_wrapper(void); +asmlinkage long sys_sigaltstack_wrapper(const stack_t __user *uss, + stack_t __user *uoss); + +#include <asm-generic/syscalls.h> + +#endif /* __ASM_SYSCALLS_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h new file mode 100644 index 00000000000..95e40725534 --- /dev/null +++ b/arch/arm64/include/asm/system_misc.h @@ -0,0 +1,54 @@ +/* + * Based on arch/arm/include/asm/system_misc.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_SYSTEM_MISC_H +#define __ASM_SYSTEM_MISC_H + +#ifndef __ASSEMBLY__ + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/irqflags.h> + +struct pt_regs; + +void die(const char *msg, struct pt_regs *regs, int err); + +struct siginfo; +void arm64_notify_die(const char *str, struct pt_regs *regs, + struct siginfo *info, int err); + +void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, + struct pt_regs *), + int sig, int code, const char *name); + +struct mm_struct; +extern void show_pte(struct mm_struct *mm, unsigned long addr); +extern void __show_regs(struct pt_regs *); + +void soft_restart(unsigned long); +extern void (*pm_restart)(const char *cmd); + +#define UDBG_UNDEFINED (1 << 0) +#define UDBG_SYSCALL (1 << 1) +#define UDBG_BADABORT (1 << 2) +#define UDBG_SEGV (1 << 3) +#define UDBG_BUS (1 << 4) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h new file mode 100644 index 00000000000..3659e460071 --- /dev/null +++ b/arch/arm64/include/asm/thread_info.h @@ -0,0 +1,127 @@ +/* + * Based on arch/arm/include/asm/thread_info.h + * + * Copyright (C) 2002 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_THREAD_INFO_H +#define __ASM_THREAD_INFO_H + +#ifdef __KERNEL__ + +#include <linux/compiler.h> + +#ifndef CONFIG_ARM64_64K_PAGES +#define THREAD_SIZE_ORDER 1 +#endif + +#define THREAD_SIZE 8192 +#define THREAD_START_SP (THREAD_SIZE - 16) + +#ifndef __ASSEMBLY__ + +struct task_struct; +struct exec_domain; + +#include <asm/types.h> + +typedef unsigned long mm_segment_t; + +/* + * low level task data that entry.S needs immediate access to. + * __switch_to() assumes cpu_context follows immediately after cpu_domain. + */ +struct thread_info { + unsigned long flags; /* low level flags */ + mm_segment_t addr_limit; /* address limit */ + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + struct restart_block restart_block; + int preempt_count; /* 0 => preemptable, <0 => bug */ + int cpu; /* cpu */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* + * how to get the thread information struct from C + */ +static inline struct thread_info *current_thread_info(void) __attribute_const__; + +static inline struct thread_info *current_thread_info(void) +{ + register unsigned long sp asm ("sp"); + return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); +} + +#define thread_saved_pc(tsk) \ + ((unsigned long)(tsk->thread.cpu_context.pc)) +#define thread_saved_sp(tsk) \ + ((unsigned long)(tsk->thread.cpu_context.sp)) +#define thread_saved_fp(tsk) \ + ((unsigned long)(tsk->thread.cpu_context.fp)) + +#endif + +/* + * We use bit 30 of the preempt_count to indicate that kernel + * preemption is occurring. See <asm/hardirq.h>. + */ +#define PREEMPT_ACTIVE 0x40000000 + +/* + * thread information flags: + * TIF_SYSCALL_TRACE - syscall trace active + * TIF_SIGPENDING - signal pending + * TIF_NEED_RESCHED - rescheduling necessary + * TIF_NOTIFY_RESUME - callback before returning to user + * TIF_USEDFPU - FPU was used by this task this quantum (SMP) + * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED + */ +#define TIF_SIGPENDING 0 +#define TIF_NEED_RESCHED 1 +#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_SYSCALL_TRACE 8 +#define TIF_POLLING_NRFLAG 16 +#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +#define TIF_FREEZE 19 +#define TIF_RESTORE_SIGMASK 20 +#define TIF_SINGLESTEP 21 +#define TIF_32BIT 22 /* 32bit process */ +#define TIF_SWITCH_MM 23 /* deferred switch_mm */ + +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_32BIT (1 << TIF_32BIT) + +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME) + +#endif /* __KERNEL__ */ +#endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h new file mode 100644 index 00000000000..b24a31a7e2c --- /dev/null +++ b/arch/arm64/include/asm/timex.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_TIMEX_H +#define __ASM_TIMEX_H + +/* + * Use the current timer as a cycle counter since this is what we use for + * the delay loop. + */ +#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) + +#include <asm-generic/timex.h> + +#define ARCH_HAS_READ_CURRENT_TIMER + +#endif diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h new file mode 100644 index 00000000000..654f0968030 --- /dev/null +++ b/arch/arm64/include/asm/tlb.h @@ -0,0 +1,190 @@ +/* + * Based on arch/arm/include/asm/tlb.h + * + * Copyright (C) 2002 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_TLB_H +#define __ASM_TLB_H + +#include <linux/pagemap.h> +#include <linux/swap.h> + +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +#define MMU_GATHER_BUNDLE 8 + +/* + * TLB handling. This allows us to remove pages from the page + * tables, and efficiently handle the TLB issues. + */ +struct mmu_gather { + struct mm_struct *mm; + unsigned int fullmm; + struct vm_area_struct *vma; + unsigned long range_start; + unsigned long range_end; + unsigned int nr; + unsigned int max; + struct page **pages; + struct page *local[MMU_GATHER_BUNDLE]; +}; + +/* + * This is unnecessarily complex. There's three ways the TLB shootdown + * code is used: + * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). + * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. + * tlb->vma will be non-NULL. + * 2. Unmapping all vmas. See exit_mmap(). + * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. + * tlb->vma will be non-NULL. Additionally, page tables will be freed. + * 3. Unmapping argument pages. See shift_arg_pages(). + * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. + * tlb->vma will be NULL. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->fullmm || !tlb->vma) + flush_tlb_mm(tlb->mm); + else if (tlb->range_end > 0) { + flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end); + tlb->range_start = TASK_SIZE; + tlb->range_end = 0; + } +} + +static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) +{ + if (!tlb->fullmm) { + if (addr < tlb->range_start) + tlb->range_start = addr; + if (addr + PAGE_SIZE > tlb->range_end) + tlb->range_end = addr + PAGE_SIZE; + } +} + +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} + +static inline void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush(tlb); + free_pages_and_swap_cache(tlb->pages, tlb->nr); + tlb->nr = 0; + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); +} + +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) +{ + tlb->mm = mm; + tlb->fullmm = fullmm; + tlb->vma = NULL; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; + tlb->nr = 0; + __tlb_alloc_page(tlb); +} + +static inline void +tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); +} + +/* + * Memorize the range for the TLB flush. + */ +static inline void +tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + +/* + * In the case of tlb vma handling, we can optimise these away in the + * case where we're doing a full MM flush. When we're doing a munmap, + * the vmas are adjusted to only cover the region to be torn down. + */ +static inline void +tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (!tlb->fullmm) { + tlb->vma = vma; + tlb->range_start = TASK_SIZE; + tlb->range_end = 0; + } +} + +static inline void +tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (!tlb->fullmm) + tlb_flush(tlb); +} + +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + tlb->pages[tlb->nr++] = page; + VM_BUG_ON(tlb->nr > tlb->max); + return tlb->max - tlb->nr; +} + +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (!__tlb_remove_page(tlb, page)) + tlb_flush_mmu(tlb); +} + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long addr) +{ + pgtable_page_dtor(pte); + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, pte); +} + +#ifndef CONFIG_ARM64_64K_PAGES +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, + unsigned long addr) +{ + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, virt_to_page(pmdp)); +} +#endif + +#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) +#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) +#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) + +#define tlb_migrate_finish(mm) do { } while (0) + +#endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h new file mode 100644 index 00000000000..122d6320f74 --- /dev/null +++ b/arch/arm64/include/asm/tlbflush.h @@ -0,0 +1,122 @@ +/* + * Based on arch/arm/include/asm/tlbflush.h + * + * Copyright (C) 1999-2003 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_TLBFLUSH_H +#define __ASM_TLBFLUSH_H + +#ifndef __ASSEMBLY__ + +#include <linux/sched.h> +#include <asm/cputype.h> + +extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long); + +extern struct cpu_tlb_fns cpu_tlb; + +/* + * TLB Management + * ============== + * + * The arch/arm64/mm/tlb.S files implement these methods. + * + * The TLB specific code is expected to perform whatever tests it needs + * to determine if it should invalidate the TLB for each call. Start + * addresses are inclusive and end addresses are exclusive; it is safe to + * round these addresses down. + * + * flush_tlb_all() + * + * Invalidate the entire TLB. + * + * flush_tlb_mm(mm) + * + * Invalidate all TLB entries in a particular address space. + * - mm - mm_struct describing address space + * + * flush_tlb_range(mm,start,end) + * + * Invalidate a range of TLB entries in the specified address + * space. + * - mm - mm_struct describing address space + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * + * flush_tlb_page(vaddr,vma) + * + * Invalidate the specified page in the specified address range. + * - vaddr - virtual address (may not be aligned) + * - vma - vma_struct describing address range + * + * flush_kern_tlb_page(kaddr) + * + * Invalidate the TLB entry for the specified page. The address + * will be in the kernels virtual memory space. Current uses + * only require the D-TLB to be invalidated. + * - kaddr - Kernel virtual memory address + */ +static inline void flush_tlb_all(void) +{ + dsb(); + asm("tlbi vmalle1is"); + dsb(); + isb(); +} + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + unsigned long asid = (unsigned long)ASID(mm) << 48; + + dsb(); + asm("tlbi aside1is, %0" : : "r" (asid)); + dsb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long uaddr) +{ + unsigned long addr = uaddr >> 12 | + ((unsigned long)ASID(vma->vm_mm) << 48); + + dsb(); + asm("tlbi vae1is, %0" : : "r" (addr)); + dsb(); +} + +/* + * Convert calls to our calling convention. + */ +#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma) +#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e) + +/* + * On AArch64, the cache coherency is handled via the set_pte_at() function. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + /* + * set_pte() does not have a DSB, so make sure that the page table + * write is visible. + */ + dsb(); +} + +#endif + +#endif diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h new file mode 100644 index 00000000000..10ca8ff93cc --- /dev/null +++ b/arch/arm64/include/asm/traps.h @@ -0,0 +1,30 @@ +/* + * Based on arch/arm/include/asm/traps.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_TRAP_H +#define __ASM_TRAP_H + +static inline int in_exception_text(unsigned long ptr) +{ + extern char __exception_text_start[]; + extern char __exception_text_end[]; + + return ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; +} + +#endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h new file mode 100644 index 00000000000..008f8481da6 --- /dev/null +++ b/arch/arm64/include/asm/uaccess.h @@ -0,0 +1,297 @@ +/* + * Based on arch/arm/include/asm/uaccess.h + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_UACCESS_H +#define __ASM_UACCESS_H + +/* + * User space memory access functions + */ +#include <linux/string.h> +#include <linux/thread_info.h> + +#include <asm/ptrace.h> +#include <asm/errno.h> +#include <asm/memory.h> +#include <asm/compiler.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +extern int fixup_exception(struct pt_regs *regs); + +#define KERNEL_DS (-1UL) +#define get_ds() (KERNEL_DS) + +#define USER_DS TASK_SIZE_64 +#define get_fs() (current_thread_info()->addr_limit) + +static inline void set_fs(mm_segment_t fs) +{ + current_thread_info()->addr_limit = fs; +} + +#define segment_eq(a,b) ((a) == (b)) + +/* + * Return 1 if addr < current->addr_limit, 0 otherwise. + */ +#define __addr_ok(addr) \ +({ \ + unsigned long flag; \ + asm("cmp %1, %0; cset %0, lo" \ + : "=&r" (flag) \ + : "r" (addr), "0" (current_thread_info()->addr_limit) \ + : "cc"); \ + flag; \ +}) + +/* + * Test whether a block of memory is a valid user space address. + * Returns 1 if the range is valid, 0 otherwise. + * + * This is equivalent to the following test: + * (u65)addr + (u65)size < (u65)current->addr_limit + * + * This needs 65-bit arithmetic. + */ +#define __range_ok(addr, size) \ +({ \ + unsigned long flag, roksum; \ + __chk_user_ptr(addr); \ + asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \ + : "=&r" (flag), "=&r" (roksum) \ + : "1" (addr), "Ir" (size), \ + "r" (current_thread_info()->addr_limit) \ + : "cc"); \ + flag; \ +}) + +#define access_ok(type, addr, size) __range_ok(addr, size) + +/* + * The "__xxx" versions of the user access functions do not verify the address + * space - it must have been done previously with a separate "access_ok()" + * call. + * + * The "__xxx_error" versions set the third argument to -EFAULT if an error + * occurs, and leave it unchanged on success. + */ +#define __get_user_asm(instr, reg, x, addr, err) \ + asm volatile( \ + "1: " instr " " reg "1, [%2]\n" \ + "2:\n" \ + " .section .fixup, \"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %3\n" \ + " mov %1, #0\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .quad 1b, 3b\n" \ + " .previous" \ + : "+r" (err), "=&r" (x) \ + : "r" (addr), "i" (-EFAULT)) + +#define __get_user_err(x, ptr, err) \ +do { \ + unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ + break; \ + case 2: \ + __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \ + break; \ + case 4: \ + __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \ + break; \ + case 8: \ + __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + (x) = (__typeof__(*(ptr)))__gu_val; \ +} while (0) + +#define __get_user(x, ptr) \ +({ \ + int __gu_err = 0; \ + __get_user_err((x), (ptr), __gu_err); \ + __gu_err; \ +}) + +#define __get_user_error(x, ptr, err) \ +({ \ + __get_user_err((x), (ptr), (err)); \ + (void)0; \ +}) + +#define __get_user_unaligned __get_user + +#define get_user(x, ptr) \ +({ \ + might_sleep(); \ + access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \ + __get_user((x), (ptr)) : \ + ((x) = 0, -EFAULT); \ +}) + +#define __put_user_asm(instr, reg, x, addr, err) \ + asm volatile( \ + "1: " instr " " reg "1, [%2]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %3\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .quad 1b, 3b\n" \ + " .previous" \ + : "+r" (err) \ + : "r" (x), "r" (addr), "i" (-EFAULT)) + +#define __put_user_err(x, ptr, err) \ +do { \ + __typeof__(*(ptr)) __pu_val = (x); \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ + break; \ + case 2: \ + __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \ + break; \ + case 4: \ + __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \ + break; \ + case 8: \ + __put_user_asm("str", "%", __pu_val, (ptr), (err)); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ +} while (0) + +#define __put_user(x, ptr) \ +({ \ + int __pu_err = 0; \ + __put_user_err((x), (ptr), __pu_err); \ + __pu_err; \ +}) + +#define __put_user_error(x, ptr, err) \ +({ \ + __put_user_err((x), (ptr), (err)); \ + (void)0; \ +}) + +#define __put_user_unaligned __put_user + +#define put_user(x, ptr) \ +({ \ + might_sleep(); \ + access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ + __put_user((x), (ptr)) : \ + -EFAULT; \ +}) + +extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); + +extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); +extern unsigned long __must_check __strnlen_user(const char __user *s, long n); + +static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else /* security hole - plug it */ + memset(to, 0, n); + return n; +} + +static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} + +static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)) + n = __copy_in_user(to, from, n); + return n; +} + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __clear_user(to, n); + return n; +} + +static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + res = __strncpy_from_user(dst, src, count); + return res; +} + +#define strlen_user(s) strnlen_user(s, ~0UL >> 1) + +static inline long __must_check strnlen_user(const char __user *s, long n) +{ + unsigned long res = 0; + + if (__addr_ok(s)) + res = __strnlen_user(s, n); + + return res; +} + +#endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/asm/ucontext.h new file mode 100644 index 00000000000..bde96072089 --- /dev/null +++ b/arch/arm64/include/asm/ucontext.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_UCONTEXT_H +#define __ASM_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + sigset_t uc_sigmask; + /* glibc uses a 1024-bit sigset_t */ + __u8 __unused[(1024 - sizeof(sigset_t)) / 8]; + /* last for future expansion */ + struct sigcontext uc_mcontext; +}; + +#endif /* __ASM_UCONTEXT_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h new file mode 100644 index 00000000000..fe18a683274 --- /dev/null +++ b/arch/arm64/include/asm/unistd.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#if !defined(__ASM_UNISTD_H) || defined(__SYSCALL) +#define __ASM_UNISTD_H + +#ifndef __SYSCALL_COMPAT +#include <asm-generic/unistd.h> +#endif + +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +#include <asm/unistd32.h> +#endif + +#endif /* __ASM_UNISTD_H */ diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h new file mode 100644 index 00000000000..a50405f5ee4 --- /dev/null +++ b/arch/arm64/include/asm/unistd32.h @@ -0,0 +1,758 @@ +/* + * Based on arch/arm/include/asm/unistd.h + * + * Copyright (C) 2001-2005 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#if !defined(__ASM_UNISTD32_H) || defined(__SYSCALL) +#define __ASM_UNISTD32_H + +#ifndef __SYSCALL +#define __SYSCALL(x, y) +#endif + +/* + * This file contains the system call numbers. + */ + +#ifdef __SYSCALL_COMPAT + +#define __NR_restart_syscall 0 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_exit 1 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_fork 2 +__SYSCALL(__NR_fork, sys_fork) +#define __NR_read 3 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 4 +__SYSCALL(__NR_write, sys_write) +#define __NR_open 5 +__SYSCALL(__NR_open, sys_open) +#define __NR_close 6 +__SYSCALL(__NR_close, sys_close) +__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */ +#define __NR_creat 8 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_link 9 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 10 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_execve 11 +__SYSCALL(__NR_execve, sys_execve) +#define __NR_chdir 12 +__SYSCALL(__NR_chdir, sys_chdir) +__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */ +#define __NR_mknod 14 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 15 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_lchown 16 +__SYSCALL(__NR_lchown, sys_lchown16) +__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */ +__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */ +#define __NR_lseek 19 +__SYSCALL(__NR_lseek, sys_lseek) +#define __NR_getpid 20 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_mount 21 +__SYSCALL(__NR_mount, sys_mount) +__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */ +#define __NR_setuid 23 +__SYSCALL(__NR_setuid, sys_setuid16) +#define __NR_getuid 24 +__SYSCALL(__NR_getuid, sys_getuid16) +__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */ +#define __NR_ptrace 26 +__SYSCALL(__NR_ptrace, sys_ptrace) +__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */ +__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */ +#define __NR_pause 29 +__SYSCALL(__NR_pause, sys_pause) +__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */ +__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */ +__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */ +#define __NR_access 33 +__SYSCALL(__NR_access, sys_access) +#define __NR_nice 34 +__SYSCALL(__NR_nice, sys_nice) +__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */ +#define __NR_sync 36 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_kill 37 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_rename 38 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_mkdir 39 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 40 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_dup 41 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_pipe 42 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_times 43 +__SYSCALL(__NR_times, sys_times) +__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */ +#define __NR_brk 45 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_setgid 46 +__SYSCALL(__NR_setgid, sys_setgid16) +#define __NR_getgid 47 +__SYSCALL(__NR_getgid, sys_getgid16) +__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */ +#define __NR_geteuid 49 +__SYSCALL(__NR_geteuid, sys_geteuid16) +#define __NR_getegid 50 +__SYSCALL(__NR_getegid, sys_getegid16) +#define __NR_acct 51 +__SYSCALL(__NR_acct, sys_acct) +#define __NR_umount2 52 +__SYSCALL(__NR_umount2, sys_umount) +__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */ +#define __NR_ioctl 54 +__SYSCALL(__NR_ioctl, sys_ioctl) +#define __NR_fcntl 55 +__SYSCALL(__NR_fcntl, sys_fcntl) +__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */ +#define __NR_setpgid 57 +__SYSCALL(__NR_setpgid, sys_setpgid) +__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */ +__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */ +#define __NR_umask 60 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_chroot 61 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_ustat 62 +__SYSCALL(__NR_ustat, sys_ustat) +#define __NR_dup2 63 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_getppid 64 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getpgrp 65 +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_setsid 66 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_sigaction 67 +__SYSCALL(__NR_sigaction, sys_sigaction) +__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */ +__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */ +#define __NR_setreuid 70 +__SYSCALL(__NR_setreuid, sys_setreuid16) +#define __NR_setregid 71 +__SYSCALL(__NR_setregid, sys_setregid16) +#define __NR_sigsuspend 72 +__SYSCALL(__NR_sigsuspend, sys_sigsuspend) +#define __NR_sigpending 73 +__SYSCALL(__NR_sigpending, sys_sigpending) +#define __NR_sethostname 74 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setrlimit 75 +__SYSCALL(__NR_setrlimit, sys_setrlimit) +__SYSCALL(76, sys_ni_syscall) /* 76 was sys_getrlimit */ +#define __NR_getrusage 77 +__SYSCALL(__NR_getrusage, sys_getrusage) +#define __NR_gettimeofday 78 +__SYSCALL(__NR_gettimeofday, sys_gettimeofday) +#define __NR_settimeofday 79 +__SYSCALL(__NR_settimeofday, sys_settimeofday) +#define __NR_getgroups 80 +__SYSCALL(__NR_getgroups, sys_getgroups16) +#define __NR_setgroups 81 +__SYSCALL(__NR_setgroups, sys_setgroups16) +__SYSCALL(82, sys_ni_syscall) /* 82 was sys_select */ +#define __NR_symlink 83 +__SYSCALL(__NR_symlink, sys_symlink) +__SYSCALL(84, sys_ni_syscall) /* 84 was sys_lstat */ +#define __NR_readlink 85 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_uselib 86 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR_swapon 87 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_reboot 88 +__SYSCALL(__NR_reboot, sys_reboot) +__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */ +__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */ +#define __NR_munmap 91 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_truncate 92 +__SYSCALL(__NR_truncate, sys_truncate) +#define __NR_ftruncate 93 +__SYSCALL(__NR_ftruncate, sys_ftruncate) +#define __NR_fchmod 94 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchown 95 +__SYSCALL(__NR_fchown, sys_fchown16) +#define __NR_getpriority 96 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_setpriority 97 +__SYSCALL(__NR_setpriority, sys_setpriority) +__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */ +#define __NR_statfs 99 +__SYSCALL(__NR_statfs, sys_statfs) +#define __NR_fstatfs 100 +__SYSCALL(__NR_fstatfs, sys_fstatfs) +__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */ +__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */ +#define __NR_syslog 103 +__SYSCALL(__NR_syslog, sys_syslog) +#define __NR_setitimer 104 +__SYSCALL(__NR_setitimer, sys_setitimer) +#define __NR_getitimer 105 +__SYSCALL(__NR_getitimer, sys_getitimer) +#define __NR_stat 106 +__SYSCALL(__NR_stat, sys_newstat) +#define __NR_lstat 107 +__SYSCALL(__NR_lstat, sys_newlstat) +#define __NR_fstat 108 +__SYSCALL(__NR_fstat, sys_newfstat) +__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */ +__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */ +#define __NR_vhangup 111 +__SYSCALL(__NR_vhangup, sys_vhangup) +__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */ +__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */ +#define __NR_wait4 114 +__SYSCALL(__NR_wait4, sys_wait4) +#define __NR_swapoff 115 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_sysinfo 116 +__SYSCALL(__NR_sysinfo, sys_sysinfo) +__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */ +#define __NR_fsync 118 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_sigreturn 119 +__SYSCALL(__NR_sigreturn, sys_sigreturn) +#define __NR_clone 120 +__SYSCALL(__NR_clone, sys_clone) +#define __NR_setdomainname 121 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_uname 122 +__SYSCALL(__NR_uname, sys_newuname) +__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */ +#define __NR_adjtimex 124 +__SYSCALL(__NR_adjtimex, sys_adjtimex) +#define __NR_mprotect 125 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_sigprocmask 126 +__SYSCALL(__NR_sigprocmask, sys_sigprocmask) +__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */ +#define __NR_init_module 128 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 129 +__SYSCALL(__NR_delete_module, sys_delete_module) +__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */ +#define __NR_quotactl 131 +__SYSCALL(__NR_quotactl, sys_quotactl) +#define __NR_getpgid 132 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_fchdir 133 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_bdflush 134 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_sysfs 135 +__SYSCALL(__NR_sysfs, sys_sysfs) +#define __NR_personality 136 +__SYSCALL(__NR_personality, sys_personality) +__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */ +#define __NR_setfsuid 138 +__SYSCALL(__NR_setfsuid, sys_setfsuid16) +#define __NR_setfsgid 139 +__SYSCALL(__NR_setfsgid, sys_setfsgid16) +#define __NR__llseek 140 +__SYSCALL(__NR__llseek, sys_llseek) +#define __NR_getdents 141 +__SYSCALL(__NR_getdents, sys_getdents) +#define __NR__newselect 142 +__SYSCALL(__NR__newselect, sys_select) +#define __NR_flock 143 +__SYSCALL(__NR_flock, sys_flock) +#define __NR_msync 144 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_readv 145 +__SYSCALL(__NR_readv, sys_readv) +#define __NR_writev 146 +__SYSCALL(__NR_writev, sys_writev) +#define __NR_getsid 147 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_fdatasync 148 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR__sysctl 149 +__SYSCALL(__NR__sysctl, sys_sysctl) +#define __NR_mlock 150 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 151 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 152 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 153 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_sched_setparam 154 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_getparam 155 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setscheduler 156 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 157 +__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler) +#define __NR_sched_yield 158 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 159 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 160 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 161 +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval) +#define __NR_nanosleep 162 +__SYSCALL(__NR_nanosleep, sys_nanosleep) +#define __NR_mremap 163 +__SYSCALL(__NR_mremap, sys_mremap) +#define __NR_setresuid 164 +__SYSCALL(__NR_setresuid, sys_setresuid16) +#define __NR_getresuid 165 +__SYSCALL(__NR_getresuid, sys_getresuid16) +__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */ +__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */ +#define __NR_poll 168 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_nfsservctl 169 +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) +#define __NR_setresgid 170 +__SYSCALL(__NR_setresgid, sys_setresgid16) +#define __NR_getresgid 171 +__SYSCALL(__NR_getresgid, sys_getresgid16) +#define __NR_prctl 172 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_rt_sigreturn 173 +__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) +#define __NR_rt_sigaction 174 +__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) +#define __NR_rt_sigprocmask 175 +__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask) +#define __NR_rt_sigpending 176 +__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending) +#define __NR_rt_sigtimedwait 177 +__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 178 +__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) +#define __NR_rt_sigsuspend 179 +__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) +#define __NR_pread64 180 +__SYSCALL(__NR_pread64, sys_pread64) +#define __NR_pwrite64 181 +__SYSCALL(__NR_pwrite64, sys_pwrite64) +#define __NR_chown 182 +__SYSCALL(__NR_chown, sys_chown16) +#define __NR_getcwd 183 +__SYSCALL(__NR_getcwd, sys_getcwd) +#define __NR_capget 184 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 185 +__SYSCALL(__NR_capset, sys_capset) +#define __NR_sigaltstack 186 +__SYSCALL(__NR_sigaltstack, sys_sigaltstack) +#define __NR_sendfile 187 +__SYSCALL(__NR_sendfile, sys_sendfile) +__SYSCALL(188, sys_ni_syscall) /* 188 reserved */ +__SYSCALL(189, sys_ni_syscall) /* 189 reserved */ +#define __NR_vfork 190 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +__SYSCALL(__NR_ugetrlimit, sys_getrlimit) +#define __NR_mmap2 192 +__SYSCALL(__NR_mmap2, sys_mmap2) +#define __NR_truncate64 193 +__SYSCALL(__NR_truncate64, sys_truncate64) +#define __NR_ftruncate64 194 +__SYSCALL(__NR_ftruncate64, sys_ftruncate64) +#define __NR_stat64 195 +__SYSCALL(__NR_stat64, sys_stat64) +#define __NR_lstat64 196 +__SYSCALL(__NR_lstat64, sys_lstat64) +#define __NR_fstat64 197 +__SYSCALL(__NR_fstat64, sys_fstat64) +#define __NR_lchown32 198 +__SYSCALL(__NR_lchown32, sys_lchown) +#define __NR_getuid32 199 +__SYSCALL(__NR_getuid32, sys_getuid) +#define __NR_getgid32 200 +__SYSCALL(__NR_getgid32, sys_getgid) +#define __NR_geteuid32 201 +__SYSCALL(__NR_geteuid32, sys_geteuid) +#define __NR_getegid32 202 +__SYSCALL(__NR_getegid32, sys_getegid) +#define __NR_setreuid32 203 +__SYSCALL(__NR_setreuid32, sys_setreuid) +#define __NR_setregid32 204 +__SYSCALL(__NR_setregid32, sys_setregid) +#define __NR_getgroups32 205 +__SYSCALL(__NR_getgroups32, sys_getgroups) +#define __NR_setgroups32 206 +__SYSCALL(__NR_setgroups32, sys_setgroups) +#define __NR_fchown32 207 +__SYSCALL(__NR_fchown32, sys_fchown) +#define __NR_setresuid32 208 +__SYSCALL(__NR_setresuid32, sys_setresuid) +#define __NR_getresuid32 209 +__SYSCALL(__NR_getresuid32, sys_getresuid) +#define __NR_setresgid32 210 +__SYSCALL(__NR_setresgid32, sys_setresgid) +#define __NR_getresgid32 211 +__SYSCALL(__NR_getresgid32, sys_getresgid) +#define __NR_chown32 212 +__SYSCALL(__NR_chown32, sys_chown) +#define __NR_setuid32 213 +__SYSCALL(__NR_setuid32, sys_setuid) +#define __NR_setgid32 214 +__SYSCALL(__NR_setgid32, sys_setgid) +#define __NR_setfsuid32 215 +__SYSCALL(__NR_setfsuid32, sys_setfsuid) +#define __NR_setfsgid32 216 +__SYSCALL(__NR_setfsgid32, sys_setfsgid) +#define __NR_getdents64 217 +__SYSCALL(__NR_getdents64, sys_getdents64) +#define __NR_pivot_root 218 +__SYSCALL(__NR_pivot_root, sys_pivot_root) +#define __NR_mincore 219 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 220 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_fcntl64 221 +__SYSCALL(__NR_fcntl64, sys_fcntl64) +__SYSCALL(222, sys_ni_syscall) /* 222 for tux */ +__SYSCALL(223, sys_ni_syscall) /* 223 is unused */ +#define __NR_gettid 224 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_readahead 225 +__SYSCALL(__NR_readahead, sys_readahead) +#define __NR_setxattr 226 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 227 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 228 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 229 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 230 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 231 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 232 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 233 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 234 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 235 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 236 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 237 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) +#define __NR_tkill 238 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_sendfile64 239 +__SYSCALL(__NR_sendfile64, sys_sendfile64) +#define __NR_futex 240 +__SYSCALL(__NR_futex, sys_futex) +#define __NR_sched_setaffinity 241 +__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity) +#define __NR_sched_getaffinity 242 +__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity) +#define __NR_io_setup 243 +__SYSCALL(__NR_io_setup, sys_io_setup) +#define __NR_io_destroy 244 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_getevents 245 +__SYSCALL(__NR_io_getevents, sys_io_getevents) +#define __NR_io_submit 246 +__SYSCALL(__NR_io_submit, sys_io_submit) +#define __NR_io_cancel 247 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_exit_group 248 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_lookup_dcookie 249 +__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie) +#define __NR_epoll_create 250 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_epoll_ctl 251 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_wait 252 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_remap_file_pages 253 +__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) +__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */ +__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */ +#define __NR_set_tid_address 256 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_timer_create 257 +__SYSCALL(__NR_timer_create, sys_timer_create) +#define __NR_timer_settime 258 +__SYSCALL(__NR_timer_settime, sys_timer_settime) +#define __NR_timer_gettime 259 +__SYSCALL(__NR_timer_gettime, sys_timer_gettime) +#define __NR_timer_getoverrun 260 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_delete 261 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 262 +__SYSCALL(__NR_clock_settime, sys_clock_settime) +#define __NR_clock_gettime 263 +__SYSCALL(__NR_clock_gettime, sys_clock_gettime) +#define __NR_clock_getres 264 +__SYSCALL(__NR_clock_getres, sys_clock_getres) +#define __NR_clock_nanosleep 265 +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep) +#define __NR_statfs64 266 +__SYSCALL(__NR_statfs64, sys_statfs64) +#define __NR_fstatfs64 267 +__SYSCALL(__NR_fstatfs64, sys_fstatfs64) +#define __NR_tgkill 268 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 269 +__SYSCALL(__NR_utimes, sys_utimes) +#define __NR_fadvise64 270 +__SYSCALL(__NR_fadvise64, sys_fadvise64_64) +#define __NR_pciconfig_iobase 271 +__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) +#define __NR_pciconfig_read 272 +__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) +#define __NR_pciconfig_write 273 +__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) +#define __NR_mq_open 274 +__SYSCALL(__NR_mq_open, sys_mq_open) +#define __NR_mq_unlink 275 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 276 +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +#define __NR_mq_timedreceive 277 +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +#define __NR_mq_notify 278 +__SYSCALL(__NR_mq_notify, sys_mq_notify) +#define __NR_mq_getsetattr 279 +__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) +#define __NR_waitid 280 +__SYSCALL(__NR_waitid, sys_waitid) +#define __NR_socket 281 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_bind 282 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_connect 283 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_listen 284 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 285 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_getsockname 286 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 287 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_socketpair 288 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_send 289 +__SYSCALL(__NR_send, sys_send) +#define __NR_sendto 290 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recv 291 +__SYSCALL(__NR_recv, sys_recv) +#define __NR_recvfrom 292 +__SYSCALL(__NR_recvfrom, sys_recvfrom) +#define __NR_shutdown 293 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_setsockopt 294 +__SYSCALL(__NR_setsockopt, sys_setsockopt) +#define __NR_getsockopt 295 +__SYSCALL(__NR_getsockopt, sys_getsockopt) +#define __NR_sendmsg 296 +__SYSCALL(__NR_sendmsg, sys_sendmsg) +#define __NR_recvmsg 297 +__SYSCALL(__NR_recvmsg, sys_recvmsg) +#define __NR_semop 298 +__SYSCALL(__NR_semop, sys_semop) +#define __NR_semget 299 +__SYSCALL(__NR_semget, sys_semget) +#define __NR_semctl 300 +__SYSCALL(__NR_semctl, sys_semctl) +#define __NR_msgsnd 301 +__SYSCALL(__NR_msgsnd, sys_msgsnd) +#define __NR_msgrcv 302 +__SYSCALL(__NR_msgrcv, sys_msgrcv) +#define __NR_msgget 303 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 304 +__SYSCALL(__NR_msgctl, sys_msgctl) +#define __NR_shmat 305 +__SYSCALL(__NR_shmat, sys_shmat) +#define __NR_shmdt 306 +__SYSCALL(__NR_shmdt, sys_shmdt) +#define __NR_shmget 307 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 308 +__SYSCALL(__NR_shmctl, sys_shmctl) +#define __NR_add_key 309 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 310 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 311 +__SYSCALL(__NR_keyctl, sys_keyctl) +#define __NR_semtimedop 312 +__SYSCALL(__NR_semtimedop, sys_semtimedop) +#define __NR_vserver 313 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_ioprio_set 314 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 315 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) +#define __NR_inotify_init 316 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_inotify_add_watch 317 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 318 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_mbind 319 +__SYSCALL(__NR_mbind, sys_mbind) +#define __NR_get_mempolicy 320 +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) +#define __NR_set_mempolicy 321 +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) +#define __NR_openat 322 +__SYSCALL(__NR_openat, sys_openat) +#define __NR_mkdirat 323 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 324 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 325 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 326 +__SYSCALL(__NR_futimesat, sys_futimesat) +#define __NR_fstatat64 327 +__SYSCALL(__NR_fstatat64, sys_fstatat64) +#define __NR_unlinkat 328 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 329 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 330 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 331 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 332 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 333 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 334 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_pselect6 335 +__SYSCALL(__NR_pselect6, sys_pselect6) +#define __NR_ppoll 336 +__SYSCALL(__NR_ppoll, sys_ppoll) +#define __NR_unshare 337 +__SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 338 +__SYSCALL(__NR_set_robust_list, sys_set_robust_list) +#define __NR_get_robust_list 339 +__SYSCALL(__NR_get_robust_list, sys_get_robust_list) +#define __NR_splice 340 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_sync_file_range2 341 +__SYSCALL(__NR_sync_file_range2, sys_sync_file_range2) +#define __NR_tee 342 +__SYSCALL(__NR_tee, sys_tee) +#define __NR_vmsplice 343 +__SYSCALL(__NR_vmsplice, sys_vmsplice) +#define __NR_move_pages 344 +__SYSCALL(__NR_move_pages, sys_move_pages) +#define __NR_getcpu 345 +__SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_epoll_pwait 346 +__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) +#define __NR_kexec_load 347 +__SYSCALL(__NR_kexec_load, sys_kexec_load) +#define __NR_utimensat 348 +__SYSCALL(__NR_utimensat, sys_utimensat) +#define __NR_signalfd 349 +__SYSCALL(__NR_signalfd, sys_signalfd) +#define __NR_timerfd_create 350 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_eventfd 351 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 352 +__SYSCALL(__NR_fallocate, sys_fallocate) +#define __NR_timerfd_settime 353 +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) +#define __NR_timerfd_gettime 354 +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +#define __NR_signalfd4 355 +__SYSCALL(__NR_signalfd4, sys_signalfd4) +#define __NR_eventfd2 356 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 357 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 358 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 359 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 360 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_preadv 361 +__SYSCALL(__NR_preadv, sys_preadv) +#define __NR_pwritev 362 +__SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 363 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) +#define __NR_perf_event_open 364 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 365 +__SYSCALL(__NR_recvmmsg, sys_recvmmsg) +#define __NR_accept4 366 +__SYSCALL(__NR_accept4, sys_accept4) +#define __NR_fanotify_init 367 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 368 +__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) +#define __NR_prlimit64 369 +__SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 370 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 371 +__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#define __NR_clock_adjtime 372 +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) +#define __NR_syncfs 373 +__SYSCALL(__NR_syncfs, sys_syncfs) + +/* + * The following SVCs are ARM private. + */ +#define __ARM_NR_COMPAT_BASE 0x0f0000 +#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) +#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) + +#endif /* __SYSCALL_COMPAT */ + +#define __NR_compat_syscalls 374 + +#define __ARCH_WANT_COMPAT_IPC_PARSE_VERSION +#define __ARCH_WANT_COMPAT_STAT64 +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND + +#endif /* __ASM_UNISTD32_H */ diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h new file mode 100644 index 00000000000..839ce0031bd --- /dev/null +++ b/arch/arm64/include/asm/vdso.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_VDSO_H +#define __ASM_VDSO_H + +#ifdef __KERNEL__ + +/* + * Default link address for the vDSO. + * Since we randomise the VDSO mapping, there's little point in trying + * to prelink this. + */ +#define VDSO_LBASE 0x0 + +#ifndef __ASSEMBLY__ + +#include <generated/vdso-offsets.h> + +#define VDSO_SYMBOL(base, name) \ +({ \ + (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ +}) + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h new file mode 100644 index 00000000000..de66199673d --- /dev/null +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_VDSO_DATAPAGE_H +#define __ASM_VDSO_DATAPAGE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +struct vdso_data { + __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 xtime_clock_sec; /* Kernel time */ + __u64 xtime_clock_nsec; + __u64 xtime_coarse_sec; /* Coarse time */ + __u64 xtime_coarse_nsec; + __u64 wtm_clock_sec; /* Wall to monotonic time */ + __u64 wtm_clock_nsec; + __u32 tb_seq_count; /* Timebase sequence counter */ + __u32 cs_mult; /* Clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift */ + __u32 tz_minuteswest; /* Whacky timezone stuff */ + __u32 tz_dsttime; + __u32 use_syscall; +}; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/arm64/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile new file mode 100644 index 00000000000..e2caff1b812 --- /dev/null +++ b/arch/arm64/kernel/Makefile @@ -0,0 +1,27 @@ +# +# Makefile for the linux kernel. +# + +CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) + +# Object file lists. +arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ + entry-fpsimd.o process.o ptrace.o setup.o signal.o \ + sys.o stacktrace.o time.o traps.o io.o vdso.o + +arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ + sys_compat.o +arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_SMP) += smp.o +arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o + +obj-y += $(arm64-obj-y) vdso/ +obj-m += $(arm64-obj-m) +head-y := head.o +extra-y := $(head-y) vmlinux.lds + +# vDSO - this must be built first to generate the symbol offsets +$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h +$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c new file mode 100644 index 00000000000..cef3925eaf6 --- /dev/null +++ b/arch/arm64/kernel/arm64ksyms.c @@ -0,0 +1,46 @@ +/* + * Based on arch/arm/kernel/armksyms.c + * + * Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/cryptohash.h> +#include <linux/delay.h> +#include <linux/in6.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +#include <asm/checksum.h> + + /* user mem (segment) */ +EXPORT_SYMBOL(__strnlen_user); +EXPORT_SYMBOL(__strncpy_from_user); + +EXPORT_SYMBOL(copy_page); + +EXPORT_SYMBOL(__copy_from_user); +EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__clear_user); + + /* bitops */ +EXPORT_SYMBOL(__atomic_hash); + + /* physical memory */ +EXPORT_SYMBOL(memstart_addr); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c new file mode 100644 index 00000000000..a2a4d810bea --- /dev/null +++ b/arch/arm64/kernel/asm-offsets.c @@ -0,0 +1,108 @@ +/* + * Based on arch/arm/kernel/asm-offsets.c + * + * Copyright (C) 1995-2003 Russell King + * 2001-2002 Keith Owens + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <asm/thread_info.h> +#include <asm/memory.h> +#include <asm/cputable.h> +#include <asm/vdso_datapage.h> +#include <linux/kbuild.h> + +int main(void) +{ + DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); + BLANK(); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + BLANK(); + DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); + BLANK(); + DEFINE(S_X0, offsetof(struct pt_regs, regs[0])); + DEFINE(S_X1, offsetof(struct pt_regs, regs[1])); + DEFINE(S_X2, offsetof(struct pt_regs, regs[2])); + DEFINE(S_X3, offsetof(struct pt_regs, regs[3])); + DEFINE(S_X4, offsetof(struct pt_regs, regs[4])); + DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); + DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); + DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); + DEFINE(S_SP, offsetof(struct pt_regs, sp)); +#ifdef CONFIG_COMPAT + DEFINE(S_COMPAT_SP, offsetof(struct pt_regs, compat_sp)); +#endif + DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); + DEFINE(S_PC, offsetof(struct pt_regs, pc)); + DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); + DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + BLANK(); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + BLANK(); + DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); + DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); + BLANK(); + DEFINE(VM_EXEC, VM_EXEC); + BLANK(); + DEFINE(PAGE_SZ, PAGE_SIZE); + BLANK(); + DEFINE(CPU_INFO_SZ, sizeof(struct cpu_info)); + DEFINE(CPU_INFO_SETUP, offsetof(struct cpu_info, cpu_setup)); + BLANK(); + DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); + DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); + DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); + BLANK(); + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); + DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); + DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); + BLANK(); + DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); + DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); + DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); + DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); + DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); + DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); + DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); + DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); + BLANK(); + DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec)); + BLANK(); + DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); + DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + return 0; +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c new file mode 100644 index 00000000000..63cfc4a43f4 --- /dev/null +++ b/arch/arm64/kernel/cputable.c @@ -0,0 +1,33 @@ +/* + * arch/arm64/kernel/cputable.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> + +#include <asm/cputable.h> + +extern unsigned long __cpu_setup(void); + +struct cpu_info __initdata cpu_table[] = { + { + .cpu_id_val = 0x000f0000, + .cpu_id_mask = 0x000f0000, + .cpu_name = "AArch64 Processor", + .cpu_setup = __cpu_setup, + }, + { /* Empty */ }, +}; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c new file mode 100644 index 00000000000..0c3ba9f5137 --- /dev/null +++ b/arch/arm64/kernel/debug-monitors.c @@ -0,0 +1,288 @@ +/* + * ARMv8 single-step debug support and mdscr context switching. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/ptrace.h> +#include <linux/stat.h> + +#include <asm/debug-monitors.h> +#include <asm/local.h> +#include <asm/cputype.h> +#include <asm/system_misc.h> + +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + +/* Determine debug architecture. */ +u8 debug_monitors_arch(void) +{ + return read_cpuid(ID_AA64DFR0_EL1) & 0xf; +} + +/* + * MDSCR access routines. + */ +static void mdscr_write(u32 mdscr) +{ + unsigned long flags; + local_dbg_save(flags); + asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + local_dbg_restore(flags); +} + +static u32 mdscr_read(void) +{ + u32 mdscr; + asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); + return mdscr; +} + +/* + * Allow root to disable self-hosted debug from userspace. + * This is useful if you want to connect an external JTAG debugger. + */ +static u32 debug_enabled = 1; + +static int create_debug_debugfs_entry(void) +{ + debugfs_create_bool("debug_enabled", 0644, NULL, &debug_enabled); + return 0; +} +fs_initcall(create_debug_debugfs_entry); + +static int __init early_debug_disable(char *buf) +{ + debug_enabled = 0; + return 0; +} + +early_param("nodebugmon", early_debug_disable); + +/* + * Keep track of debug users on each core. + * The ref counts are per-cpu so we use a local_t type. + */ +static DEFINE_PER_CPU(local_t, mde_ref_count); +static DEFINE_PER_CPU(local_t, kde_ref_count); + +void enable_debug_monitors(enum debug_el el) +{ + u32 mdscr, enable = 0; + + WARN_ON(preemptible()); + + if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) + enable = DBG_MDSCR_MDE; + + if (el == DBG_ACTIVE_EL1 && + local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) + enable |= DBG_MDSCR_KDE; + + if (enable && debug_enabled) { + mdscr = mdscr_read(); + mdscr |= enable; + mdscr_write(mdscr); + } +} + +void disable_debug_monitors(enum debug_el el) +{ + u32 mdscr, disable = 0; + + WARN_ON(preemptible()); + + if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) + disable = ~DBG_MDSCR_MDE; + + if (el == DBG_ACTIVE_EL1 && + local_dec_and_test(&__get_cpu_var(kde_ref_count))) + disable &= ~DBG_MDSCR_KDE; + + if (disable) { + mdscr = mdscr_read(); + mdscr &= disable; + mdscr_write(mdscr); + } +} + +/* + * OS lock clearing. + */ +static void clear_os_lock(void *unused) +{ + asm volatile("msr mdscr_el1, %0" : : "r" (0)); + isb(); + asm volatile("msr oslar_el1, %0" : : "r" (0)); + isb(); +} + +static int __cpuinit os_lock_notify(struct notifier_block *self, + unsigned long action, void *data) +{ + int cpu = (unsigned long)data; + if (action == CPU_ONLINE) + smp_call_function_single(cpu, clear_os_lock, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata os_lock_nb = { + .notifier_call = os_lock_notify, +}; + +static int __cpuinit debug_monitors_init(void) +{ + /* Clear the OS lock. */ + smp_call_function(clear_os_lock, NULL, 1); + clear_os_lock(NULL); + + /* Register hotplug handler. */ + register_cpu_notifier(&os_lock_nb); + return 0; +} +postcore_initcall(debug_monitors_init); + +/* + * Single step API and exception handling. + */ +static void set_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + spsr |= DBG_SPSR_SS; + regs->pstate = spsr; +} + +static void clear_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + regs->pstate = spsr; +} + +static int single_step_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + /* + * If we are stepping a pending breakpoint, call the hw_breakpoint + * handler first. + */ + if (!reinstall_suspended_bps(regs)) + return 0; + + if (user_mode(regs)) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)instruction_pointer(regs); + force_sig_info(SIGTRAP, &info, current); + + /* + * ptrace will disable single step unless explicitly + * asked to re-enable it. For other clients, it makes + * sense to leave it enabled (i.e. rewind the controls + * to the active-not-pending state). + */ + user_rewind_single_step(current); + } else { + /* TODO: route to KGDB */ + pr_warning("Unexpected kernel single-step exception at EL1\n"); + /* + * Re-enable stepping since we know that we will be + * returning to regs. + */ + set_regs_spsr_ss(regs); + } + + return 0; +} + +static int __init single_step_init(void) +{ + hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, + TRAP_HWBKPT, "single-step handler"); + return 0; +} +arch_initcall(single_step_init); + +/* Re-enable single step for syscall restarting. */ +void user_rewind_single_step(struct task_struct *task) +{ + /* + * If single step is active for this thread, then set SPSR.SS + * to 1 to avoid returning to the active-pending state. + */ + if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_fastforward_single_step(struct task_struct *task) +{ + if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + clear_regs_spsr_ss(task_pt_regs(task)); +} + +/* Kernel API */ +void kernel_enable_single_step(struct pt_regs *regs) +{ + WARN_ON(!irqs_disabled()); + set_regs_spsr_ss(regs); + mdscr_write(mdscr_read() | DBG_MDSCR_SS); + enable_debug_monitors(DBG_ACTIVE_EL1); +} + +void kernel_disable_single_step(void) +{ + WARN_ON(!irqs_disabled()); + mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); + disable_debug_monitors(DBG_ACTIVE_EL1); +} + +int kernel_active_single_step(void) +{ + WARN_ON(!irqs_disabled()); + return mdscr_read() & DBG_MDSCR_SS; +} + +/* ptrace API */ +void user_enable_single_step(struct task_struct *task) +{ + set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); + set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_disable_single_step(struct task_struct *task) +{ + clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); +} diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S new file mode 100644 index 00000000000..17988a6e7ea --- /dev/null +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -0,0 +1,80 @@ +/* + * FP/SIMD state saving and restoring + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> + +/* + * Save the FP registers. + * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_save_state) + stp q0, q1, [x0, #16 * 0] + stp q2, q3, [x0, #16 * 2] + stp q4, q5, [x0, #16 * 4] + stp q6, q7, [x0, #16 * 6] + stp q8, q9, [x0, #16 * 8] + stp q10, q11, [x0, #16 * 10] + stp q12, q13, [x0, #16 * 12] + stp q14, q15, [x0, #16 * 14] + stp q16, q17, [x0, #16 * 16] + stp q18, q19, [x0, #16 * 18] + stp q20, q21, [x0, #16 * 20] + stp q22, q23, [x0, #16 * 22] + stp q24, q25, [x0, #16 * 24] + stp q26, q27, [x0, #16 * 26] + stp q28, q29, [x0, #16 * 28] + stp q30, q31, [x0, #16 * 30]! + mrs x8, fpsr + str w8, [x0, #16 * 2] + mrs x8, fpcr + str w8, [x0, #16 * 2 + 4] + ret +ENDPROC(fpsimd_save_state) + +/* + * Load the FP registers. + * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_load_state) + ldp q0, q1, [x0, #16 * 0] + ldp q2, q3, [x0, #16 * 2] + ldp q4, q5, [x0, #16 * 4] + ldp q6, q7, [x0, #16 * 6] + ldp q8, q9, [x0, #16 * 8] + ldp q10, q11, [x0, #16 * 10] + ldp q12, q13, [x0, #16 * 12] + ldp q14, q15, [x0, #16 * 14] + ldp q16, q17, [x0, #16 * 16] + ldp q18, q19, [x0, #16 * 18] + ldp q20, q21, [x0, #16 * 20] + ldp q22, q23, [x0, #16 * 22] + ldp q24, q25, [x0, #16 * 24] + ldp q26, q27, [x0, #16 * 26] + ldp q28, q29, [x0, #16 * 28] + ldp q30, q31, [x0, #16 * 30]! + ldr w8, [x0, #16 * 2] + ldr w9, [x0, #16 * 2 + 4] + msr fpsr, x8 + msr fpcr, x9 + ret +ENDPROC(fpsimd_load_state) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S new file mode 100644 index 00000000000..38cf853a366 --- /dev/null +++ b/arch/arm64/kernel/entry.S @@ -0,0 +1,695 @@ +/* + * Low-level exception handling code + * + * Copyright (C) 2012 ARM Ltd. + * Authors: Catalin Marinas <catalin.marinas@arm.com> + * Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/errno.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +/* + * Bad Abort numbers + *----------------- + */ +#define BAD_SYNC 0 +#define BAD_IRQ 1 +#define BAD_FIQ 2 +#define BAD_ERROR 3 + + .macro kernel_entry, el, regsize = 64 + sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR + .if \regsize == 32 + mov w0, w0 // zero upper 32 bits of x0 + .endif + push x28, x29 + push x26, x27 + push x24, x25 + push x22, x23 + push x20, x21 + push x18, x19 + push x16, x17 + push x14, x15 + push x12, x13 + push x10, x11 + push x8, x9 + push x6, x7 + push x4, x5 + push x2, x3 + push x0, x1 + .if \el == 0 + mrs x21, sp_el0 + .else + add x21, sp, #S_FRAME_SIZE + .endif + mrs x22, elr_el1 + mrs x23, spsr_el1 + stp lr, x21, [sp, #S_LR] + stp x22, x23, [sp, #S_PC] + + /* + * Set syscallno to -1 by default (overridden later if real syscall). + */ + .if \el == 0 + mvn x21, xzr + str x21, [sp, #S_SYSCALLNO] + .endif + + /* + * Registers that may be useful after this macro is invoked: + * + * x21 - aborted SP + * x22 - aborted PC + * x23 - aborted PSTATE + */ + .endm + + .macro kernel_exit, el, ret = 0 + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR + .if \el == 0 + ldr x23, [sp, #S_SP] // load return stack pointer + .endif + .if \ret + ldr x1, [sp, #S_X1] // preserve x0 (syscall return) + add sp, sp, S_X2 + .else + pop x0, x1 + .endif + pop x2, x3 // load the rest of the registers + pop x4, x5 + pop x6, x7 + pop x8, x9 + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 + .if \el == 0 + msr sp_el0, x23 + .endif + pop x10, x11 + pop x12, x13 + pop x14, x15 + pop x16, x17 + pop x18, x19 + pop x20, x21 + pop x22, x23 + pop x24, x25 + pop x26, x27 + pop x28, x29 + ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + eret // return to kernel + .endm + + .macro get_thread_info, rd + mov \rd, sp + and \rd, \rd, #~((1 << 13) - 1) // top of 8K stack + .endm + +/* + * These are the registers used in the syscall handler, and allow us to + * have in theory up to 7 arguments to a function - x0 to x6. + * + * x7 is reserved for the system call number in 32-bit mode. + */ +sc_nr .req x25 // number of system calls +scno .req x26 // syscall number +stbl .req x27 // syscall table pointer +tsk .req x28 // current thread_info + +/* + * Interrupt handling. + */ + .macro irq_handler + ldr x1, handle_arch_irq + mov x0, sp + blr x1 + .endm + + .text + +/* + * Exception vectors. + */ + .macro ventry label + .align 7 + b \label + .endm + + .align 11 +ENTRY(vectors) + ventry el1_sync_invalid // Synchronous EL1t + ventry el1_irq_invalid // IRQ EL1t + ventry el1_fiq_invalid // FIQ EL1t + ventry el1_error_invalid // Error EL1t + + ventry el1_sync // Synchronous EL1h + ventry el1_irq // IRQ EL1h + ventry el1_fiq_invalid // FIQ EL1h + ventry el1_error_invalid // Error EL1h + + ventry el0_sync // Synchronous 64-bit EL0 + ventry el0_irq // IRQ 64-bit EL0 + ventry el0_fiq_invalid // FIQ 64-bit EL0 + ventry el0_error_invalid // Error 64-bit EL0 + +#ifdef CONFIG_COMPAT + ventry el0_sync_compat // Synchronous 32-bit EL0 + ventry el0_irq_compat // IRQ 32-bit EL0 + ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 + ventry el0_error_invalid_compat // Error 32-bit EL0 +#else + ventry el0_sync_invalid // Synchronous 32-bit EL0 + ventry el0_irq_invalid // IRQ 32-bit EL0 + ventry el0_fiq_invalid // FIQ 32-bit EL0 + ventry el0_error_invalid // Error 32-bit EL0 +#endif +END(vectors) + +/* + * Invalid mode handlers + */ + .macro inv_entry, el, reason, regsize = 64 + kernel_entry el, \regsize + mov x0, sp + mov x1, #\reason + mrs x2, esr_el1 + b bad_mode + .endm + +el0_sync_invalid: + inv_entry 0, BAD_SYNC +ENDPROC(el0_sync_invalid) + +el0_irq_invalid: + inv_entry 0, BAD_IRQ +ENDPROC(el0_irq_invalid) + +el0_fiq_invalid: + inv_entry 0, BAD_FIQ +ENDPROC(el0_fiq_invalid) + +el0_error_invalid: + inv_entry 0, BAD_ERROR +ENDPROC(el0_error_invalid) + +#ifdef CONFIG_COMPAT +el0_fiq_invalid_compat: + inv_entry 0, BAD_FIQ, 32 +ENDPROC(el0_fiq_invalid_compat) + +el0_error_invalid_compat: + inv_entry 0, BAD_ERROR, 32 +ENDPROC(el0_error_invalid_compat) +#endif + +el1_sync_invalid: + inv_entry 1, BAD_SYNC +ENDPROC(el1_sync_invalid) + +el1_irq_invalid: + inv_entry 1, BAD_IRQ +ENDPROC(el1_irq_invalid) + +el1_fiq_invalid: + inv_entry 1, BAD_FIQ +ENDPROC(el1_fiq_invalid) + +el1_error_invalid: + inv_entry 1, BAD_ERROR +ENDPROC(el1_error_invalid) + +/* + * EL1 mode handlers. + */ + .align 6 +el1_sync: + kernel_entry 1 + mrs x1, esr_el1 // read the syndrome register + lsr x24, x1, #26 // exception class + cmp x24, #0x25 // data abort in EL1 + b.eq el1_da + cmp x24, #0x18 // configurable trap + b.eq el1_undef + cmp x24, #0x26 // stack alignment exception + b.eq el1_sp_pc + cmp x24, #0x22 // pc alignment exception + b.eq el1_sp_pc + cmp x24, #0x00 // unknown exception in EL1 + b.eq el1_undef + cmp x24, #0x30 // debug exception in EL1 + b.ge el1_dbg + b el1_inv +el1_da: + /* + * Data abort handling + */ + mrs x0, far_el1 + enable_dbg_if_not_stepping x2 + // re-enable interrupts if they were enabled in the aborted context + tbnz x23, #7, 1f // PSR_I_BIT + enable_irq +1: + mov x2, sp // struct pt_regs + bl do_mem_abort + + // disable interrupts before pulling preserved data off the stack + disable_irq + kernel_exit 1 +el1_sp_pc: + /* + * Stack or PC alignment exception handling + */ + mrs x0, far_el1 + mov x1, x25 + mov x2, sp + b do_sp_pc_abort +el1_undef: + /* + * Undefined instruction + */ + mov x0, sp + b do_undefinstr +el1_dbg: + /* + * Debug exception handling + */ + tbz x24, #0, el1_inv // EL1 only + mrs x0, far_el1 + mov x2, sp // struct pt_regs + bl do_debug_exception + + kernel_exit 1 +el1_inv: + // TODO: add support for undefined instructions in kernel mode + mov x0, sp + mov x1, #BAD_SYNC + mrs x2, esr_el1 + b bad_mode +ENDPROC(el1_sync) + + .align 6 +el1_irq: + kernel_entry 1 + enable_dbg_if_not_stepping x0 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif +#ifdef CONFIG_PREEMPT + get_thread_info tsk + ldr x24, [tsk, #TI_PREEMPT] // get preempt count + add x0, x24, #1 // increment it + str x0, [tsk, #TI_PREEMPT] +#endif + irq_handler +#ifdef CONFIG_PREEMPT + str x24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz x24, 1f // preempt count != 0 + ldr x0, [tsk, #TI_FLAGS] // get flags + tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? + bl el1_preempt +1: +#endif +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on +#endif + kernel_exit 1 +ENDPROC(el1_irq) + +#ifdef CONFIG_PREEMPT +el1_preempt: + mov x24, lr +1: enable_dbg + bl preempt_schedule_irq // irq en/disable is done inside + ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS + tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? + ret x24 +#endif + +/* + * EL0 mode handlers. + */ + .align 6 +el0_sync: + kernel_entry 0 + mrs x25, esr_el1 // read the syndrome register + lsr x24, x25, #26 // exception class + cmp x24, #0x15 // SVC in 64-bit state + b.eq el0_svc + adr lr, ret_from_exception + cmp x24, #0x24 // data abort in EL0 + b.eq el0_da + cmp x24, #0x20 // instruction abort in EL0 + b.eq el0_ia + cmp x24, #0x07 // FP/ASIMD access + b.eq el0_fpsimd_acc + cmp x24, #0x2c // FP/ASIMD exception + b.eq el0_fpsimd_exc + cmp x24, #0x18 // configurable trap + b.eq el0_undef + cmp x24, #0x26 // stack alignment exception + b.eq el0_sp_pc + cmp x24, #0x22 // pc alignment exception + b.eq el0_sp_pc + cmp x24, #0x00 // unknown exception in EL0 + b.eq el0_undef + cmp x24, #0x30 // debug exception in EL0 + b.ge el0_dbg + b el0_inv + +#ifdef CONFIG_COMPAT + .align 6 +el0_sync_compat: + kernel_entry 0, 32 + mrs x25, esr_el1 // read the syndrome register + lsr x24, x25, #26 // exception class + cmp x24, #0x11 // SVC in 32-bit state + b.eq el0_svc_compat + adr lr, ret_from_exception + cmp x24, #0x24 // data abort in EL0 + b.eq el0_da + cmp x24, #0x20 // instruction abort in EL0 + b.eq el0_ia + cmp x24, #0x07 // FP/ASIMD access + b.eq el0_fpsimd_acc + cmp x24, #0x28 // FP/ASIMD exception + b.eq el0_fpsimd_exc + cmp x24, #0x00 // unknown exception in EL0 + b.eq el0_undef + cmp x24, #0x30 // debug exception in EL0 + b.ge el0_dbg + b el0_inv +el0_svc_compat: + /* + * AArch32 syscall handling + */ + adr stbl, compat_sys_call_table // load compat syscall table pointer + uxtw scno, w7 // syscall number in w7 (r7) + mov sc_nr, #__NR_compat_syscalls + b el0_svc_naked + + .align 6 +el0_irq_compat: + kernel_entry 0, 32 + b el0_irq_naked +#endif + +el0_da: + /* + * Data abort handling + */ + mrs x0, far_el1 + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + mov x1, x25 + mov x2, sp + b do_mem_abort +el0_ia: + /* + * Instruction abort handling + */ + mrs x0, far_el1 + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x2, sp + b do_mem_abort +el0_fpsimd_acc: + /* + * Floating Point or Advanced SIMD access + */ + mov x0, x25 + mov x1, sp + b do_fpsimd_acc +el0_fpsimd_exc: + /* + * Floating Point or Advanced SIMD exception + */ + mov x0, x25 + mov x1, sp + b do_fpsimd_exc +el0_sp_pc: + /* + * Stack or PC alignment exception handling + */ + mrs x0, far_el1 + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + mov x1, x25 + mov x2, sp + b do_sp_pc_abort +el0_undef: + /* + * Undefined instruction + */ + mov x0, sp + b do_undefinstr +el0_dbg: + /* + * Debug exception handling + */ + tbnz x24, #0, el0_inv // EL0 only + mrs x0, far_el1 + disable_step x1 + mov x1, x25 + mov x2, sp + b do_debug_exception +el0_inv: + mov x0, sp + mov x1, #BAD_SYNC + mrs x2, esr_el1 + b bad_mode +ENDPROC(el0_sync) + + .align 6 +el0_irq: + kernel_entry 0 +el0_irq_naked: + disable_step x1 + isb + enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + get_thread_info tsk +#ifdef CONFIG_PREEMPT + ldr x24, [tsk, #TI_PREEMPT] // get preempt count + add x23, x24, #1 // increment it + str x23, [tsk, #TI_PREEMPT] +#endif + irq_handler +#ifdef CONFIG_PREEMPT + ldr x0, [tsk, #TI_PREEMPT] + str x24, [tsk, #TI_PREEMPT] + cmp x0, x23 + b.eq 1f + mov x1, #0 + str x1, [x1] // BUG +1: +#endif +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on +#endif + b ret_to_user +ENDPROC(el0_irq) + +/* + * This is the return code to user mode for abort handlers + */ +ret_from_exception: + get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_exception) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + add x8, x0, #THREAD_CPU_CONTEXT + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, #THREAD_CPU_CONTEXT + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + ret +ENDPROC(cpu_switch_to) + +/* + * This is the fast syscall return path. We do as little as possible here, + * and this includes saving x0 back into the kernel stack. + */ +ret_fast_syscall: + disable_irq // disable interrupts + ldr x1, [tsk, #TI_FLAGS] + and x2, x1, #_TIF_WORK_MASK + cbnz x2, fast_work_pending + tbz x1, #TIF_SINGLESTEP, fast_exit + disable_dbg + enable_step x2 +fast_exit: + kernel_exit 0, ret = 1 + +/* + * Ok, we need to do extra processing, enter the slow path. + */ +fast_work_pending: + str x0, [sp, #S_X0] // returned x0 +work_pending: + tbnz x1, #TIF_NEED_RESCHED, work_resched + /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ + ldr x2, [sp, #S_PSTATE] + mov x0, sp // 'regs' + tst x2, #PSR_MODE_MASK // user mode regs? + b.ne no_work_pending // returning to kernel + bl do_notify_resume + b ret_to_user +work_resched: + enable_dbg + bl schedule + +/* + * "slow" syscall return path. + */ +ENTRY(ret_to_user) + disable_irq // disable interrupts + ldr x1, [tsk, #TI_FLAGS] + and x2, x1, #_TIF_WORK_MASK + cbnz x2, work_pending + tbz x1, #TIF_SINGLESTEP, no_work_pending + disable_dbg + enable_step x2 +no_work_pending: + kernel_exit 0, ret = 0 +ENDPROC(ret_to_user) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) + +/* + * SVC handler. + */ + .align 6 +el0_svc: + adrp stbl, sys_call_table // load syscall table pointer + uxtw scno, w8 // syscall number in w8 + mov sc_nr, #__NR_syscalls +el0_svc_naked: // compat entry point + stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + disable_step x16 + isb + enable_dbg + enable_irq + + get_thread_info tsk + ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing + tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + adr lr, ret_fast_syscall // return address + cmp scno, sc_nr // check upper syscall limit + b.hs ni_sys + ldr x16, [stbl, scno, lsl #3] // address in the syscall table + br x16 // call sys_* routine +ni_sys: + mov x0, sp + b do_ni_syscall +ENDPROC(el0_svc) + + /* + * This is the really slow path. We're going to be doing context + * switches, and waiting for our parent to respond. + */ +__sys_trace: + mov x1, sp + mov w0, #0 // trace entry + bl syscall_trace + adr lr, __sys_trace_return // return address + uxtw scno, w0 // syscall number (possibly new) + mov x1, sp // pointer to regs + cmp scno, sc_nr // check upper syscall limit + b.hs ni_sys + ldp x0, x1, [sp] // restore the syscall args + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldr x16, [stbl, scno, lsl #3] // address in the syscall table + br x16 // call sys_* routine + +__sys_trace_return: + str x0, [sp] // save returned x0 + mov x1, sp + mov w0, #1 // trace exit + bl syscall_trace + b ret_to_user + +/* + * Special system call wrappers. + */ +ENTRY(sys_execve_wrapper) + mov x3, sp + b sys_execve +ENDPROC(sys_execve_wrapper) + +ENTRY(sys_clone_wrapper) + mov x5, sp + b sys_clone +ENDPROC(sys_clone_wrapper) + +ENTRY(sys_rt_sigreturn_wrapper) + mov x0, sp + b sys_rt_sigreturn +ENDPROC(sys_rt_sigreturn_wrapper) + +ENTRY(sys_sigaltstack_wrapper) + ldr x2, [sp, #S_SP] + b sys_sigaltstack +ENDPROC(sys_sigaltstack_wrapper) + +ENTRY(handle_arch_irq) + .quad 0 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c new file mode 100644 index 00000000000..e8b8357aedb --- /dev/null +++ b/arch/arm64/kernel/fpsimd.c @@ -0,0 +1,106 @@ +/* + * FP/SIMD context switching and fault handling + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> + +#include <asm/fpsimd.h> +#include <asm/cputype.h> + +#define FPEXC_IOF (1 << 0) +#define FPEXC_DZF (1 << 1) +#define FPEXC_OFF (1 << 2) +#define FPEXC_UFF (1 << 3) +#define FPEXC_IXF (1 << 4) +#define FPEXC_IDF (1 << 7) + +/* + * Trapped FP/ASIMD access. + */ +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +{ + /* TODO: implement lazy context saving/restoring */ + WARN_ON(1); +} + +/* + * Raise a SIGFPE for the current process. + */ +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +{ + siginfo_t info; + unsigned int si_code = 0; + + if (esr & FPEXC_IOF) + si_code = FPE_FLTINV; + else if (esr & FPEXC_DZF) + si_code = FPE_FLTDIV; + else if (esr & FPEXC_OFF) + si_code = FPE_FLTOVF; + else if (esr & FPEXC_UFF) + si_code = FPE_FLTUND; + else if (esr & FPEXC_IXF) + si_code = FPE_FLTRES; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGFPE; + info.si_code = si_code; + info.si_addr = (void __user *)instruction_pointer(regs); + + send_sig_info(SIGFPE, &info, current); +} + +void fpsimd_thread_switch(struct task_struct *next) +{ + /* check if not kernel threads */ + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + if (next->mm) + fpsimd_load_state(&next->thread.fpsimd_state); +} + +void fpsimd_flush_thread(void) +{ + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_load_state(¤t->thread.fpsimd_state); +} + +/* + * FP/SIMD support code initialisation. + */ +static int __init fpsimd_init(void) +{ + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + + if (pfr & (0xf << 16)) { + pr_notice("Floating-point is not implemented\n"); + return 0; + } + elf_hwcap |= HWCAP_FP; + + if (pfr & (0xf << 20)) + pr_notice("Advanced SIMD is not implemented\n"); + else + elf_hwcap |= HWCAP_ASIMD; + + return 0; +} +late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S new file mode 100644 index 00000000000..a2f02b63eae --- /dev/null +++ b/arch/arm64/kernel/head.S @@ -0,0 +1,510 @@ +/* + * Low-level CPU initialisation + * Based on arch/arm/kernel/head.S + * + * Copyright (C) 1994-2002 Russell King + * Copyright (C) 2003-2012 ARM Ltd. + * Authors: Catalin Marinas <catalin.marinas@arm.com> + * Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <asm/memory.h> +#include <asm/thread_info.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> + +/* + * swapper_pg_dir is the virtual address of the initial page table. We place + * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has + * 2 pages and is placed below swapper_pg_dir. + */ +#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) + +#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 +#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#endif + +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) + + .globl swapper_pg_dir + .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE + + .globl idmap_pg_dir + .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE + + .macro pgtbl, ttb0, ttb1, phys + add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE + sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .endm + +#ifdef CONFIG_ARM64_64K_PAGES +#define BLOCK_SHIFT PAGE_SHIFT +#define BLOCK_SIZE PAGE_SIZE +#else +#define BLOCK_SHIFT SECTION_SHIFT +#define BLOCK_SIZE SECTION_SIZE +#endif + +#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_END _end + +/* + * Initial memory map attributes. + */ +#ifndef CONFIG_SMP +#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF +#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF +#else +#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED +#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S +#endif + +#ifdef CONFIG_ARM64_64K_PAGES +#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS +#define IO_MMUFLAGS PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_XN | PTE_FLAGS +#else +#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS +#define IO_MMUFLAGS PMD_ATTRINDX(MT_DEVICE_nGnRE) | PMD_SECT_XN | PMD_FLAGS +#endif + +/* + * Kernel startup entry point. + * --------------------------- + * + * The requirements are: + * MMU = off, D-cache = off, I-cache = on or off, + * x0 = physical address to the FDT blob. + * + * This code is mostly position independent so you call this at + * __pa(PAGE_OFFSET + TEXT_OFFSET). + * + * Note that the callee-saved registers are used for storing variables + * that are useful before the MMU is enabled. The allocations are described + * in the entry routines. + */ + __HEAD + + /* + * DO NOT MODIFY. Image header expected by Linux boot-loaders. + */ + b stext // branch to kernel start, magic + .long 0 // reserved + .quad TEXT_OFFSET // Image load offset from start of RAM + .quad 0 // reserved + .quad 0 // reserved + +ENTRY(stext) + mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1 + mrs x22, midr_el1 // x22=cpuid + mov x0, x22 + bl lookup_processor_type + mov x23, x0 // x23=current cpu_table + cbz x23, __error_p // invalid processor (x23=0)? + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl __vet_fdt + bl __create_page_tables // x25=TTBR0, x26=TTBR1 + /* + * The following calls CPU specific code in a position independent + * manner. See arch/arm64/mm/proc.S for details. x23 = base of + * cpu_info structure selected by lookup_processor_type above. + * On return, the CPU will be ready for the MMU to be turned on and + * the TCR will have been set. + */ + ldr x27, __switch_data // address to jump to after + // MMU has been enabled + adr lr, __enable_mmu // return (PIC) address + ldr x12, [x23, #CPU_INFO_SETUP] + add x12, x12, x28 // __virt_to_phys + br x12 // initialise processor +ENDPROC(stext) + +/* + * If we're fortunate enough to boot at EL2, ensure that the world is + * sane before dropping to EL1. + */ +ENTRY(el2_setup) + mrs x0, CurrentEL + cmp x0, #PSR_MODE_EL2t + ccmp x0, #PSR_MODE_EL2h, #0x4, ne + b.eq 1f + ret + + /* Hyp configuration. */ +1: mov x0, #(1 << 31) // 64-bit EL1 + msr hcr_el2, x0 + + /* Generic timers. */ + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 + + /* Populate ID registers. */ + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 + + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits + movk x0, #0x30d0, lsl #16 + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 + +#ifdef CONFIG_COMPAT + msr hstr_el2, xzr // Disable CP15 traps to EL2 +#endif + + /* spsr */ + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, x0 + msr elr_el2, lr + eret +ENDPROC(el2_setup) + + .align 3 +2: .quad . + .quad PAGE_OFFSET + +#ifdef CONFIG_SMP + .pushsection .smp.pen.text, "ax" + .align 3 +1: .quad . + .quad secondary_holding_pen_release + + /* + * This provides a "holding pen" for platforms to hold all secondary + * cores are held until we're ready for them to initialise. + */ +ENTRY(secondary_holding_pen) + bl el2_setup // Drop to EL1 + mrs x0, mpidr_el1 + and x0, x0, #15 // CPU number + adr x1, 1b + ldp x2, x3, [x1] + sub x1, x1, x2 + add x3, x3, x1 +pen: ldr x4, [x3] + cmp x4, x0 + b.eq secondary_startup + wfe + b pen +ENDPROC(secondary_holding_pen) + .popsection + +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + */ + mrs x22, midr_el1 // x22=cpuid + mov x0, x22 + bl lookup_processor_type + mov x23, x0 // x23=current cpu_table + cbz x23, __error_p // invalid processor (x23=0)? + + bl __calc_phys_offset // x24=phys offset + pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + ldr x12, [x23, #CPU_INFO_SETUP] + add x12, x12, x28 // __virt_to_phys + blr x12 // initialise processor + + ldr x21, =secondary_data + ldr x27, =__secondary_switched // address to jump to after enabling the MMU + b __enable_mmu +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr x0, [x21] // get secondary_data.stack + mov sp, x0 + mov x29, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) +#endif /* CONFIG_SMP */ + +/* + * Setup common bits before finally enabling the MMU. Essentially this is just + * loading the page table pointer and vector base registers. + * + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on + * the MMU. + */ +__enable_mmu: + ldr x5, =vectors + msr vbar_el1, x5 + msr ttbr0_el1, x25 // load TTBR0 + msr ttbr1_el1, x26 // load TTBR1 + isb + b __turn_mmu_on +ENDPROC(__enable_mmu) + +/* + * Enable the MMU. This completely changes the structure of the visible memory + * space. You will not be able to trace execution through this. + * + * x0 = system control register + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion + */ + .align 6 +__turn_mmu_on: + msr sctlr_el1, x0 + isb + br x27 +ENDPROC(__turn_mmu_on) + +/* + * Calculate the start of physical memory. + */ +__calc_phys_offset: + adr x0, 1f + ldp x1, x2, [x0] + sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET + add x24, x2, x28 // x24 = PHYS_OFFSET + ret +ENDPROC(__calc_phys_offset) + + .align 3 +1: .quad . + .quad PAGE_OFFSET + +/* + * Macro to populate the PGD for the corresponding block entry in the next + * level (tbl) for the given virtual address. + * + * Preserves: pgd, tbl, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 + lsr \tmp1, \virt, #PGDIR_SHIFT + and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index + orr \tmp2, \tbl, #3 // PGD entry table type + str \tmp2, [\pgd, \tmp1, lsl #3] + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + lsr \phys, \phys, #BLOCK_SHIFT + .if \idmap + and \start, \phys, #PTRS_PER_PTE - 1 // table index + .else + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + .endif + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + .ifnc \start,\end + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index + .endif +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + .ifnc \start,\end + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endif + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + add x0, x25, #PAGE_SIZE // section table address + adr x3, __turn_mmu_on // virtual/physical address + create_pgd_entry x25, x0, x3, x5, x6 + create_block_map x0, x7, x3, x5, x5, idmap=1 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + add x0, x26, #PAGE_SIZE // section table address + mov x5, #PAGE_OFFSET + create_pgd_entry x26, x0, x5, x3, x6 + ldr x6, =KERNEL_END - 1 + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __data_loc // x4 + .quad _data // x5 + .quad __bss_start // x6 + .quad _end // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x4, x5, [x3], #16 + ldp x6, x7, [x3], #16 + cmp x4, x5 // Copy data segment if needed +1: ccmp x5, x6, #4, ne + b.eq 2f + ldr x16, [x4], #8 + str x16, [x5], #8 + b 1b +2: +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * Exception handling. Something went wrong and we can't proceed. We ought to + * tell the user, but since we don't have any guarantee that we're even + * running on the right architecture, we do virtually nothing. + */ +__error_p: +ENDPROC(__error_p) + +__error: +1: nop + b 1b +ENDPROC(__error) + +/* + * This function gets the processor ID in w0 and searches the cpu_table[] for + * a match. It returns a pointer to the struct cpu_info it found. The + * cpu_table[] must end with an empty (all zeros) structure. + * + * This routine can be called via C code and it needs to work with the MMU + * both disabled and enabled (the offset is calculated automatically). + */ +ENTRY(lookup_processor_type) + adr x1, __lookup_processor_type_data + ldp x2, x3, [x1] + sub x1, x1, x2 // get offset between VA and PA + add x3, x3, x1 // convert VA to PA +1: + ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask + cbz w5, 2f // end of list? + and w6, w6, w0 + cmp w5, w6 + b.eq 3f + add x3, x3, #CPU_INFO_SZ + b 1b +2: + mov x3, #0 // unknown processor +3: + mov x0, x3 + ret +ENDPROC(lookup_processor_type) + + .align 3 + .type __lookup_processor_type_data, %object +__lookup_processor_type_data: + .quad . + .quad cpu_table + .size __lookup_processor_type_data, . - __lookup_processor_type_data + +/* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..5ab825c59db --- /dev/null +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -0,0 +1,880 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define pr_fmt(fmt) "hw-breakpoint: " fmt + +#include <linux/errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/smp.h> + +#include <asm/compat.h> +#include <asm/current.h> +#include <asm/debug-monitors.h> +#include <asm/hw_breakpoint.h> +#include <asm/kdebug.h> +#include <asm/traps.h> +#include <asm/cputype.h> +#include <asm/system_misc.h> + +/* Breakpoint currently in use for each BRP. */ +static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); + +/* Watchpoint currently in use for each WRP. */ +static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); + +/* Currently stepping a per-CPU kernel breakpoint. */ +static DEFINE_PER_CPU(int, stepping_kernel_bp); + +/* Number of BRP/WRP registers on this CPU. */ +static int core_num_brps; +static int core_num_wrps; + +/* Determine number of BRP registers available. */ +static int get_num_brps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; +} + +/* Determine number of WRP registers available. */ +static int get_num_wrps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + +#define READ_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + AARCH64_DBG_READ(N, REG, VAL); \ + break + +#define WRITE_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + AARCH64_DBG_WRITE(N, REG, VAL); \ + break + +#define GEN_READ_WB_REG_CASES(OFF, REG, VAL) \ + READ_WB_REG_CASE(OFF, 0, REG, VAL); \ + READ_WB_REG_CASE(OFF, 1, REG, VAL); \ + READ_WB_REG_CASE(OFF, 2, REG, VAL); \ + READ_WB_REG_CASE(OFF, 3, REG, VAL); \ + READ_WB_REG_CASE(OFF, 4, REG, VAL); \ + READ_WB_REG_CASE(OFF, 5, REG, VAL); \ + READ_WB_REG_CASE(OFF, 6, REG, VAL); \ + READ_WB_REG_CASE(OFF, 7, REG, VAL); \ + READ_WB_REG_CASE(OFF, 8, REG, VAL); \ + READ_WB_REG_CASE(OFF, 9, REG, VAL); \ + READ_WB_REG_CASE(OFF, 10, REG, VAL); \ + READ_WB_REG_CASE(OFF, 11, REG, VAL); \ + READ_WB_REG_CASE(OFF, 12, REG, VAL); \ + READ_WB_REG_CASE(OFF, 13, REG, VAL); \ + READ_WB_REG_CASE(OFF, 14, REG, VAL); \ + READ_WB_REG_CASE(OFF, 15, REG, VAL) + +#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL) \ + WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +static u64 read_wb_reg(int reg, int n) +{ + u64 val = 0; + + switch (reg + n) { + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); + default: + pr_warning("attempt to read from unknown breakpoint register %d\n", n); + } + + return val; +} + +static void write_wb_reg(int reg, int n, u64 val) +{ + switch (reg + n) { + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); + default: + pr_warning("attempt to write to unknown breakpoint register %d\n", n); + } + isb(); +} + +/* + * Convert a breakpoint privilege level to the corresponding exception + * level. + */ +static enum debug_el debug_exception_level(int privilege) +{ + switch (privilege) { + case AARCH64_BREAKPOINT_EL0: + return DBG_ACTIVE_EL0; + case AARCH64_BREAKPOINT_EL1: + return DBG_ACTIVE_EL1; + default: + pr_warning("invalid breakpoint privilege level %d\n", privilege); + return -EINVAL; + } +} + +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + struct debug_info *debug_info = ¤t->thread.debug; + int i, max_slots, ctrl_reg, val_reg, reg_enable; + u32 ctrl; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + ctrl_reg = AARCH64_DBG_REG_BCR; + val_reg = AARCH64_DBG_REG_BVR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + reg_enable = !debug_info->bps_disabled; + } else { + /* Watchpoint */ + ctrl_reg = AARCH64_DBG_REG_WCR; + val_reg = AARCH64_DBG_REG_WVR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + reg_enable = !debug_info->wps_disabled; + } + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + return -ENOSPC; + + /* Ensure debug monitors are enabled at the correct exception level. */ + enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + + return 0; +} + +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + int i, max_slots, base; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + base = AARCH64_DBG_REG_BCR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + } else { + /* Watchpoint */ + base = AARCH64_DBG_REG_WCR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + } + + /* Remove the breakpoint. */ + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + return; + + /* Reset the control register. */ + write_wb_reg(base, i, 0); + + /* Release the debug monitors for the correct exception level. */ + disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case ARM_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case ARM_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case ARM_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; + case ARM_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; + } + + return len_in_bytes; +} + +/* + * Check whether bp virtual address is in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ + unsigned int len; + unsigned long va; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + va = info->address; + len = get_hbp_len(info->ctrl.len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl. + * Hopefully this will disappear when ptrace can bypass the conversion + * to generic breakpoint descriptions. + */ +int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type) +{ + /* Type */ + switch (ctrl.type) { + case ARM_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case ARM_BREAKPOINT_LOAD: + *gen_type = HW_BREAKPOINT_R; + break; + case ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_W; + break; + case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_RW; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (ctrl.len) { + case ARM_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case ARM_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case ARM_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; + case ARM_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Construct an arch_hw_breakpoint from a perf_event. + */ +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_X: + info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + break; + case HW_BREAKPOINT_R: + info->ctrl.type = ARM_BREAKPOINT_LOAD; + break; + case HW_BREAKPOINT_W: + info->ctrl.type = ARM_BREAKPOINT_STORE; + break; + case HW_BREAKPOINT_RW: + info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->ctrl.len = ARM_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->ctrl.len = ARM_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->ctrl.len = ARM_BREAKPOINT_LEN_4; + break; + case HW_BREAKPOINT_LEN_8: + info->ctrl.len = ARM_BREAKPOINT_LEN_8; + break; + default: + return -EINVAL; + } + + /* + * On AArch64, we only permit breakpoints of length 4, whereas + * AArch32 also requires breakpoints of length 2 for Thumb. + * Watchpoints can be of length 1, 2, 4 or 8 bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + if (is_compat_task()) { + if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + } else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { + /* + * FIXME: Some tools (I'm looking at you perf) assume + * that breakpoints should be sizeof(long). This + * is nonsense. For now, we fix up the parameter + * but we should probably return -EINVAL instead. + */ + info->ctrl.len = ARM_BREAKPOINT_LEN_4; + } + } + + /* Address */ + info->address = bp->attr.bp_addr; + + /* + * Privilege + * Note that we disallow combined EL0/EL1 breakpoints because + * that would complicate the stepping code. + */ + if (arch_check_bp_in_kernelspace(bp)) + info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; + else + info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + + /* Enabled? */ + info->ctrl.enabled = !bp->attr.disabled; + + return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings. + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + int ret; + u64 alignment_mask, offset; + + /* Build the arch_hw_breakpoint. */ + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + /* + * Check address alignment. + * We don't do any clever alignment correction for watchpoints + * because using 64-bit unaligned addresses is deprecated for + * AArch64. + * + * AArch32 tasks expect some simple alignment fixups, so emulate + * that here. + */ + if (is_compat_task()) { + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + else + alignment_mask = 0x3; + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + case 2: + /* Allow halfword watchpoints and breakpoints. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + default: + return -EINVAL; + } + + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + } else { + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) + alignment_mask = 0x3; + else + alignment_mask = 0x7; + if (info->address & alignment_mask) + return -EINVAL; + } + + /* + * Disallow per-task kernel breakpoints since these would + * complicate the stepping code. + */ + if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target) + return -EINVAL; + + return 0; +} + +/* + * Enable/disable all of the breakpoints active at the specified + * exception level at the register level. + * This is used when single-stepping after a breakpoint exception. + */ +static void toggle_bp_registers(int reg, enum debug_el el, int enable) +{ + int i, max_slots, privilege; + u32 ctrl; + struct perf_event **slots; + + switch (reg) { + case AARCH64_DBG_REG_BCR: + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + break; + case AARCH64_DBG_REG_WCR: + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + break; + default: + return; + } + + for (i = 0; i < max_slots; ++i) { + if (!slots[i]) + continue; + + privilege = counter_arch_bp(slots[i])->ctrl.privilege; + if (debug_exception_level(privilege) != el) + continue; + + ctrl = read_wb_reg(reg, i); + if (enable) + ctrl |= 0x1; + else + ctrl &= ~0x1; + write_wb_reg(reg, i, ctrl); + } +} + +/* + * Debug exception handlers. + */ +static int breakpoint_handler(unsigned long unused, unsigned int esr, + struct pt_regs *regs) +{ + int i, step = 0, *kernel_step; + u32 ctrl_reg; + u64 addr, val; + struct perf_event *bp, **slots; + struct debug_info *debug_info; + struct arch_hw_breakpoint_ctrl ctrl; + + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + addr = instruction_pointer(regs); + debug_info = ¤t->thread.debug; + + for (i = 0; i < core_num_brps; ++i) { + rcu_read_lock(); + + bp = slots[i]; + + if (bp == NULL) + goto unlock; + + /* Check if the breakpoint value matches. */ + val = read_wb_reg(AARCH64_DBG_REG_BVR, i); + if (val != (addr & ~0x3)) + goto unlock; + + /* Possible match, check the byte address select to confirm. */ + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if (!((1 << (addr & 0x3)) & ctrl.len)) + goto unlock; + + counter_arch_bp(bp)->trigger = addr; + perf_bp_event(bp, regs); + + /* Do we need to handle the stepping? */ + if (!bp->overflow_handler) + step = 1; +unlock: + rcu_read_unlock(); + } + + if (!step) + return 0; + + if (user_mode(regs)) { + debug_info->bps_disabled = 1; + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0); + + /* If we're already stepping a watchpoint, just return. */ + if (debug_info->wps_disabled) + return 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + debug_info->suspended_step = 1; + else + user_enable_single_step(current); + } else { + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + if (*kernel_step != ARM_KERNEL_STEP_NONE) + return 0; + + if (kernel_active_single_step()) { + *kernel_step = ARM_KERNEL_STEP_SUSPEND; + } else { + *kernel_step = ARM_KERNEL_STEP_ACTIVE; + kernel_enable_single_step(regs); + } + } + + return 0; +} + +static int watchpoint_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + int i, step = 0, *kernel_step, access; + u32 ctrl_reg; + u64 val, alignment_mask; + struct perf_event *wp, **slots; + struct debug_info *debug_info; + struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint_ctrl ctrl; + + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + debug_info = ¤t->thread.debug; + + for (i = 0; i < core_num_wrps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ + if (is_compat_task()) { + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + else + alignment_mask = 0x3; + } else { + alignment_mask = 0x7; + } + + /* Check if the watchpoint value matches. */ + val = read_wb_reg(AARCH64_DBG_REG_WVR, i); + if (val != (addr & ~alignment_mask)) + goto unlock; + + /* Possible match, check the byte address select to confirm. */ + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if (!((1 << (addr & alignment_mask)) & ctrl.len)) + goto unlock; + + /* + * Check that the access type matches. + * 0 => load, otherwise => store + */ + access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W : + HW_BREAKPOINT_R; + if (!(access & hw_breakpoint_type(wp))) + goto unlock; + + info->trigger = addr; + perf_bp_event(wp, regs); + + /* Do we need to handle the stepping? */ + if (!wp->overflow_handler) + step = 1; + +unlock: + rcu_read_unlock(); + } + + if (!step) + return 0; + + /* + * We always disable EL0 watchpoints because the kernel can + * cause these to fire via an unprivileged access. + */ + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0); + + if (user_mode(regs)) { + debug_info->wps_disabled = 1; + + /* If we're already stepping a breakpoint, just return. */ + if (debug_info->bps_disabled) + return 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + debug_info->suspended_step = 1; + else + user_enable_single_step(current); + } else { + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + if (*kernel_step != ARM_KERNEL_STEP_NONE) + return 0; + + if (kernel_active_single_step()) { + *kernel_step = ARM_KERNEL_STEP_SUSPEND; + } else { + *kernel_step = ARM_KERNEL_STEP_ACTIVE; + kernel_enable_single_step(regs); + } + } + + return 0; +} + +/* + * Handle single-step exception. + */ +int reinstall_suspended_bps(struct pt_regs *regs) +{ + struct debug_info *debug_info = ¤t->thread.debug; + int handled_exception = 0, *kernel_step; + + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + /* + * Called from single-step exception handler. + * Return 0 if execution can resume, 1 if a SIGTRAP should be + * reported. + */ + if (user_mode(regs)) { + if (debug_info->bps_disabled) { + debug_info->bps_disabled = 0; + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1); + handled_exception = 1; + } + + if (debug_info->wps_disabled) { + debug_info->wps_disabled = 0; + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); + handled_exception = 1; + } + + if (handled_exception) { + if (debug_info->suspended_step) { + debug_info->suspended_step = 0; + /* Allow exception handling to fall-through. */ + handled_exception = 0; + } else { + user_disable_single_step(current); + } + } + } else if (*kernel_step != ARM_KERNEL_STEP_NONE) { + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1); + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1); + + if (!debug_info->wps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); + + if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) { + kernel_disable_single_step(); + handled_exception = 1; + } else { + handled_exception = 0; + } + + *kernel_step = ARM_KERNEL_STEP_NONE; + } + + return !handled_exception; +} + +/* + * Context-switcher for restoring suspended breakpoints. + */ +void hw_breakpoint_thread_switch(struct task_struct *next) +{ + /* + * current next + * disabled: 0 0 => The usual case, NOTIFY_DONE + * 0 1 => Disable the registers + * 1 0 => Enable the registers + * 1 1 => NOTIFY_DONE. per-task bps will + * get taken care of by perf. + */ + + struct debug_info *current_debug_info, *next_debug_info; + + current_debug_info = ¤t->thread.debug; + next_debug_info = &next->thread.debug; + + /* Update breakpoints. */ + if (current_debug_info->bps_disabled != next_debug_info->bps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_BCR, + DBG_ACTIVE_EL0, + !next_debug_info->bps_disabled); + + /* Update watchpoints. */ + if (current_debug_info->wps_disabled != next_debug_info->wps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_WCR, + DBG_ACTIVE_EL0, + !next_debug_info->wps_disabled); +} + +/* + * CPU initialisation. + */ +static void reset_ctrl_regs(void *unused) +{ + int i; + + for (i = 0; i < core_num_brps; ++i) { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } + + for (i = 0; i < core_num_wrps; ++i) { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } +} + +static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, + unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + if (action == CPU_ONLINE) + smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { + .notifier_call = hw_breakpoint_reset_notify, +}; + +/* + * One-time initialisation. + */ +static int __init arch_hw_breakpoint_init(void) +{ + core_num_brps = get_num_brps(); + core_num_wrps = get_num_wrps(); + + pr_info("found %d breakpoint and %d watchpoint registers.\n", + core_num_brps, core_num_wrps); + + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + smp_call_function(reset_ctrl_regs, NULL, 1); + reset_ctrl_regs(NULL); + + /* Register debug fault handlers. */ + hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, + TRAP_HWBKPT, "hw-breakpoint handler"); + hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, + TRAP_HWBKPT, "hw-watchpoint handler"); + + /* Register hotplug notifier. */ + register_cpu_notifier(&hw_breakpoint_reset_nb); + + return 0; +} +arch_initcall(arch_hw_breakpoint_init); + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +} + +/* + * Dummy function to register with die_notifier. + */ +int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c new file mode 100644 index 00000000000..7d37ead4d19 --- /dev/null +++ b/arch/arm64/kernel/io.c @@ -0,0 +1,64 @@ +/* + * Based on arch/arm/kernel/io.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/types.h> +#include <linux/io.h> + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ + unsigned char *t = to; + while (count) { + count--; + *t = readb(from); + t++; + from++; + } +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ + const unsigned char *f = from; + while (count) { + count--; + writeb(*f, to); + f++; + to++; + } +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ + while (count) { + count--; + writeb(c, dst); + dst++; + } +} +EXPORT_SYMBOL(__memset_io); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c new file mode 100644 index 00000000000..0373c6609ea --- /dev/null +++ b/arch/arm64/kernel/irq.c @@ -0,0 +1,84 @@ +/* + * Based on arch/arm/kernel/irq.c + * + * Copyright (C) 1992 Linus Torvalds + * Modifications for ARM processor Copyright (C) 1995-2000 Russell King. + * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation. + * Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and + * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel_stat.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/of_irq.h> +#include <linux/seq_file.h> +#include <linux/ratelimit.h> + +unsigned long irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) +{ +#ifdef CONFIG_SMP + show_ipi_list(p, prec); +#endif + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); + return 0; +} + +/* + * handle_IRQ handles all hardware IRQ's. Decoded IRQs should + * not come via this function. Instead, they should provide their + * own 'handler'. Used by platform code implementing C-based 1st + * level decoding. + */ +void handle_IRQ(unsigned int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. + */ + if (unlikely(irq >= nr_irqs)) { + pr_warn_ratelimited("Bad IRQ%u\n", irq); + ack_bad_irq(irq); + } else { + generic_handle_irq(irq); + } + + irq_exit(); + set_irq_regs(old_regs); +} + +/* + * Interrupt controllers supported by the kernel. + */ +static const struct of_device_id intctrl_of_match[] __initconst = { + /* IRQ controllers { .compatible, .data } info to go here */ + {} +}; + +void __init init_IRQ(void) +{ + of_irq_init(intctrl_of_match); + + if (!handle_arch_irq) + panic("No interrupt controller found."); +} diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S new file mode 100644 index 00000000000..8b69ecb1d8b --- /dev/null +++ b/arch/arm64/kernel/kuser32.S @@ -0,0 +1,77 @@ +/* + * Low-level user helpers placed in the vectors page for AArch32. + * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. + * + * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net> + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * AArch32 user helpers. + * + * Each segment is 32-byte aligned and will be moved to the top of the high + * vector page. New segments (if ever needed) must be added in front of + * existing ones. This mechanism should be used only for things that are + * really small and justified, and not be abused freely. + * + * See Documentation/arm/kernel_user_helpers.txt for formal definitions. + */ + .align 5 + .globl __kuser_helper_start +__kuser_helper_start: + +__kuser_cmpxchg64: // 0xffff0f60 + .inst 0xe92d00f0 // push {r4, r5, r6, r7} + .inst 0xe1c040d0 // ldrd r4, r5, [r0] + .inst 0xe1c160d0 // ldrd r6, r7, [r1] + .inst 0xf57ff05f // dmb sy + .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] + .inst 0xe0303004 // eors r3, r0, r4 + .inst 0x00313005 // eoreqs r3, r1, r5 + .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x03330001 // teqeq r3, #1 + .inst 0x0afffff9 // beq 1b + .inst 0xf57ff05f // dmb sy + .inst 0xe2730000 // rsbs r0, r3, #0 + .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} + .inst 0xe12fff1e // bx lr + + .align 5 +__kuser_memory_barrier: // 0xffff0fa0 + .inst 0xf57ff05f // dmb sy + .inst 0xe12fff1e // bx lr + + .align 5 +__kuser_cmpxchg: // 0xffff0fc0 + .inst 0xf57ff05f // dmb sy + .inst 0xe1923f9f // 1: ldrex r3, [r2] + .inst 0xe0533000 // subs r3, r3, r0 + .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x03330001 // teqeq r3, #1 + .inst 0x0afffffa // beq 1b + .inst 0xe2730000 // rsbs r0, r3, #0 + .inst 0xeaffffef // b <__kuser_memory_barrier> + + .align 5 +__kuser_get_tls: // 0xffff0fe0 + .inst 0xee1d0f70 // mrc p15, 0, r0, c13, c0, 3 + .inst 0xe12fff1e // bx lr + .rep 5 + .word 0 + .endr + +__kuser_helper_version: // 0xffff0ffc + .word ((__kuser_helper_end - __kuser_helper_start) >> 5) + .globl __kuser_helper_end +__kuser_helper_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c new file mode 100644 index 00000000000..ca0e3d55da9 --- /dev/null +++ b/arch/arm64/kernel/module.c @@ -0,0 +1,456 @@ +/* + * AArch64 loadable module support. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/bitops.h> +#include <linux/elf.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/moduleloader.h> +#include <linux/vmalloc.h> + +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, -1, + __builtin_return_address(0)); +} + +enum aarch64_reloc_op { + RELOC_OP_NONE, + RELOC_OP_ABS, + RELOC_OP_PREL, + RELOC_OP_PAGE, +}; + +static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) +{ + switch (reloc_op) { + case RELOC_OP_ABS: + return val; + case RELOC_OP_PREL: + return val - (u64)place; + case RELOC_OP_PAGE: + return (val & ~0xfff) - ((u64)place & ~0xfff); + case RELOC_OP_NONE: + return 0; + } + + pr_err("do_reloc: unknown relocation operation %d\n", reloc_op); + return 0; +} + +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) +{ + u64 imm_mask = (1 << len) - 1; + s64 sval = do_reloc(op, place, val); + + switch (len) { + case 16: + *(s16 *)place = sval; + break; + case 32: + *(s32 *)place = sval; + break; + case 64: + *(s64 *)place = sval; + break; + default: + pr_err("Invalid length (%d) for data relocation\n", len); + return 0; + } + + /* + * Extract the upper value bits (including the sign bit) and + * shift them to bit 0. + */ + sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + + /* + * Overflow has occurred if the value is not representable in + * len bits (i.e the bottom len bits are not sign-extended and + * the top bits are not all zero). + */ + if ((u64)(sval + 1) > 2) + return -ERANGE; + + return 0; +} + +enum aarch64_imm_type { + INSN_IMM_MOVNZ, + INSN_IMM_MOVK, + INSN_IMM_ADR, + INSN_IMM_26, + INSN_IMM_19, + INSN_IMM_16, + INSN_IMM_14, + INSN_IMM_12, + INSN_IMM_9, +}; + +static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case INSN_IMM_MOVNZ: + /* + * For signed MOVW relocations, we have to manipulate the + * instruction encoding depending on whether or not the + * immediate is less than zero. + */ + insn &= ~(3 << 29); + if ((s64)imm >= 0) { + /* >=0: Set the instruction to MOVZ (opcode 10b). */ + insn |= 2 << 29; + } else { + /* + * <0: Set the instruction to MOVN (opcode 00b). + * Since we've masked the opcode already, we + * don't need to do anything other than + * inverting the new immediate field. + */ + imm = ~imm; + } + case INSN_IMM_MOVK: + mask = BIT(16) - 1; + shift = 5; + break; + case INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("encode_insn_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. */ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_imm_type imm_type) +{ + u64 imm, limit = 0; + s64 sval; + u32 insn = *(u32 *)place; + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; + + /* Update the instruction with the new encoding. */ + *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + + /* Shift out the immediate field. */ + sval >>= 16; + + /* + * For unsigned immediates, the overflow check is straightforward. + * For signed immediates, the sign bit is actually the bit past the + * most significant bit of the field. + * The INSN_IMM_16 immediate type is unsigned. + */ + if (imm_type != INSN_IMM_16) { + sval++; + limit++; + } + + /* Check the upper bits depending on the sign of the immediate. */ + if ((u64)sval > limit) + return -ERANGE; + + return 0; +} + +static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, int len, enum aarch64_imm_type imm_type) +{ + u64 imm, imm_mask; + s64 sval; + u32 insn = *(u32 *)place; + + /* Calculate the relocation value. */ + sval = do_reloc(op, place, val); + sval >>= lsb; + + /* Extract the value bits and shift them to bit 0. */ + imm_mask = (BIT(lsb + len) - 1) >> lsb; + imm = sval & imm_mask; + + /* Update the instruction's immediate field. */ + *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + + /* + * Extract the upper value bits (including the sign bit) and + * shift them to bit 0. + */ + sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + + /* + * Overflow has occurred if the upper bits are not all equal to + * the sign bit of the value. + */ + if ((u64)(sval + 1) >= 2) + return -ERANGE; + + return 0; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + int ovf; + bool overflow_check; + Elf64_Sym *sym; + void *loc; + u64 val; + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* loc corresponds to P in the AArch64 ELF document. */ + loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + + /* sym is the ELF symbol we're referring to. */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rel[i].r_info); + + /* val corresponds to (S + A) in the AArch64 ELF document. */ + val = sym->st_value + rel[i].r_addend; + + /* Check for overflow by default. */ + overflow_check = true; + + /* Perform the static relocation. */ + switch (ELF64_R_TYPE(rel[i].r_info)) { + /* Null relocations. */ + case R_ARM_NONE: + case R_AARCH64_NONE: + ovf = 0; + break; + + /* Data relocations. */ + case R_AARCH64_ABS64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); + break; + case R_AARCH64_ABS32: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); + break; + case R_AARCH64_ABS16: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); + break; + case R_AARCH64_PREL64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); + break; + case R_AARCH64_PREL32: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); + break; + case R_AARCH64_PREL16: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); + break; + + /* MOVW instruction relocations. */ + case R_AARCH64_MOVW_UABS_G0_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G3: + /* We're using the top bits so we can't overflow. */ + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, + INSN_IMM_16); + break; + case R_AARCH64_MOVW_SABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G0_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G0: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G1_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, + INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G1: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G2_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, + INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G2: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, + INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G3: + /* We're using the top bits so we can't overflow. */ + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, + INSN_IMM_MOVNZ); + break; + + /* Immediate instruction relocations. */ + case R_AARCH64_LD_PREL_LO19: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, + INSN_IMM_19); + break; + case R_AARCH64_ADR_PREL_LO21: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, + INSN_IMM_ADR); + break; + case R_AARCH64_ADR_PREL_PG_HI21_NC: + overflow_check = false; + case R_AARCH64_ADR_PREL_PG_HI21: + ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, + INSN_IMM_ADR); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, + INSN_IMM_12); + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, + INSN_IMM_12); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, + INSN_IMM_12); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, + INSN_IMM_12); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, + INSN_IMM_12); + break; + case R_AARCH64_TSTBR14: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, + INSN_IMM_14); + break; + case R_AARCH64_CONDBR19: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, + INSN_IMM_19); + break; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, + INSN_IMM_26); + break; + + default: + pr_err("module %s: unsupported RELA relocation: %llu\n", + me->name, ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + + if (overflow_check && ovf == -ERANGE) + goto overflow; + + } + + return 0; + +overflow: + pr_err("module %s: overflow in relocation type %d val %Lx\n", + me->name, (int)ELF64_R_TYPE(rel[i].r_info), val); + return -ENOEXEC; +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c new file mode 100644 index 00000000000..ecbf2d81ec5 --- /dev/null +++ b/arch/arm64/kernel/perf_event.c @@ -0,0 +1,1368 @@ +/* + * PMU support + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This code is based heavily on the ARMv7 perf event code. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#define pr_fmt(fmt) "hw perfevents: " fmt + +#include <linux/bitmap.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +#include <asm/cputype.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> +#include <asm/stacktrace.h> + +/* + * ARMv8 supports a maximum of 32 events. + * The cycle counter is included in this total. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); +static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *cpu_pmu; + +int +armpmu_get_max_events(void) +{ + int max_events = 0; + + if (cpu_pmu != NULL) + max_events = cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(armpmu_get_max_events); + +int perf_num_counters(void) +{ + return armpmu_get_max_events(); +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +#define HW_OP_UNSUPPORTED 0xFFFF + +#define C(_x) \ + PERF_COUNT_HW_CACHE_##_x + +#define CACHE_OP_UNSUPPORTED 0xFFFF + +static int +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +static int +armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ + int mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) +{ + return (int)(config & raw_event_mask); +} + +static int map_cpu_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask) +{ + u64 config = event->attr.config; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +int +armpmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)armpmu->max_period) + left = armpmu->max_period; + + local64_set(&hwc->prev_count, (u64)-left); + + armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +u64 +armpmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + u64 delta, prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = armpmu->read_counter(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void +armpmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + armpmu_event_update(event, hwc, hwc->idx); +} + +static void +armpmu_stop(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static void +armpmu_start(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + /* + * Set the period again. Some counters can't be stopped, so when we + * were stopped we simply disabled the IRQ source and the counter + * may have been left counting. If we don't do this step then we may + * get an interrupt too soon or *way* too late if the overflow has + * happened since disabling. + */ + armpmu_event_set_period(event, hwc, hwc->idx); + armpmu->enable(hwc, hwc->idx); +} + +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + armpmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + + perf_event_update_userpage(event); +} + +static int +armpmu_add(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = armpmu->get_event_idx(hw_events, hwc); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. + */ + event->hw.idx = idx; + armpmu->disable(hwc, idx); + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static int +validate_event(struct pmu_hw_events *hw_events, + struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event fake_event = event->hw; + struct pmu *leader_pmu = event->group_leader->pmu; + + if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + return 1; + + return armpmu->get_event_idx(hw_events, &fake_event) >= 0; +} + +static int +validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct pmu_hw_events fake_pmu; + DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); + + /* + * Initialise the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + memset(fake_used_mask, 0, sizeof(fake_used_mask)); + fake_pmu.used_mask = fake_used_mask; + + if (!validate_event(&fake_pmu, leader)) + return -EINVAL; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -EINVAL; + } + + if (!validate_event(&fake_pmu, event)) + return -EINVAL; + + return 0; +} + +static void +armpmu_release_hardware(struct arm_pmu *armpmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = armpmu->plat_device; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, armpmu); + } +} + +static int +armpmu_reserve_hardware(struct arm_pmu *armpmu) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = armpmu->plat_device; + + if (!pmu_device) { + pr_err("no PMU device registered\n"); + return -ENODEV; + } + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_err("no irqs for PMUs defined\n"); + return -ENODEV; + } + + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } + + return 0; +} + +static void +hw_perf_event_destroy(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + atomic_t *active_events = &armpmu->active_events; + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { + armpmu_release_hardware(armpmu); + mutex_unlock(pmu_reserve_mutex); + } +} + +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) +{ + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv; +} + +static int +__hw_perf_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int mapping, err; + + mapping = armpmu->map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. For SMP systems, each core has it's own PMU so we can't do any + * clever allocation or constraints checking at this point. + */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!armpmu->set_event_filter || + armpmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EPERM; + } + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + if (!hwc->sample_period) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = armpmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + err = 0; + if (event->group_leader != event) { + err = validate_group(event); + if (err) + return -EINVAL; + } + + return err; +} + +static int armpmu_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + int err = 0; + atomic_t *active_events = &armpmu->active_events; + + if (armpmu->map_event(event) == -ENOENT) + return -ENOENT; + + event->destroy = hw_perf_event_destroy; + + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&armpmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = armpmu_reserve_hardware(armpmu); + + if (!err) + atomic_inc(active_events); + mutex_unlock(&armpmu->reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static void armpmu_enable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + if (enabled) + armpmu->start(); +} + +static void armpmu_disable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + armpmu->stop(); +} + +static void __init armpmu_init(struct arm_pmu *armpmu) +{ + atomic_set(&armpmu->active_events, 0); + mutex_init(&armpmu->reserve_mutex); + + armpmu->pmu = (struct pmu) { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, + }; +} + +int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) +{ + armpmu_init(armpmu); + return perf_pmu_register(&armpmu->pmu, name, type); +} + +/* + * ARMv8 PMUv3 Performance Events handling code. + * Common event types. + */ +enum armv8_pmuv3_perf_types { + /* Required events. */ + ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03, + ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04, + ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11, + ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12, + + /* At least one of the following is required. */ + ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08, + ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B, + + /* Common architectural events. */ + ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06, + ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07, + ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09, + ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B, + ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C, + ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E, + ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, + ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C, + + /* Common microarchitectural events. */ + ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01, + ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02, + ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05, + ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13, + ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14, + ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15, + ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16, + ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17, + ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18, + ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19, + ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A, + ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, + + /* + * This isn't an architected event. + * We detect this event number and use the cycle counter instead. + */ + ARMV8_PMUV3_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* PMUv3 HW events mapping. */ +static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events' indices + */ +#define ARMV8_IDX_CYCLE_COUNTER 0 +#define ARMV8_IDX_COUNTER0 1 +#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +#define ARMV8_MAX_COUNTERS 32 +#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) + +/* + * ARMv8 low level PMU access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV8_IDX_TO_COUNTER(x) \ + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMCR_N_MASK 0x1f +#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv3 + */ +#define ARMV8_EXCLUDE_EL1 (1 << 31) +#define ARMV8_EXCLUDE_EL0 (1 << 30) +#define ARMV8_INCLUDE_EL2 (1 << 27) + +static inline u32 armv8pmu_pmcr_read(void) +{ + u32 val; + asm volatile("mrs %0, pmcr_el0" : "=r" (val)); + return val; +} + +static inline void armv8pmu_pmcr_write(u32 val) +{ + val &= ARMV8_PMCR_MASK; + isb(); + asm volatile("msr pmcr_el0, %0" :: "r" (val)); +} + +static inline int armv8pmu_has_overflowed(u32 pmovsr) +{ + return pmovsr & ARMV8_OVERFLOWED_MASK; +} + +static inline int armv8pmu_counter_valid(int idx) +{ + return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; +} + +static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +{ + int ret = 0; + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), idx); + } else { + counter = ARMV8_IDX_TO_COUNTER(idx); + ret = pmnc & BIT(counter); + } + + return ret; +} + +static inline int armv8pmu_select_counter(int idx) +{ + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u selecting wrong PMNC counter %d\n", + smp_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmselr_el0, %0" :: "r" (counter)); + isb(); + + return idx; +} + +static inline u32 armv8pmu_read_counter(int idx) +{ + u32 value = 0; + + if (!armv8pmu_counter_valid(idx)) + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + else if (idx == ARMV8_IDX_CYCLE_COUNTER) + asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); + else if (armv8pmu_select_counter(idx) == idx) + asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); + + return value; +} + +static inline void armv8pmu_write_counter(int idx, u32 value) +{ + if (!armv8pmu_counter_valid(idx)) + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); + else if (idx == ARMV8_IDX_CYCLE_COUNTER) + asm volatile("msr pmccntr_el0, %0" :: "r" (value)); + else if (armv8pmu_select_counter(idx) == idx) + asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); +} + +static inline void armv8pmu_write_evtype(int idx, u32 val) +{ + if (armv8pmu_select_counter(idx) == idx) { + val &= ARMV8_EVTYPE_MASK; + asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); + } +} + +static inline int armv8pmu_enable_counter(int idx) +{ + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u enabling wrong PMNC counter %d\n", + smp_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); + return idx; +} + +static inline int armv8pmu_disable_counter(int idx) +{ + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u disabling wrong PMNC counter %d\n", + smp_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); + return idx; +} + +static inline int armv8pmu_enable_intens(int idx) +{ + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); + return idx; +} + +static inline int armv8pmu_disable_intens(int idx) +{ + u32 counter; + + if (!armv8pmu_counter_valid(idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); + isb(); + /* Clear the overflow flag in case an interrupt is pending. */ + asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); + isb(); + return idx; +} + +static inline u32 armv8pmu_getreset_flags(void) +{ + u32 value; + + /* Read */ + asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); + + /* Write to clear flags */ + value &= ARMV8_OVSR_MASK; + asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); + + return value; +} + +static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + armv8pmu_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters). + */ + armv8pmu_write_evtype(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv8pmu_enable_intens(idx); + + /* + * Enable counter + */ + armv8pmu_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* + * Disable counter and interrupt + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + armv8pmu_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv8pmu_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) +{ + u32 pmovsr; + struct perf_sample_data data; + struct pmu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmovsr = armv8pmu_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv8pmu_has_overflowed(pmovsr)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv8pmu_start(void) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(void) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; + + /* Always place a cycle counter into the cycle counter. */ + if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV8_IDX_CYCLE_COUNTER; + } + + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; +} + +/* + * Add an event filter to a given event. This will only work for PMUv2 PMUs. + */ +static int armv8pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) + return -EPERM; + if (attr->exclude_user) + config_base |= ARMV8_EXCLUDE_EL0; + if (attr->exclude_kernel) + config_base |= ARMV8_EXCLUDE_EL1; + if (!attr->exclude_hv) + config_base |= ARMV8_INCLUDE_EL2; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv8pmu_reset(void *info) +{ + u32 idx, nb_cnt = cpu_pmu->num_events; + + /* The counter and interrupt enable registers are unknown at reset. */ + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) + armv8pmu_disable_event(NULL, idx); + + /* Initialize & Reset PMNC: C and P bits. */ + armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); + + /* Disable access from userspace. */ + asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return map_cpu_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, 0xFF); +} + +static struct arm_pmu armv8pmu = { + .handle_irq = armv8pmu_handle_irq, + .enable = armv8pmu_enable_event, + .disable = armv8pmu_disable_event, + .read_counter = armv8pmu_read_counter, + .write_counter = armv8pmu_write_counter, + .get_event_idx = armv8pmu_get_event_idx, + .start = armv8pmu_start, + .stop = armv8pmu_stop, + .reset = armv8pmu_reset, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv8pmu_read_num_pmnc_events(void) +{ + u32 nb_cnt; + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +static struct arm_pmu *__init armv8_pmuv3_pmu_init(void) +{ + armv8pmu.name = "arm/armv8-pmuv3"; + armv8pmu.map_event = armv8_pmuv3_map_event; + armv8pmu.num_events = armv8pmu_read_num_pmnc_events(); + armv8pmu.set_event_filter = armv8pmu_set_event_filter; + return &armv8pmu; +} + +/* + * Ensure the PMU has sane values out of reset. + * This requires SMP to be available, so exists as a separate initcall. + */ +static int __init +cpu_pmu_reset(void) +{ + if (cpu_pmu && cpu_pmu->reset) + return on_each_cpu(cpu_pmu->reset, NULL, 1); + return 0; +} +arch_initcall(cpu_pmu_reset); + +/* + * PMU platform driver and devicetree bindings. + */ +static struct of_device_id armpmu_of_device_ids[] = { + {.compatible = "arm,armv8-pmuv3"}, + {}, +}; + +static int __devinit armpmu_device_probe(struct platform_device *pdev) +{ + if (!cpu_pmu) + return -ENODEV; + + cpu_pmu->plat_device = pdev; + return 0; +} + +static struct platform_driver armpmu_driver = { + .driver = { + .name = "arm-pmu", + .of_match_table = armpmu_of_device_ids, + }, + .probe = armpmu_device_probe, +}; + +static int __init register_pmu_driver(void) +{ + return platform_driver_register(&armpmu_driver); +} +device_initcall(register_pmu_driver); + +static struct pmu_hw_events *armpmu_get_cpu_events(void) +{ + return &__get_cpu_var(cpu_hw_events); +} + +static void __init cpu_pmu_init(struct arm_pmu *armpmu) +{ + int cpu; + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); + events->events = per_cpu(hw_events, cpu); + events->used_mask = per_cpu(used_mask, cpu); + raw_spin_lock_init(&events->pmu_lock); + } + armpmu->get_hw_events = armpmu_get_cpu_events; +} + +static int __init init_hw_perf_events(void) +{ + u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + + switch ((dfr >> 8) & 0xf) { + case 0x1: /* PMUv3 */ + cpu_pmu = armv8_pmuv3_pmu_init(); + break; + } + + if (cpu_pmu) { + pr_info("enabled with %s PMU driver, %d counters available\n", + cpu_pmu->name, cpu_pmu->num_events); + cpu_pmu_init(cpu_pmu); + armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + } else { + pr_info("no hardware support available\n"); + } + + return 0; +} +early_initcall(init_hw_perf_events); + +/* + * Callchain handling code. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int callchain_trace(struct stackframe *frame, void *data) +{ + struct perf_callchain_entry *entry = data; + perf_callchain_store(entry, frame->pc); + return 0; +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + struct stackframe frame; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c new file mode 100644 index 00000000000..f22965ea1cf --- /dev/null +++ b/arch/arm64/kernel/process.c @@ -0,0 +1,408 @@ +/* + * Based on arch/arm/kernel/process.c + * + * Original Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdarg.h> + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/user.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/kallsyms.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/elfcore.h> +#include <linux/pm.h> +#include <linux/tick.h> +#include <linux/utsname.h> +#include <linux/uaccess.h> +#include <linux/random.h> +#include <linux/hw_breakpoint.h> +#include <linux/personality.h> +#include <linux/notifier.h> + +#include <asm/compat.h> +#include <asm/cacheflush.h> +#include <asm/processor.h> +#include <asm/stacktrace.h> +#include <asm/fpsimd.h> + +static void setup_restart(void) +{ + /* + * Tell the mm system that we are going to reboot - + * we may need it to insert some 1:1 mappings so that + * soft boot works. + */ + setup_mm_for_reboot(); + + /* Clean and invalidate caches */ + flush_cache_all(); + + /* Turn D-cache off */ + cpu_cache_off(); + + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); +} + +void soft_restart(unsigned long addr) +{ + setup_restart(); + cpu_reset(addr); +} + +/* + * Function pointers to optional machine specific functions + */ +void (*pm_power_off)(void); +EXPORT_SYMBOL_GPL(pm_power_off); + +void (*pm_restart)(const char *cmd); +EXPORT_SYMBOL_GPL(pm_restart); + + +/* + * This is our default idle handler. + */ +static void default_idle(void) +{ + /* + * This should do all the clock switching and wait for interrupt + * tricks + */ + cpu_do_idle(); + local_irq_enable(); +} + +void (*pm_idle)(void) = default_idle; +EXPORT_SYMBOL_GPL(pm_idle); + +/* + * The idle thread, has rather strange semantics for calling pm_idle, + * but this is what x86 does and we need to do the same, so that + * things like cpuidle get called in the same way. The only difference + * is that we always respect 'hlt_counter' to prevent low power idle. + */ +void cpu_idle(void) +{ + local_fiq_enable(); + + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_idle_enter(); + rcu_idle_enter(); + while (!need_resched()) { + /* + * We need to disable interrupts here to ensure + * we don't miss a wakeup call. + */ + local_irq_disable(); + if (!need_resched()) { + stop_critical_timings(); + pm_idle(); + start_critical_timings(); + /* + * pm_idle functions should always return + * with IRQs enabled. + */ + WARN_ON(irqs_disabled()); + } else { + local_irq_enable(); + } + } + rcu_idle_exit(); + tick_nohz_idle_exit(); + schedule_preempt_disabled(); + } +} + +void machine_shutdown(void) +{ +#ifdef CONFIG_SMP + smp_send_stop(); +#endif +} + +void machine_halt(void) +{ + machine_shutdown(); + while (1); +} + +void machine_power_off(void) +{ + machine_shutdown(); + if (pm_power_off) + pm_power_off(); +} + +void machine_restart(char *cmd) +{ + machine_shutdown(); + + /* Disable interrupts first */ + local_irq_disable(); + local_fiq_disable(); + + /* Now call the architecture specific reboot code. */ + if (pm_restart) + pm_restart(cmd); + + /* + * Whoops - the architecture was unable to reboot. + */ + printk("Reboot failed -- System halted\n"); + while (1); +} + +void __show_regs(struct pt_regs *regs) +{ + int i; + + printk("CPU: %d %s (%s %.*s)\n", + raw_smp_processor_id(), print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + print_symbol("PC is at %s\n", instruction_pointer(regs)); + print_symbol("LR is at %s\n", regs->regs[30]); + printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", + regs->pc, regs->regs[30], regs->pstate); + printk("sp : %016llx\n", regs->sp); + for (i = 29; i >= 0; i--) { + printk("x%-2d: %016llx ", i, regs->regs[i]); + if (i % 2 == 0) + printk("\n"); + } + printk("\n"); +} + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); + __show_regs(regs); +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ +} + +void flush_thread(void) +{ + fpsimd_flush_thread(); + flush_ptrace_hw_breakpoint(current); +} + +void release_thread(struct task_struct *dead_task) +{ +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + fpsimd_save_state(¤t->thread.fpsimd_state); + *dst = *src; + return 0; +} + +asmlinkage void ret_from_fork(void) asm("ret_from_fork"); + +int copy_thread(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, + struct pt_regs *regs) +{ + struct pt_regs *childregs = task_pt_regs(p); + unsigned long tls = p->thread.tp_value; + + *childregs = *regs; + childregs->regs[0] = 0; + + if (is_compat_thread(task_thread_info(p))) + childregs->compat_sp = stack_start; + else { + /* + * Read the current TLS pointer from tpidr_el0 as it may be + * out-of-sync with the saved value. + */ + asm("mrs %0, tpidr_el0" : "=r" (tls)); + childregs->sp = stack_start; + } + + memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + p->thread.cpu_context.sp = (unsigned long)childregs; + p->thread.cpu_context.pc = (unsigned long)ret_from_fork; + + /* If a TLS pointer was passed to clone, use that for the new thread. */ + if (clone_flags & CLONE_SETTLS) + tls = regs->regs[3]; + p->thread.tp_value = tls; + + ptrace_hw_copy_thread(p); + + return 0; +} + +static void tls_thread_switch(struct task_struct *next) +{ + unsigned long tpidr, tpidrro; + + if (!is_compat_task()) { + asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + current->thread.tp_value = tpidr; + } + + if (is_compat_thread(task_thread_info(next))) { + tpidr = 0; + tpidrro = next->thread.tp_value; + } else { + tpidr = next->thread.tp_value; + tpidrro = 0; + } + + asm( + " msr tpidr_el0, %0\n" + " msr tpidrro_el0, %1" + : : "r" (tpidr), "r" (tpidrro)); +} + +/* + * Thread switching. + */ +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next) +{ + struct task_struct *last; + + fpsimd_thread_switch(next); + tls_thread_switch(next); + hw_breakpoint_thread_switch(next); + + /* the actual thread switch */ + last = cpu_switch_to(prev, next); + + return last; +} + +/* + * Fill in the task's elfregs structure for a core dump. + */ +int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs) +{ + elf_core_copy_regs(elfregs, task_pt_regs(t)); + return 1; +} + +/* + * fill in the fpe structure for a core dump... + */ +int dump_fpu (struct pt_regs *regs, struct user_fp *fp) +{ + return 0; +} +EXPORT_SYMBOL(dump_fpu); + +/* + * Shuffle the argument into the correct register before calling the + * thread function. x1 is the thread argument, x2 is the pointer to + * the thread function, and x3 points to the exit function. + */ +extern void kernel_thread_helper(void); +asm( ".section .text\n" +" .align\n" +" .type kernel_thread_helper, #function\n" +"kernel_thread_helper:\n" +" mov x0, x1\n" +" mov x30, x3\n" +" br x2\n" +" .size kernel_thread_helper, . - kernel_thread_helper\n" +" .previous"); + +#define kernel_thread_exit do_exit + +/* + * Create a kernel thread. + */ +pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.regs[1] = (unsigned long)arg; + regs.regs[2] = (unsigned long)fn; + regs.regs[3] = (unsigned long)kernel_thread_exit; + regs.pc = (unsigned long)kernel_thread_helper; + regs.pstate = PSR_MODE_EL1h; + + return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + +unsigned long get_wchan(struct task_struct *p) +{ + struct stackframe frame; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + frame.fp = thread_saved_fp(p); + frame.sp = thread_saved_sp(p); + frame.pc = thread_saved_pc(p); + do { + int ret = unwind_frame(&frame); + if (ret < 0) + return 0; + if (!in_sched_functions(frame.pc)) + return frame.pc; + } while (count ++ < 16); + return 0; +} + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static unsigned long randomize_base(unsigned long base) +{ + unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; + return randomize_range(base, range_end, 0) ? : base; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + return randomize_base(mm->brk); +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + return randomize_base(base); +} diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c new file mode 100644 index 00000000000..ac3550ecc7b --- /dev/null +++ b/arch/arm64/kernel/ptrace.c @@ -0,0 +1,1126 @@ +/* + * Based on arch/arm/kernel/ptrace.c + * + * By Ross Biro 1/23/92 + * edited by Linus Torvalds + * ARM modifications Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/init.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/elf.h> + +#include <asm/compat.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/traps.h> +#include <asm/system_misc.h> + +/* + * TODO: does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ +} + +/* + * Handle hitting a breakpoint. + */ +static int ptrace_break(struct pt_regs *regs) +{ + siginfo_t info = { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int arm64_break_trap(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + return ptrace_break(regs); +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +/* + * Handle hitting a HW-breakpoint. + */ +static void ptrace_hbptriggered(struct perf_event *bp, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); + siginfo_t info = { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_HWBKPT, + .si_addr = (void __user *)(bkpt->trigger), + }; + +#ifdef CONFIG_COMPAT + int i; + + if (!is_compat_task()) + goto send_sig; + + for (i = 0; i < ARM_MAX_BRP; ++i) { + if (current->thread.debug.hbp_break[i] == bp) { + info.si_errno = (i << 1) + 1; + break; + } + } + for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { + if (current->thread.debug.hbp_watch[i] == bp) { + info.si_errno = -((i << 1) + 1); + break; + } + } + +send_sig: +#endif + force_sig_info(SIGTRAP, &info, current); +} + +/* + * Unregister breakpoints from this task and reset the pointers in + * the thread_struct. + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < ARM_MAX_BRP; i++) { + if (t->debug.hbp_break[i]) { + unregister_hw_breakpoint(t->debug.hbp_break[i]); + t->debug.hbp_break[i] = NULL; + } + } + + for (i = 0; i < ARM_MAX_WRP; i++) { + if (t->debug.hbp_watch[i]) { + unregister_hw_breakpoint(t->debug.hbp_watch[i]); + t->debug.hbp_watch[i] = NULL; + } + } +} + +void ptrace_hw_copy_thread(struct task_struct *tsk) +{ + memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); +} + +static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp = ERR_PTR(-EINVAL); + + switch (note_type) { + case NT_ARM_HW_BREAK: + if (idx < ARM_MAX_BRP) + bp = tsk->thread.debug.hbp_break[idx]; + break; + case NT_ARM_HW_WATCH: + if (idx < ARM_MAX_WRP) + bp = tsk->thread.debug.hbp_watch[idx]; + break; + } + + return bp; +} + +static int ptrace_hbp_set_event(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + struct perf_event *bp) +{ + int err = -EINVAL; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if (idx < ARM_MAX_BRP) { + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; + } + break; + case NT_ARM_HW_WATCH: + if (idx < ARM_MAX_WRP) { + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; + } + break; + } + + return err; +} + +static struct perf_event *ptrace_hbp_create(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp; + struct perf_event_attr attr; + int err, type; + + switch (note_type) { + case NT_ARM_HW_BREAK: + type = HW_BREAKPOINT_X; + break; + case NT_ARM_HW_WATCH: + type = HW_BREAKPOINT_RW; + break; + default: + return ERR_PTR(-EINVAL); + } + + ptrace_breakpoint_init(&attr); + + /* + * Initialise fields to sane defaults + * (i.e. values that will pass validation). + */ + attr.bp_addr = 0; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = type; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk); + if (IS_ERR(bp)) + return bp; + + err = ptrace_hbp_set_event(note_type, tsk, idx, bp); + if (err) + return ERR_PTR(err); + + return bp; +} + +static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, + struct arch_hw_breakpoint_ctrl ctrl, + struct perf_event_attr *attr) +{ + int err, len, type; + + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) + return -EINVAL; + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; + } + + attr->bp_len = len; + attr->bp_type = type; + attr->disabled = !ctrl.enabled; + + return 0; +} + +static int ptrace_hbp_get_resource_info(unsigned int note_type, u32 *info) +{ + u8 num; + u32 reg = 0; + + switch (note_type) { + case NT_ARM_HW_BREAK: + num = hw_breakpoint_slots(TYPE_INST); + break; + case NT_ARM_HW_WATCH: + num = hw_breakpoint_slots(TYPE_DATA); + break; + default: + return -EINVAL; + } + + reg |= debug_monitors_arch(); + reg <<= 8; + reg |= num; + + *info = reg; + return 0; +} + +static int ptrace_hbp_get_ctrl(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u32 *ctrl) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (IS_ERR(bp)) + return PTR_ERR(bp); + + *ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0; + return 0; +} + +static int ptrace_hbp_get_addr(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u64 *addr) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (IS_ERR(bp)) + return PTR_ERR(bp); + + *addr = bp ? bp->attr.bp_addr : 0; + return 0; +} + +static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (!bp) + bp = ptrace_hbp_create(note_type, tsk, idx); + + return bp; +} + +static int ptrace_hbp_set_ctrl(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u32 uctrl) +{ + int err; + struct perf_event *bp; + struct perf_event_attr attr; + struct arch_hw_breakpoint_ctrl ctrl; + + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + return err; + } + + attr = bp->attr; + decode_ctrl_reg(uctrl, &ctrl); + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); + if (err) + return err; + + return modify_user_hw_breakpoint(bp, &attr); +} + +static int ptrace_hbp_set_addr(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u64 addr) +{ + int err; + struct perf_event *bp; + struct perf_event_attr attr; + + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + return err; + } + + attr = bp->attr; + attr.bp_addr = addr; + err = modify_user_hw_breakpoint(bp, &attr); + return err; +} + +#define PTRACE_HBP_ADDR_SZ sizeof(u64) +#define PTRACE_HBP_CTRL_SZ sizeof(u32) +#define PTRACE_HBP_REG_OFF sizeof(u32) + +static int hw_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + int ret, idx = 0, offset = PTRACE_HBP_REG_OFF, limit; + u32 info, ctrl; + u64 addr; + + /* Resource info */ + ret = ptrace_hbp_get_resource_info(note_type, &info); + if (ret) + return ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, 4); + if (ret) + return ret; + + /* (address, ctrl) registers */ + limit = regset->n * regset->size; + while (count && offset < limit) { + ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); + if (ret) + return ret; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_ADDR_SZ; + + ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl); + if (ret) + return ret; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_CTRL_SZ; + idx++; + } + + return 0; +} + +static int hw_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + int ret, idx = 0, offset = PTRACE_HBP_REG_OFF, limit; + u32 ctrl; + u64 addr; + + /* Resource info */ + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, 4); + if (ret) + return ret; + + /* (address, ctrl) registers */ + limit = regset->n * regset->size; + while (count && offset < limit) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) + return ret; + ret = ptrace_hbp_set_addr(note_type, target, idx, addr); + if (ret) + return ret; + offset += PTRACE_HBP_ADDR_SZ; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) + return ret; + ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl); + if (ret) + return ret; + offset += PTRACE_HBP_CTRL_SZ; + idx++; + } + + return 0; +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_pt_regs newregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); + if (ret) + return ret; + + if (!valid_user_regs(&newregs)) + return -EINVAL; + + task_pt_regs(target)->user_regs = newregs; + return 0; +} + +/* + * TODO: update fp accessors for lazy context switching (sync/flush hwstate) + */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + uregs = &target->thread.fpsimd_state.user_fpsimd; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_fpsimd_state newstate; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); + if (ret) + return ret; + + target->thread.fpsimd_state.user_fpsimd = newstate; + return ret; +} + +static int tls_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long *tls = &target->thread.tp_value; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); +} + +static int tls_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + unsigned long tls; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + if (ret) + return ret; + + target->thread.tp_value = tls; + return ret; +} + +enum aarch64_regset { + REGSET_GPR, + REGSET_FPR, + REGSET_TLS, +#ifdef CONFIG_HAVE_HW_BREAKPOINT + REGSET_HW_BREAK, + REGSET_HW_WATCH, +#endif +}; + +static const struct user_regset aarch64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_pt_regs) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpsimd_state) / sizeof(u32), + /* + * We pretend we have 32-bit registers because the fpsr and + * fpcr are 32-bits wide. + */ + .size = sizeof(u32), + .align = sizeof(u32), + .get = fpr_get, + .set = fpr_set + }, + [REGSET_TLS] = { + .core_note_type = NT_ARM_TLS, + .n = 1, + .size = sizeof(void *), + .align = sizeof(void *), + .get = tls_get, + .set = tls_set, + }, +#ifdef CONFIG_HAVE_HW_BREAKPOINT + [REGSET_HW_BREAK] = { + .core_note_type = NT_ARM_HW_BREAK, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, + [REGSET_HW_WATCH] = { + .core_note_type = NT_ARM_HW_WATCH, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, +#endif +}; + +static const struct user_regset_view user_aarch64_view = { + .name = "aarch64", .e_machine = EM_AARCH64, + .regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets) +}; + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +enum compat_regset { + REGSET_COMPAT_GPR, + REGSET_COMPAT_VFP, +}; + +static int compat_gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret = 0; + unsigned int i, start, num_regs; + + /* Calculate the number of AArch32 registers contained in count */ + num_regs = count / regset->size; + + /* Convert pos into an register number */ + start = pos / regset->size; + + if (start + num_regs > regset->n) + return -EIO; + + for (i = 0; i < num_regs; ++i) { + unsigned int idx = start + i; + void *reg; + + switch (idx) { + case 15: + reg = (void *)&task_pt_regs(target)->pc; + break; + case 16: + reg = (void *)&task_pt_regs(target)->pstate; + break; + case 17: + reg = (void *)&task_pt_regs(target)->orig_x0; + break; + default: + reg = (void *)&task_pt_regs(target)->regs[idx]; + } + + ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + + if (ret) + break; + else + ubuf += sizeof(compat_ulong_t); + } + + return ret; +} + +static int compat_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs newregs; + int ret = 0; + unsigned int i, start, num_regs; + + /* Calculate the number of AArch32 registers contained in count */ + num_regs = count / regset->size; + + /* Convert pos into an register number */ + start = pos / regset->size; + + if (start + num_regs > regset->n) + return -EIO; + + newregs = *task_pt_regs(target); + + for (i = 0; i < num_regs; ++i) { + unsigned int idx = start + i; + void *reg; + + switch (idx) { + case 15: + reg = (void *)&newregs.pc; + break; + case 16: + reg = (void *)&newregs.pstate; + break; + case 17: + reg = (void *)&newregs.orig_x0; + break; + default: + reg = (void *)&newregs.regs[idx]; + } + + ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); + + if (ret) + goto out; + else + ubuf += sizeof(compat_ulong_t); + } + + if (valid_user_regs(&newregs.user_regs)) + *task_pt_regs(target) = newregs; + else + ret = -EINVAL; + +out: + return ret; +} + +static int compat_vfp_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + compat_ulong_t fpscr; + int ret; + + uregs = &target->thread.fpsimd_state.user_fpsimd; + + /* + * The VFP registers are packed into the fpsimd_state, so they all sit + * nicely together for us. We just need to create the fpscr separately. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, + VFP_STATE_SIZE - sizeof(compat_ulong_t)); + + if (count && !ret) { + fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | + (uregs->fpcr & VFP_FPSCR_CTRL_MASK); + ret = put_user(fpscr, (compat_ulong_t *)ubuf); + } + + return ret; +} + +static int compat_vfp_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + compat_ulong_t fpscr; + int ret; + + if (pos + count > VFP_STATE_SIZE) + return -EIO; + + uregs = &target->thread.fpsimd_state.user_fpsimd; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, + VFP_STATE_SIZE - sizeof(compat_ulong_t)); + + if (count && !ret) { + ret = get_user(fpscr, (compat_ulong_t *)ubuf); + uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; + uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + } + + return ret; +} + +static const struct user_regset aarch32_regsets[] = { + [REGSET_COMPAT_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = COMPAT_ELF_NGREG, + .size = sizeof(compat_elf_greg_t), + .align = sizeof(compat_elf_greg_t), + .get = compat_gpr_get, + .set = compat_gpr_set + }, + [REGSET_COMPAT_VFP] = { + .core_note_type = NT_ARM_VFP, + .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), + .size = sizeof(compat_ulong_t), + .align = sizeof(compat_ulong_t), + .get = compat_vfp_get, + .set = compat_vfp_set + }, +}; + +static const struct user_regset_view user_aarch32_view = { + .name = "aarch32", .e_machine = EM_ARM, + .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) +}; + +int aarch32_break_trap(struct pt_regs *regs) +{ + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (bp) + return ptrace_break(regs); + return 1; +} + +static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, + compat_ulong_t __user *ret) +{ + compat_ulong_t tmp; + + if (off & 3) + return -EIO; + + if (off == PT_TEXT_ADDR) + tmp = tsk->mm->start_code; + else if (off == PT_DATA_ADDR) + tmp = tsk->mm->start_data; + else if (off == PT_TEXT_END_ADDR) + tmp = tsk->mm->end_code; + else if (off < sizeof(compat_elf_gregset_t)) + return copy_regset_to_user(tsk, &user_aarch32_view, + REGSET_COMPAT_GPR, off, + sizeof(compat_ulong_t), ret); + else if (off >= COMPAT_USER_SZ) + return -EIO; + else + tmp = 0; + + return put_user(tmp, ret); +} + +static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, + compat_ulong_t val) +{ + int ret; + + if (off & 3 || off >= COMPAT_USER_SZ) + return -EIO; + + if (off >= sizeof(compat_elf_gregset_t)) + return 0; + + ret = copy_regset_from_user(tsk, &user_aarch32_view, + REGSET_COMPAT_GPR, off, + sizeof(compat_ulong_t), + &val); + return ret; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + +/* + * Convert a virtual register number into an index for a thread_info + * breakpoint array. Breakpoints are identified using positive numbers + * whilst watchpoints are negative. The registers are laid out as pairs + * of (address, control), each pair mapping to a unique hw_breakpoint struct. + * Register 0 is reserved for describing resource information. + */ +static int compat_ptrace_hbp_num_to_idx(compat_long_t num) +{ + return (abs(num) - 1) >> 1; +} + +static int compat_ptrace_hbp_get_resource_info(u32 *kdata) +{ + u8 num_brps, num_wrps, debug_arch, wp_len; + u32 reg = 0; + + num_brps = hw_breakpoint_slots(TYPE_INST); + num_wrps = hw_breakpoint_slots(TYPE_DATA); + + debug_arch = debug_monitors_arch(); + wp_len = 8; + reg |= debug_arch; + reg <<= 8; + reg |= wp_len; + reg <<= 8; + reg |= num_wrps; + reg <<= 8; + reg |= num_brps; + + *kdata = reg; + return 0; +} + +static int compat_ptrace_hbp_get(unsigned int note_type, + struct task_struct *tsk, + compat_long_t num, + u32 *kdata) +{ + u64 addr = 0; + u32 ctrl = 0; + + int err, idx = compat_ptrace_hbp_num_to_idx(num);; + + if (num & 1) { + err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); + *kdata = (u32)addr; + } else { + err = ptrace_hbp_get_ctrl(note_type, tsk, idx, &ctrl); + *kdata = ctrl; + } + + return err; +} + +static int compat_ptrace_hbp_set(unsigned int note_type, + struct task_struct *tsk, + compat_long_t num, + u32 *kdata) +{ + u64 addr; + u32 ctrl; + + int err, idx = compat_ptrace_hbp_num_to_idx(num); + + if (num & 1) { + addr = *kdata; + err = ptrace_hbp_set_addr(note_type, tsk, idx, addr); + } else { + ctrl = *kdata; + err = ptrace_hbp_set_ctrl(note_type, tsk, idx, ctrl); + } + + return err; +} + +static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, + compat_ulong_t __user *data) +{ + int ret; + u32 kdata; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + /* Watchpoint */ + if (num < 0) { + ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); + /* Resource info */ + } else if (num == 0) { + ret = compat_ptrace_hbp_get_resource_info(&kdata); + /* Breakpoint */ + } else { + ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); + } + set_fs(old_fs); + + if (!ret) + ret = put_user(kdata, data); + + return ret; +} + +static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, + compat_ulong_t __user *data) +{ + int ret; + u32 kdata = 0; + mm_segment_t old_fs = get_fs(); + + if (num == 0) + return 0; + + ret = get_user(kdata, data); + if (ret) + return ret; + + set_fs(KERNEL_DS); + if (num < 0) + ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); + else + ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); + set_fs(old_fs); + + return ret; +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + + switch (request) { + case PTRACE_PEEKUSR: + ret = compat_ptrace_read_user(child, addr, datap); + break; + + case PTRACE_POKEUSR: + ret = compat_ptrace_write_user(child, addr, data); + break; + + case COMPAT_PTRACE_GETREGS: + ret = copy_regset_to_user(child, + &user_aarch32_view, + REGSET_COMPAT_GPR, + 0, sizeof(compat_elf_gregset_t), + datap); + break; + + case COMPAT_PTRACE_SETREGS: + ret = copy_regset_from_user(child, + &user_aarch32_view, + REGSET_COMPAT_GPR, + 0, sizeof(compat_elf_gregset_t), + datap); + break; + + case COMPAT_PTRACE_GET_THREAD_AREA: + ret = put_user((compat_ulong_t)child->thread.tp_value, + (compat_ulong_t __user *)datap); + break; + + case COMPAT_PTRACE_SET_SYSCALL: + task_pt_regs(child)->syscallno = data; + ret = 0; + break; + + case COMPAT_PTRACE_GETVFPREGS: + ret = copy_regset_to_user(child, + &user_aarch32_view, + REGSET_COMPAT_VFP, + 0, VFP_STATE_SIZE, + datap); + break; + + case COMPAT_PTRACE_SETVFPREGS: + ret = copy_regset_from_user(child, + &user_aarch32_view, + REGSET_COMPAT_VFP, + 0, VFP_STATE_SIZE, + datap); + break; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + case COMPAT_PTRACE_GETHBPREGS: + ret = compat_ptrace_gethbpregs(child, addr, datap); + break; + + case COMPAT_PTRACE_SETHBPREGS: + ret = compat_ptrace_sethbpregs(child, addr, datap); + break; +#endif + + default: + ret = compat_ptrace_request(child, request, addr, + data); + break; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (is_compat_thread(task_thread_info(task))) + return &user_aarch32_view; +#endif + return &user_aarch64_view; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + return ptrace_request(child, request, addr, data); +} + + +static int __init ptrace_break_init(void) +{ + hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); + return 0; +} +core_initcall(ptrace_break_init); + + +asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +{ + unsigned long saved_reg; + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return regs->syscallno; + + if (is_compat_task()) { + /* AArch32 uses ip (r12) for scratch */ + saved_reg = regs->regs[12]; + regs->regs[12] = dir; + } else { + /* + * Save X7. X7 is used to denote syscall entry/exit: + * X7 = 0 -> entry, = 1 -> exit + */ + saved_reg = regs->regs[7]; + regs->regs[7] = dir; + } + + if (dir) + tracehook_report_syscall_exit(regs, 0); + else if (tracehook_report_syscall_entry(regs)) + regs->syscallno = ~0UL; + + if (is_compat_task()) + regs->regs[12] = saved_reg; + else + regs->regs[7] = saved_reg; + + return regs->syscallno; +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c new file mode 100644 index 00000000000..48ffb9fb3fe --- /dev/null +++ b/arch/arm64/kernel/setup.c @@ -0,0 +1,347 @@ +/* + * Based on arch/arm/kernel/setup.c + * + * Copyright (C) 1995-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/utsname.h> +#include <linux/initrd.h> +#include <linux/console.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/screen_info.h> +#include <linux/init.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/root_dev.h> +#include <linux/cpu.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> + +#include <asm/cputype.h> +#include <asm/elf.h> +#include <asm/cputable.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/traps.h> +#include <asm/memblock.h> + +unsigned int processor_id; +EXPORT_SYMBOL(processor_id); + +unsigned int elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +static const char *cpu_name; +static const char *machine_name; +phys_addr_t __fdt_pointer __initdata; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { + { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + }, + { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + } +}; + +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] + +void __init early_print(const char *str, ...) +{ + char buf[256]; + va_list ap; + + va_start(ap, str); + vsnprintf(buf, sizeof(buf), str, ap); + va_end(ap); + + printk("%s", buf); +} + +static void __init setup_processor(void) +{ + struct cpu_info *cpu_info; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc.S + */ + cpu_info = lookup_processor_type(read_cpuid_id()); + if (!cpu_info) { + printk("CPU configuration botched (ID %08x), unable to continue.\n", + read_cpuid_id()); + while (1); + } + + cpu_name = cpu_info->cpu_name; + + printk("CPU: %s [%08x] revision %d\n", + cpu_name, read_cpuid_id(), read_cpuid_id() & 15); + + sprintf(init_utsname()->machine, "aarch64"); + elf_hwcap = 0; +} + +static void __init setup_machine_fdt(phys_addr_t dt_phys) +{ + struct boot_param_header *devtree; + unsigned long dt_root; + + /* Check we have a non-NULL DT pointer */ + if (!dt_phys) { + early_print("\n" + "Error: NULL or invalid device tree blob\n" + "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" + "\nPlease check your bootloader.\n"); + + while (true) + cpu_relax(); + + } + + devtree = phys_to_virt(dt_phys); + + /* Check device tree validity */ + if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) { + early_print("\n" + "Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" + "Expected 0x%x, found 0x%x\n" + "\nPlease check your bootloader.\n", + dt_phys, devtree, OF_DT_HEADER, + be32_to_cpu(devtree->magic)); + + while (true) + cpu_relax(); + } + + initial_boot_params = devtree; + dt_root = of_get_flat_dt_root(); + + machine_name = of_get_flat_dt_prop(dt_root, "model", NULL); + if (!machine_name) + machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL); + if (!machine_name) + machine_name = "<unknown>"; + pr_info("Machine: %s\n", machine_name); + + /* Retrieve various information from the /chosen node */ + of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); + /* Initialize {size,address}-cells info */ + of_scan_flat_dt(early_init_dt_scan_root, NULL); + /* Setup memory, calling early_init_dt_add_memory_arch */ + of_scan_flat_dt(early_init_dt_scan_memory, NULL); +} + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + size &= PAGE_MASK; + memblock_add(base, size); +} + +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return __va(memblock_alloc(size, align)); +} + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + phys_addr_t limit; + + if (!p) + return 1; + + limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", limit >> 20); + + memblock_enforce_memory_limit(limit); + + return 0; +} +early_param("mem", early_mem); + +static void __init request_standard_resources(void) +{ + struct memblock_region *region; + struct resource *res; + + kernel_code.start = virt_to_phys(_text); + kernel_code.end = virt_to_phys(_etext - 1); + kernel_data.start = virt_to_phys(_sdata); + kernel_data.end = virt_to_phys(_end - 1); + + for_each_memblock(memory, region) { + res = alloc_bootmem_low(sizeof(*res)); + res->name = "System RAM"; + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + request_resource(&iomem_resource, res); + + if (kernel_code.start >= res->start && + kernel_code.end <= res->end) + request_resource(res, &kernel_code); + if (kernel_data.start >= res->start && + kernel_data.end <= res->end) + request_resource(res, &kernel_data); + } +} + +void __init setup_arch(char **cmdline_p) +{ + setup_processor(); + + setup_machine_fdt(__fdt_pointer); + + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) _end; + + *cmdline_p = boot_command_line; + + parse_early_param(); + + arm64_memblock_init(); + + paging_init(); + request_standard_resources(); + + unflatten_device_tree(); + +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif +} + +static DEFINE_PER_CPU(struct cpu, cpu_data); + +static int __init topology_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cpu *cpu = &per_cpu(cpu_data, i); + cpu->hotpluggable = 1; + register_cpu(cpu, i); + } + + return 0; +} +subsys_initcall(topology_init); + +static const char *hwcap_str[] = { + "fp", + "asimd", + NULL +}; + +static int c_show(struct seq_file *m, void *v) +{ + int i; + + seq_printf(m, "Processor\t: %s rev %d (%s)\n", + cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + + for_each_online_cpu(i) { + /* + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. + */ +#ifdef CONFIG_SMP + seq_printf(m, "processor\t: %d\n", i); +#endif + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + } + + /* dump out the processor features */ + seq_puts(m, "Features\t: "); + + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, "%s ", hwcap_str[i]); + + seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); + seq_printf(m, "CPU architecture: AArch64\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); + seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); + seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); + + seq_puts(m, "\n"); + + seq_printf(m, "Hardware\t: %s\n", machine_name); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c new file mode 100644 index 00000000000..8807ba2cf26 --- /dev/null +++ b/arch/arm64/kernel/signal.c @@ -0,0 +1,437 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/freezer.h> +#include <linux/uaccess.h> +#include <linux/tracehook.h> +#include <linux/ratelimit.h> + +#include <asm/compat.h> +#include <asm/debug-monitors.h> +#include <asm/elf.h> +#include <asm/cacheflush.h> +#include <asm/ucontext.h> +#include <asm/unistd.h> +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/vdso.h> + +/* + * Do a signal return; undo the signal stack. These are aligned to 128-bit. + */ +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; +}; + +static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) +{ + struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; + int err; + + /* dump the hardware registers to the fpsimd_state structure */ + fpsimd_save_state(fpsimd); + + /* copy the FP and status/control registers */ + err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); + __put_user_error(fpsimd->fpsr, &ctx->fpsr, err); + __put_user_error(fpsimd->fpcr, &ctx->fpcr, err); + + /* copy the magic/size information */ + __put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err); + + return err ? -EFAULT : 0; +} + +static int restore_fpsimd_context(struct fpsimd_context __user *ctx) +{ + struct fpsimd_state fpsimd; + __u32 magic, size; + int err = 0; + + /* check the magic/size information */ + __get_user_error(magic, &ctx->head.magic, err); + __get_user_error(size, &ctx->head.size, err); + if (err) + return -EFAULT; + if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context)) + return -EINVAL; + + /* copy the FP and status/control registers */ + err = __copy_from_user(fpsimd.vregs, ctx->vregs, + sizeof(fpsimd.vregs)); + __get_user_error(fpsimd.fpsr, &ctx->fpsr, err); + __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); + + /* load the hardware registers from the fpsimd_state structure */ + if (!err) { + preempt_disable(); + fpsimd_load_state(&fpsimd); + preempt_enable(); + } + + return err ? -EFAULT : 0; +} + +static int restore_sigframe(struct pt_regs *regs, + struct rt_sigframe __user *sf) +{ + sigset_t set; + int i, err; + struct aux_context __user *aux = + (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + + err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); + if (err == 0) + set_current_blocked(&set); + + for (i = 0; i < 31; i++) + __get_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], + err); + __get_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); + __get_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); + __get_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + + /* + * Avoid sys_rt_sigreturn() restarting. + */ + regs->syscallno = ~0UL; + + err |= !valid_user_regs(®s->user_regs); + + if (err == 0) + err |= restore_fpsimd_context(&aux->fpsimd); + + return err; +} + +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 128-bit boundary, then 'sp' should + * be word aligned here. + */ + if (regs->sp & 15) + goto badframe; + + frame = (struct rt_sigframe __user *)regs->sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (restore_sigframe(regs, frame)) + goto badframe; + + if (do_sigaltstack(&frame->uc.uc_stack, + NULL, regs->sp) == -EFAULT) + goto badframe; + + return regs->regs[0]; + +badframe: + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->sp); + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, + unsigned long sp) +{ + return do_sigaltstack(uss, uoss, sp); +} + +static int setup_sigframe(struct rt_sigframe __user *sf, + struct pt_regs *regs, sigset_t *set) +{ + int i, err = 0; + struct aux_context __user *aux = + (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + + for (i = 0; i < 31; i++) + __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], + err); + __put_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); + __put_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); + __put_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + + __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); + + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); + + if (err == 0) + err |= preserve_fpsimd_context(&aux->fpsimd); + + /* set the "end" magic */ + __put_user_error(0, &aux->end.magic, err); + __put_user_error(0, &aux->end.size, err); + + return err; +} + +static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + int framesize) +{ + unsigned long sp, sp_top; + void __user *frame; + + sp = sp_top = regs->sp; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = sp_top = current->sas_ss_sp + current->sas_ss_size; + + /* room for stack frame (FP, LR) */ + sp -= 16; + + sp = (sp - framesize) & ~15; + frame = (void __user *)sp; + + /* + * Check that we can actually write to the signal frame. + */ + if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) + frame = NULL; + + return frame; +} + +static int setup_return(struct pt_regs *regs, struct k_sigaction *ka, + void __user *frame, int usig) +{ + int err = 0; + __sigrestore_t sigtramp; + unsigned long __user *sp = (unsigned long __user *)regs->sp; + + /* set up the stack frame */ + __put_user_error(regs->regs[29], sp - 2, err); + __put_user_error(regs->regs[30], sp - 1, err); + + regs->regs[0] = usig; + regs->regs[29] = regs->sp - 16; + regs->sp = (unsigned long)frame; + regs->pc = (unsigned long)ka->sa.sa_handler; + + if (ka->sa.sa_flags & SA_RESTORER) + sigtramp = ka->sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + regs->regs[30] = (unsigned long)sigtramp; + + return err; +} + +static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + stack_t stack; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + if (!frame) + return 1; + + __put_user_error(0, &frame->uc.uc_flags, err); + __put_user_error(NULL, &frame->uc.uc_link, err); + + memset(&stack, 0, sizeof(stack)); + stack.ss_sp = (void __user *)current->sas_ss_sp; + stack.ss_flags = sas_ss_flags(regs->sp); + stack.ss_size = current->sas_ss_size; + err |= __copy_to_user(&frame->uc.uc_stack, &stack, sizeof(stack)); + + err |= setup_sigframe(frame, regs, set); + if (err == 0) + err = setup_return(regs, ka, frame, usig); + + if (err == 0 && ka->sa.sa_flags & SA_SIGINFO) { + err |= copy_siginfo_to_user(&frame->info, info); + regs->regs[1] = (unsigned long)&frame->info; + regs->regs[2] = (unsigned long)&frame->uc; + } + + return err; +} + +static void setup_restart_syscall(struct pt_regs *regs) +{ + if (is_compat_task()) + compat_setup_restart_syscall(regs); + else + regs->regs[8] = __NR_restart_syscall; +} + +/* + * OK, we're invoking a handler + */ +static void handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, struct pt_regs *regs) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = current; + sigset_t *oldset = sigmask_to_save(); + int usig = sig; + int ret; + + /* + * translate the signal + */ + if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) + usig = thread->exec_domain->signal_invmap[usig]; + + /* + * Set up the stack frame + */ + if (is_compat_task()) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = compat_setup_rt_frame(usig, ka, info, oldset, + regs); + else + ret = compat_setup_frame(usig, ka, oldset, regs); + } else { + ret = setup_rt_frame(usig, ka, info, oldset, regs); + } + + /* + * Check that the resulting registers are actually sane. + */ + ret |= !valid_user_regs(®s->user_regs); + + if (ret != 0) { + force_sigsegv(sig, tsk); + return; + } + + /* + * Fast forward the stepping logic so we step into the signal + * handler. + */ + user_fastforward_single_step(tsk); + + signal_delivered(sig, info, ka, regs, 0); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +static void do_signal(struct pt_regs *regs) +{ + unsigned long continue_addr = 0, restart_addr = 0; + struct k_sigaction ka; + siginfo_t info; + int signr, retval = 0; + int syscall = (int)regs->syscallno; + + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall >= 0) { + continue_addr = regs->pc; + restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); + retval = regs->regs[0]; + + /* + * Avoid additional syscall restarting via ret_to_user. + */ + regs->syscallno = ~0UL; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTART_RESTARTBLOCK: + regs->regs[0] = regs->orig_x0; + regs->pc = restart_addr; + break; + } + } + + /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. + */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + if (signr > 0) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->pc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ka.sa.sa_flags & SA_RESTART)))) { + regs->regs[0] = -EINTR; + regs->pc = continue_addr; + } + + handle_signal(signr, &ka, &info, regs); + return; + } + + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall >= 0 && regs->pc == restart_addr) { + if (retval == -ERESTART_RESTARTBLOCK) + setup_restart_syscall(regs); + user_rewind_single_step(current); + } + + restore_saved_sigmask(); +} + +asmlinkage void do_notify_resume(struct pt_regs *regs, + unsigned int thread_flags) +{ + if (thread_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } +} diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c new file mode 100644 index 00000000000..ac74c2f261e --- /dev/null +++ b/arch/arm64/kernel/signal32.c @@ -0,0 +1,876 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * Modified by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/syscalls.h> +#include <linux/ratelimit.h> + +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + /* The padding is the same size as AArch64. */ + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + compat_pid_t _pid; /* sender's pid */ + __compat_uid32_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + compat_pid_t _pid; /* sender's pid */ + __compat_uid32_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + compat_pid_t _pid; /* which child */ + __compat_uid32_t _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + compat_uptr_t _addr; /* faulting insn/memory ref. */ + short _addr_lsb; /* LSB of the reported address */ + } _sigfault; + + /* SIGPOLL */ + struct { + compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +struct compat_sigaction { + compat_uptr_t sa_handler; + compat_ulong_t sa_flags; + compat_uptr_t sa_restorer; + compat_sigset_t sa_mask; +}; + +struct compat_old_sigaction { + compat_uptr_t sa_handler; + compat_old_sigset_t sa_mask; + compat_ulong_t sa_flags; + compat_uptr_t sa_restorer; +}; + +typedef struct compat_sigaltstack { + compat_uptr_t ss_sp; + int ss_flags; + compat_size_t ss_size; +} compat_stack_t; + +struct compat_sigcontext { + /* We always set these two fields to 0 */ + compat_ulong_t trap_no; + compat_ulong_t error_code; + + compat_ulong_t oldmask; + compat_ulong_t arm_r0; + compat_ulong_t arm_r1; + compat_ulong_t arm_r2; + compat_ulong_t arm_r3; + compat_ulong_t arm_r4; + compat_ulong_t arm_r5; + compat_ulong_t arm_r6; + compat_ulong_t arm_r7; + compat_ulong_t arm_r8; + compat_ulong_t arm_r9; + compat_ulong_t arm_r10; + compat_ulong_t arm_fp; + compat_ulong_t arm_ip; + compat_ulong_t arm_sp; + compat_ulong_t arm_lr; + compat_ulong_t arm_pc; + compat_ulong_t arm_cpsr; + compat_ulong_t fault_address; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + struct compat_ucontext *uc_link; + compat_stack_t uc_stack; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; + int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; + compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_vfp_sigframe { + compat_ulong_t magic; + compat_ulong_t size; + struct compat_user_vfp { + compat_u64 fpregs[32]; + compat_ulong_t fpscr; + } ufp; + struct compat_user_vfp_exc { + compat_ulong_t fpexc; + compat_ulong_t fpinst; + compat_ulong_t fpinst2; + } ufp_exc; +} __attribute__((__aligned__(8))); + +#define VFP_MAGIC 0x56465001 +#define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe) + +struct compat_aux_sigframe { + struct compat_vfp_sigframe vfp; + + /* Something that isn't a valid magic number for any coprocessor. */ + unsigned long end_magic; +} __attribute__((__aligned__(8))); + +struct compat_sigframe { + struct compat_ucontext uc; + compat_ulong_t retcode[2]; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_sigframe sig; +}; + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +/* + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + */ +#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_sigreturn) +#define SVC_SYS_SIGRETURN (0xef000000 | __NR_sigreturn) +#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_rt_sigreturn) +#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_rt_sigreturn) + +/* + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ +#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_sigreturn) << 16) | \ + 0x2700 | __NR_sigreturn) +#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_rt_sigreturn) << 16) | \ + 0x2700 | __NR_rt_sigreturn) + +const compat_ulong_t aarch32_sigret_code[6] = { + /* + * AArch32 sigreturn code. + * We don't construct an OABI SWI - instead we just set the imm24 field + * to the EABI syscall number so that we create a sane disassembly. + */ + MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, + MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, +}; + +static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) +{ + compat_sigset_t cset; + + cset.sig[0] = set->sig[0] & 0xffffffffull; + cset.sig[1] = set->sig[0] >> 32; + + return copy_to_user(uset, &cset, sizeof(*uset)); +} + +static inline int get_sigset_t(sigset_t *set, + const compat_sigset_t __user *uset) +{ + compat_sigset_t s32; + + if (copy_from_user(&s32, uset, sizeof(*uset))) + return -EFAULT; + + set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + return 0; +} + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(*to))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + * this code is fixed accordingly. + * It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic + * 3 ints plus the relevant union member. + * This routine must convert siginfo from 64bit to 32bit as well + * at the same time. + */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + if (from->si_code < 0) + err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, + SI_PAD_SIZE); + else switch (from->si_code & __SI_MASK) { + case __SI_KILL: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_TIMER: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, + &to->si_ptr); + break; + case __SI_POLL: + err |= __put_user(from->si_band, &to->si_band); + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_FAULT: + err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, + &to->si_addr); +#ifdef BUS_MCEERR_AO + /* + * Other callers might not initialize the si_lsb field, + * so check explicitely for the right codes here. + */ + if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); +#endif + break; + case __SI_CHLD: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_status, &to->si_status); + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + break; + case __SI_RT: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ: /* But this is */ + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); + break; + default: /* this is just in case for now ... */ + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + memset(to, 0, sizeof *to); + + if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || + copy_from_user(to->_sifields._pad, + from->_sifields._pad, SI_PAD_SIZE)) + return -EFAULT; + + return 0; +} + +/* + * VFP save/restore code. + */ +static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) +{ + struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; + compat_ulong_t magic = VFP_MAGIC; + compat_ulong_t size = VFP_STORAGE_SIZE; + compat_ulong_t fpscr, fpexc; + int err = 0; + + /* + * Save the hardware registers to the fpsimd_state structure. + * Note that this also saves V16-31, which aren't visible + * in AArch32. + */ + fpsimd_save_state(fpsimd); + + /* Place structure header on the stack */ + __put_user_error(magic, &frame->magic, err); + __put_user_error(size, &frame->size, err); + + /* + * Now copy the FP registers. Since the registers are packed, + * we can copy the prefix we want (V0-V15) as it is. + * FIXME: Won't work if big endian. + */ + err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, + sizeof(frame->ufp.fpregs)); + + /* Create an AArch32 fpscr from the fpsr and the fpcr. */ + fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | + (fpsimd->fpcr & VFP_FPSCR_CTRL_MASK); + __put_user_error(fpscr, &frame->ufp.fpscr, err); + + /* + * The exception register aren't available so we fake up a + * basic FPEXC and zero everything else. + */ + fpexc = (1 << 30); + __put_user_error(fpexc, &frame->ufp_exc.fpexc, err); + __put_user_error(0, &frame->ufp_exc.fpinst, err); + __put_user_error(0, &frame->ufp_exc.fpinst2, err); + + return err ? -EFAULT : 0; +} + +static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) +{ + struct fpsimd_state fpsimd; + compat_ulong_t magic = VFP_MAGIC; + compat_ulong_t size = VFP_STORAGE_SIZE; + compat_ulong_t fpscr; + int err = 0; + + __get_user_error(magic, &frame->magic, err); + __get_user_error(size, &frame->size, err); + + if (err) + return -EFAULT; + if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return -EINVAL; + + /* + * Copy the FP registers into the start of the fpsimd_state. + * FIXME: Won't work if big endian. + */ + err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, + sizeof(frame->ufp.fpregs)); + + /* Extract the fpsr and the fpcr from the fpscr */ + __get_user_error(fpscr, &frame->ufp.fpscr, err); + fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK; + fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + + /* + * We don't need to touch the exception register, so + * reload the hardware state. + */ + if (!err) { + preempt_disable(); + fpsimd_load_state(&fpsimd); + preempt_enable(); + } + + return err ? -EFAULT : 0; +} + +/* + * atomically swap in the new signal mask, and wait for a signal. + */ +asmlinkage int compat_sys_sigsuspend(int restart, compat_ulong_t oldmask, + compat_old_sigset_t mask) +{ + sigset_t blocked; + + siginitset(¤t->blocked, mask); + return sigsuspend(&blocked); +} + +asmlinkage int compat_sys_sigaction(int sig, + const struct compat_old_sigaction __user *act, + struct compat_old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + compat_old_sigset_t mask; + compat_uptr_t handler, restorer; + + if (act) { + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(handler, &act->sa_handler) || + __get_user(restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + + new_ka.sa.sa_handler = compat_ptr(handler); + new_ka.sa.sa_restorer = compat_ptr(restorer); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +asmlinkage int compat_sys_rt_sigaction(int sig, + const struct compat_sigaction __user *act, + struct compat_sigaction __user *oact, + compat_size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + + if (act) { + compat_uptr_t handler, restorer; + + ret = get_user(handler, &act->sa_handler); + new_ka.sa.sa_handler = compat_ptr(handler); + ret |= get_user(restorer, &act->sa_restorer); + new_ka.sa.sa_restorer = compat_ptr(restorer); + ret |= get_sigset_t(&new_ka.sa.sa_mask, &act->sa_mask); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + if (ret) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + if (!ret && oact) { + ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); + ret |= put_sigset_t(&oact->sa_mask, &old_ka.sa.sa_mask); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + return ret; +} + +int compat_do_sigaltstack(compat_uptr_t compat_uss, compat_uptr_t compat_uoss, + compat_ulong_t sp) +{ + compat_stack_t __user *newstack = compat_ptr(compat_uss); + compat_stack_t __user *oldstack = compat_ptr(compat_uoss); + compat_uptr_t ss_sp; + int ret; + mm_segment_t old_fs; + stack_t uss, uoss; + + /* Marshall the compat new stack into a stack_t */ + if (newstack) { + if (get_user(ss_sp, &newstack->ss_sp) || + __get_user(uss.ss_flags, &newstack->ss_flags) || + __get_user(uss.ss_size, &newstack->ss_size)) + return -EFAULT; + uss.ss_sp = compat_ptr(ss_sp); + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* The __user pointer casts are valid because of the set_fs() */ + ret = do_sigaltstack( + newstack ? (stack_t __user *) &uss : NULL, + oldstack ? (stack_t __user *) &uoss : NULL, + (unsigned long)sp); + set_fs(old_fs); + + /* Convert the old stack_t into a compat stack. */ + if (!ret && oldstack && + (put_user(ptr_to_compat(uoss.ss_sp), &oldstack->ss_sp) || + __put_user(uoss.ss_flags, &oldstack->ss_flags) || + __put_user(uoss.ss_size, &oldstack->ss_size))) + return -EFAULT; + return ret; +} + +static int compat_restore_sigframe(struct pt_regs *regs, + struct compat_sigframe __user *sf) +{ + int err; + sigset_t set; + struct compat_aux_sigframe __user *aux; + + err = get_sigset_t(&set, &sf->uc.uc_sigmask); + if (err == 0) { + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + } + + __get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); + __get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); + __get_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err); + __get_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err); + __get_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err); + __get_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err); + __get_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err); + __get_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err); + __get_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err); + __get_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err); + __get_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err); + __get_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err); + __get_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err); + __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); + __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); + __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); + __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + + /* + * Avoid compat_sys_sigreturn() restarting. + */ + regs->syscallno = ~0UL; + + err |= !valid_user_regs(®s->user_regs); + + aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; + if (err == 0) + err |= compat_restore_vfp_context(&aux->vfp); + + return err; +} + +asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) +{ + struct compat_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->compat_sp & 7) + goto badframe; + + frame = (struct compat_sigframe __user *)regs->compat_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (compat_restore_sigframe(regs, frame)) + goto badframe; + + return regs->regs[0]; + +badframe: + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->sp); + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->compat_sp & 7) + goto badframe; + + frame = (struct compat_rt_sigframe __user *)regs->compat_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (compat_restore_sigframe(regs, &frame->sig)) + goto badframe; + + if (compat_do_sigaltstack(ptr_to_compat(&frame->sig.uc.uc_stack), + ptr_to_compat((void __user *)NULL), + regs->compat_sp) == -EFAULT) + goto badframe; + + return regs->regs[0]; + +badframe: + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->sp); + force_sig(SIGSEGV, current); + return 0; +} + +static inline void __user *compat_get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + int framesize) +{ + compat_ulong_t sp = regs->compat_sp; + void __user *frame; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + /* + * ATPCS B01 mandates 8-byte alignment + */ + frame = compat_ptr((compat_uptr_t)((sp - framesize) & ~7)); + + /* + * Check that we can actually write to the signal frame. + */ + if (!access_ok(VERIFY_WRITE, frame, framesize)) + frame = NULL; + + return frame; +} + +static int compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, + compat_ulong_t __user *rc, void __user *frame, + int usig) +{ + compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); + compat_ulong_t retcode; + compat_ulong_t spsr = regs->pstate & ~PSR_f; + int thumb; + + /* Check if the handler is written for ARM or Thumb */ + thumb = handler & 1; + + if (thumb) { + spsr |= COMPAT_PSR_T_BIT; + spsr &= ~COMPAT_PSR_IT_MASK; + } else { + spsr &= ~COMPAT_PSR_T_BIT; + } + + if (ka->sa.sa_flags & SA_RESTORER) { + retcode = ptr_to_compat(ka->sa.sa_restorer); + } else { + /* Set up sigreturn pointer */ + unsigned int idx = thumb << 1; + + if (ka->sa.sa_flags & SA_SIGINFO) + idx += 3; + + retcode = AARCH32_VECTORS_BASE + + AARCH32_KERN_SIGRET_CODE_OFFSET + + (idx << 2) + thumb; + } + + regs->regs[0] = usig; + regs->compat_sp = ptr_to_compat(frame); + regs->compat_lr = retcode; + regs->pc = handler; + regs->pstate = spsr; + + return 0; +} + +static int compat_setup_sigframe(struct compat_sigframe __user *sf, + struct pt_regs *regs, sigset_t *set) +{ + struct compat_aux_sigframe __user *aux; + int err = 0; + + __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); + __put_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); + __put_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err); + __put_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err); + __put_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err); + __put_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err); + __put_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err); + __put_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err); + __put_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err); + __put_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err); + __put_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err); + __put_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err); + __put_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err); + __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); + __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); + __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); + __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + + __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); + __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err); + __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); + __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); + + err |= put_sigset_t(&sf->uc.uc_sigmask, set); + + aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; + + if (err == 0) + err |= compat_preserve_vfp_context(&aux->vfp); + __put_user_error(0, &aux->end_magic, err); + + return err; +} + +/* + * 32-bit signal handling routines called from signal.c + */ +int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + compat_stack_t stack; + int err = 0; + + frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!frame) + return 1; + + err |= copy_siginfo_to_user32(&frame->info, info); + + __put_user_error(0, &frame->sig.uc.uc_flags, err); + __put_user_error(NULL, &frame->sig.uc.uc_link, err); + + memset(&stack, 0, sizeof(stack)); + stack.ss_sp = (compat_uptr_t)current->sas_ss_sp; + stack.ss_flags = sas_ss_flags(regs->compat_sp); + stack.ss_size = current->sas_ss_size; + err |= __copy_to_user(&frame->sig.uc.uc_stack, &stack, sizeof(stack)); + + err |= compat_setup_sigframe(&frame->sig, regs, set); + if (err == 0) + err = compat_setup_return(regs, ka, frame->sig.retcode, frame, + usig); + + if (err == 0) { + regs->regs[1] = (compat_ulong_t)(unsigned long)&frame->info; + regs->regs[2] = (compat_ulong_t)(unsigned long)&frame->sig.uc; + } + + return err; +} + +int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) +{ + struct compat_sigframe __user *frame; + int err = 0; + + frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!frame) + return 1; + + __put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err); + + err |= compat_setup_sigframe(frame, regs, set); + if (err == 0) + err = compat_setup_return(regs, ka, frame->retcode, frame, usig); + + return err; +} + +/* + * RT signals don't have generic compat wrappers. + * See arch/powerpc/kernel/signal_32.c + */ +asmlinkage int compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + compat_size_t sigsetsize) +{ + sigset_t s; + sigset_t __user *up; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (get_sigset_t(&s, set)) + return -EFAULT; + } + + set_fs(KERNEL_DS); + /* This is valid because of the set_fs() */ + up = (sigset_t __user *) &s; + ret = sys_rt_sigprocmask(how, set ? up : NULL, oset ? up : NULL, + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; + if (oset) { + if (put_sigset_t(oset, &s)) + return -EFAULT; + } + return 0; +} + +asmlinkage int compat_sys_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) +{ + sigset_t s; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + /* The __user pointer cast is valid because of the set_fs() */ + ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); + set_fs(old_fs); + if (!ret) { + if (put_sigset_t(set, &s)) + return -EFAULT; + } + return ret; +} + +asmlinkage int compat_sys_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + ret = copy_siginfo_from_user32(&info, uinfo); + if (unlikely(ret)) + return ret; + + set_fs (KERNEL_DS); + /* The __user pointer cast is valid because of the set_fs() */ + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info); + set_fs (old_fs); + return ret; +} + +void compat_setup_restart_syscall(struct pt_regs *regs) +{ + regs->regs[7] = __NR_restart_syscall; +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c new file mode 100644 index 00000000000..b711525be21 --- /dev/null +++ b/arch/arm64/kernel/smp.c @@ -0,0 +1,469 @@ +/* + * SMP initialisation and IPI support + * Based on arch/arm/kernel/smp.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cache.h> +#include <linux/profile.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/clockchips.h> +#include <linux/completion.h> +#include <linux/of.h> + +#include <asm/atomic.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/tlbflush.h> +#include <asm/ptrace.h> +#include <asm/mmu_context.h> + +/* + * as from 2.5, kernels no longer have an init_tasks structure + * so we need some other way of telling a new secondary core + * where to place its SVC stack + */ +struct secondary_data secondary_data; +volatile unsigned long secondary_holding_pen_release = -1; + +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CALL_FUNC_SINGLE, + IPI_CPU_STOP, +}; + +static DEFINE_RAW_SPINLOCK(boot_lock); + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void __cpuinit write_pen_release(int val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + +/* + * Boot a secondary CPU, and assign it the specified idle task. + * This also gives us the initial stack to use for this CPU. + */ +static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + unsigned long timeout; + + /* + * Set synchronisation state between this boot processor + * and the secondary one + */ + raw_spin_lock(&boot_lock); + + /* + * Update the pen release flag. + */ + write_pen_release(cpu); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + timeout = jiffies + (1 * HZ); + while (time_before(jiffies, timeout)) { + if (secondary_holding_pen_release == -1UL) + break; + udelay(10); + } + + /* + * Now the secondary core is starting up let it run its + * calibrations, then wait for it to finish + */ + raw_spin_unlock(&boot_lock); + + return secondary_holding_pen_release != -1 ? -ENOSYS : 0; +} + +static DECLARE_COMPLETION(cpu_running); + +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) +{ + int ret; + + /* + * We need to tell the secondary core where to find its stack and the + * page tables. + */ + secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; + __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + + /* + * Now bring the CPU into our world. + */ + ret = boot_secondary(cpu, idle); + if (ret == 0) { + /* + * CPU was successfully started, wait for it to come online or + * time out. + */ + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); + + if (!cpu_online(cpu)) { + pr_crit("CPU%u: failed to come online\n", cpu); + ret = -EIO; + } + } else { + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + } + + secondary_data.stack = NULL; + + return ret; +} + +/* + * This is the secondary CPU boot entry. We're using this CPUs + * idle thread stack, but a set of temporary page tables. + */ +asmlinkage void __cpuinit secondary_start_kernel(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int cpu = smp_processor_id(); + + printk("CPU%u: Booted secondary processor\n", cpu); + + /* + * All kernel threads share the same mm context; grab a + * reference and switch to it. + */ + atomic_inc(&mm->mm_count); + current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + + preempt_disable(); + trace_hardirqs_off(); + + /* + * Let the primary processor know we're out of the + * pen, then head off into the C entry point + */ + write_pen_release(-1); + + /* + * Synchronise with the boot thread. + */ + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); + + /* + * Enable local interrupts. + */ + notify_cpu_starting(cpu); + local_irq_enable(); + local_fiq_enable(); + + /* + * OK, now it's safe to let the boot CPU continue. Wait for + * the CPU migration code to notice that the CPU is online + * before we continue. + */ + set_cpu_online(cpu, true); + while (!cpu_active(cpu)) + cpu_relax(); + + /* + * OK, it's off to the idle thread for us + */ + cpu_idle(); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + + pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); +} + +void __init smp_prepare_boot_cpu(void) +{ +} + +static void (*smp_cross_call)(const struct cpumask *, unsigned int); +static phys_addr_t cpu_release_addr[NR_CPUS]; + +/* + * Enumerate the possible CPU set from the device tree. + */ +void __init smp_init_cpus(void) +{ + const char *enable_method; + struct device_node *dn = NULL; + int cpu = 0; + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + if (cpu >= NR_CPUS) + goto next; + + /* + * We currently support only the "spin-table" enable-method. + */ + enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method || strcmp(enable_method, "spin-table")) { + pr_err("CPU %d: missing or invalid enable-method property: %s\n", + cpu, enable_method); + goto next; + } + + /* + * Determine the address from which the CPU is polling. + */ + if (of_property_read_u64(dn, "cpu-release-addr", + &cpu_release_addr[cpu])) { + pr_err("CPU %d: missing or invalid cpu-release-addr property\n", + cpu); + goto next; + } + + set_cpu_possible(cpu, true); +next: + cpu++; + } + + /* sanity check */ + if (cpu > NR_CPUS) + pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n", + cpu, NR_CPUS); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int cpu; + void **release_addr; + unsigned int ncores = num_possible_cpus(); + + /* + * are we trying to boot more cores than exist? + */ + if (max_cpus > ncores) + max_cpus = ncores; + + /* + * Initialise the present map (which describes the set of CPUs + * actually populated at the present time) and release the + * secondaries from the bootloader. + */ + for_each_possible_cpu(cpu) { + if (max_cpus == 0) + break; + + if (!cpu_release_addr[cpu]) + continue; + + release_addr = __va(cpu_release_addr[cpu]); + release_addr[0] = (void *)__pa(secondary_holding_pen); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); + + set_cpu_present(cpu, true); + max_cpus--; + } + + /* + * Send an event to wake up the secondaries. + */ + sev(); +} + + +void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) +{ + smp_cross_call = fn; +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +static const char *ipi_types[NR_IPI] = { +#define S(x,s) [x - IPI_RESCHEDULE] = s + S(IPI_RESCHEDULE, "Rescheduling interrupts"), + S(IPI_CALL_FUNC, "Function call interrupts"), + S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), + S(IPI_CPU_STOP, "CPU stop interrupts"), +}; + +void show_ipi_list(struct seq_file *p, int prec) +{ + unsigned int cpu, i; + + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, + prec >= 4 ? " " : ""); + for_each_present_cpu(cpu) + seq_printf(p, "%10u ", + __get_irq_stat(cpu, ipi_irqs[i])); + seq_printf(p, " %s\n", ipi_types[i]); + } +} + +u64 smp_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = 0; + int i; + + for (i = 0; i < NR_IPI; i++) + sum += __get_irq_stat(cpu, ipi_irqs[i]); + + return sum; +} + +static DEFINE_RAW_SPINLOCK(stop_lock); + +/* + * ipi_cpu_stop - handle IPI from smp_send_stop() + */ +static void ipi_cpu_stop(unsigned int cpu) +{ + if (system_state == SYSTEM_BOOTING || + system_state == SYSTEM_RUNNING) { + raw_spin_lock(&stop_lock); + pr_crit("CPU%u: stopping\n", cpu); + dump_stack(); + raw_spin_unlock(&stop_lock); + } + + set_cpu_online(cpu, false); + + local_fiq_disable(); + local_irq_disable(); + + while (1) + cpu_relax(); +} + +/* + * Main handler for inter-processor interrupts + */ +void handle_IPI(int ipinr, struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + struct pt_regs *old_regs = set_irq_regs(regs); + + if (ipinr >= IPI_RESCHEDULE && ipinr < IPI_RESCHEDULE + NR_IPI) + __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_RESCHEDULE]); + + switch (ipinr) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + break; + + case IPI_CALL_FUNC_SINGLE: + irq_enter(); + generic_smp_call_function_single_interrupt(); + irq_exit(); + break; + + case IPI_CPU_STOP: + irq_enter(); + ipi_cpu_stop(cpu); + irq_exit(); + break; + + default: + pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); + break; + } + set_irq_regs(old_regs); +} + +void smp_send_reschedule(int cpu) +{ + smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); +} + +void smp_send_stop(void) +{ + unsigned long timeout; + + if (num_online_cpus() > 1) { + cpumask_t mask; + + cpumask_copy(&mask, cpu_online_mask); + cpu_clear(smp_processor_id(), mask); + + smp_cross_call(&mask, IPI_CPU_STOP); + } + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && timeout--) + udelay(1); + + if (num_online_cpus() > 1) + pr_warning("SMP: failed to stop secondary CPUs\n"); +} + +/* + * not supported here + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c new file mode 100644 index 00000000000..d25459ff57f --- /dev/null +++ b/arch/arm64/kernel/stacktrace.c @@ -0,0 +1,127 @@ +/* + * Stack tracing support + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/stacktrace.h> + +#include <asm/stacktrace.h> + +/* + * AArch64 PCS assigns the frame pointer to x29. + * + * A simple function prologue looks like this: + * sub sp, sp, #0x10 + * stp x29, x30, [sp] + * mov x29, sp + * + * A simple function epilogue looks like this: + * mov sp, x29 + * ldp x29, x30, [sp] + * add sp, sp, #0x10 + */ +int unwind_frame(struct stackframe *frame) +{ + unsigned long high, low; + unsigned long fp = frame->fp; + + low = frame->sp; + high = ALIGN(low, THREAD_SIZE); + + if (fp < low || fp > high || fp & 0xf) + return -EINVAL; + + frame->sp = fp + 0x10; + frame->fp = *(unsigned long *)(fp); + frame->pc = *(unsigned long *)(fp + 8); + + return 0; +} + +void notrace walk_stackframe(struct stackframe *frame, + int (*fn)(struct stackframe *, void *), void *data) +{ + while (1) { + int ret; + + if (fn(frame, data)) + break; + ret = unwind_frame(frame); + if (ret < 0) + break; + } +} +EXPORT_SYMBOL(walk_stackframe); + +#ifdef CONFIG_STACKTRACE +struct stack_trace_data { + struct stack_trace *trace; + unsigned int no_sched_functions; + unsigned int skip; +}; + +static int save_trace(struct stackframe *frame, void *d) +{ + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + unsigned long addr = frame->pc; + + if (data->no_sched_functions && in_sched_functions(addr)) + return 0; + if (data->skip) { + data->skip--; + return 0; + } + + trace->entries[trace->nr_entries++] = addr; + + return trace->nr_entries >= trace->max_entries; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + + if (tsk != current) { + data.no_sched_functions = 1; + frame.fp = thread_saved_fp(tsk); + frame.sp = thread_saved_sp(tsk); + frame.pc = thread_saved_pc(tsk); + } else { + register unsigned long current_sp asm("sp"); + data.no_sched_functions = 0; + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)save_stack_trace_tsk; + } + + walk_stackframe(&frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace(struct stack_trace *trace) +{ + save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c new file mode 100644 index 00000000000..905fcfb0ddd --- /dev/null +++ b/arch/arm64/kernel/sys.c @@ -0,0 +1,138 @@ +/* + * AArch64-specific system calls implementation + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> + +/* + * Clone a task - this clones the calling program thread. + */ +asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tidptr, unsigned long tls_val, + int __user *child_tidptr, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + /* 16-byte aligned stack mandatory on AArch64 */ + if (newsp & 15) + return -EINVAL; + return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); +} + +/* + * sys_execve() executes a new program. + */ +asmlinkage long sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) +{ + long error; + char * filename; + + filename = getname(filenamei); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + return error; +} + +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) +{ + struct pt_regs regs; + int ret; + + memset(®s, 0, sizeof(struct pt_regs)); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, ®s); + if (ret < 0) + goto out; + + /* + * Save argc to the register structure for userspace. + */ + regs.regs[0] = ret; + + /* + * We were successful. We won't be returning to our caller, but + * instead to user space by manipulating the kernel stack. + */ + asm( "add x0, %0, %1\n\t" + "mov x1, %2\n\t" + "mov x2, %3\n\t" + "bl memmove\n\t" /* copy regs to top of stack */ + "mov x27, #0\n\t" /* not a syscall */ + "mov x28, %0\n\t" /* thread structure */ + "mov sp, x0\n\t" /* reposition stack pointer */ + "b ret_to_user" + : + : "r" (current_thread_info()), + "Ir" (THREAD_START_SP - sizeof(regs)), + "r" (®s), + "Ir" (sizeof(regs)) + : "x0", "x1", "x2", "x27", "x28", "x30", "memory"); + + out: + return ret; +} +EXPORT_SYMBOL(kernel_execve); + +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t off) +{ + if (offset_in_page(off) != 0) + return -EINVAL; + + return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); +} + +/* + * Wrappers to pass the pt_regs argument. + */ +#define sys_execve sys_execve_wrapper +#define sys_clone sys_clone_wrapper +#define sys_rt_sigreturn sys_rt_sigreturn_wrapper +#define sys_sigaltstack sys_sigaltstack_wrapper + +#include <asm/syscalls.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = sym, + +/* + * The sys_call_table array must be 4K aligned to be accessible from + * kernel/entry.S. + */ +void *sys_call_table[__NR_syscalls] __aligned(4096) = { + [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S new file mode 100644 index 00000000000..5e4dc93cc31 --- /dev/null +++ b/arch/arm64/kernel/sys32.S @@ -0,0 +1,282 @@ +/* + * Compat system call wrappers + * + * Copyright (C) 2012 ARM Ltd. + * Authors: Will Deacon <will.deacon@arm.com> + * Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> + +/* + * System call wrappers for the AArch32 compatibility layer. + */ +compat_sys_fork_wrapper: + mov x0, sp + b compat_sys_fork +ENDPROC(compat_sys_fork_wrapper) + +compat_sys_vfork_wrapper: + mov x0, sp + b compat_sys_vfork +ENDPROC(compat_sys_vfork_wrapper) + +compat_sys_execve_wrapper: + mov x3, sp + b compat_sys_execve +ENDPROC(compat_sys_execve_wrapper) + +compat_sys_clone_wrapper: + mov x5, sp + b compat_sys_clone +ENDPROC(compat_sys_clone_wrapper) + +compat_sys_sigreturn_wrapper: + mov x0, sp + mov x27, #0 // prevent syscall restart handling (why) + b compat_sys_sigreturn +ENDPROC(compat_sys_sigreturn_wrapper) + +compat_sys_rt_sigreturn_wrapper: + mov x0, sp + mov x27, #0 // prevent syscall restart handling (why) + b compat_sys_rt_sigreturn +ENDPROC(compat_sys_rt_sigreturn_wrapper) + +compat_sys_sigaltstack_wrapper: + ldr x2, [sp, #S_COMPAT_SP] + b compat_do_sigaltstack +ENDPROC(compat_sys_sigaltstack_wrapper) + +compat_sys_statfs64_wrapper: + mov w3, #84 + cmp w1, #88 + csel w1, w3, w1, eq + b compat_sys_statfs64 +ENDPROC(compat_sys_statfs64_wrapper) + +compat_sys_fstatfs64_wrapper: + mov w3, #84 + cmp w1, #88 + csel w1, w3, w1, eq + b compat_sys_fstatfs64 +ENDPROC(compat_sys_fstatfs64_wrapper) + +/* + * Wrappers for AArch32 syscalls that either take 64-bit parameters + * in registers or that take 32-bit parameters which require sign + * extension. + */ +compat_sys_lseek_wrapper: + sxtw x1, w1 + b sys_lseek +ENDPROC(compat_sys_lseek_wrapper) + +compat_sys_pread64_wrapper: + orr x3, x4, x5, lsl #32 + b sys_pread64 +ENDPROC(compat_sys_pread64_wrapper) + +compat_sys_pwrite64_wrapper: + orr x3, x4, x5, lsl #32 + b sys_pwrite64 +ENDPROC(compat_sys_pwrite64_wrapper) + +compat_sys_truncate64_wrapper: + orr x1, x2, x3, lsl #32 + b sys_truncate +ENDPROC(compat_sys_truncate64_wrapper) + +compat_sys_ftruncate64_wrapper: + orr x1, x2, x3, lsl #32 + b sys_ftruncate +ENDPROC(compat_sys_ftruncate64_wrapper) + +compat_sys_readahead_wrapper: + orr x1, x2, x3, lsl #32 + mov w2, w4 + b sys_readahead +ENDPROC(compat_sys_readahead_wrapper) + +compat_sys_lookup_dcookie: + orr x0, x0, x1, lsl #32 + mov w1, w2 + mov w2, w3 + b sys_lookup_dcookie +ENDPROC(compat_sys_lookup_dcookie) + +compat_sys_fadvise64_64_wrapper: + mov w6, w1 + orr x1, x2, x3, lsl #32 + orr x2, x4, x5, lsl #32 + mov w3, w6 + b sys_fadvise64_64 +ENDPROC(compat_sys_fadvise64_64_wrapper) + +compat_sys_sync_file_range2_wrapper: + orr x2, x2, x3, lsl #32 + orr x3, x4, x5, lsl #32 + b sys_sync_file_range2 +ENDPROC(compat_sys_sync_file_range2_wrapper) + +compat_sys_fallocate_wrapper: + orr x2, x2, x3, lsl #32 + orr x3, x4, x5, lsl #32 + b sys_fallocate +ENDPROC(compat_sys_fallocate_wrapper) + +compat_sys_fanotify_mark_wrapper: + orr x2, x2, x3, lsl #32 + mov w3, w4 + mov w4, w5 + b sys_fanotify_mark +ENDPROC(compat_sys_fanotify_mark_wrapper) + +/* + * Use the compat system call wrappers. + */ +#define sys_fork compat_sys_fork_wrapper +#define sys_open compat_sys_open +#define sys_execve compat_sys_execve_wrapper +#define sys_lseek compat_sys_lseek_wrapper +#define sys_mount compat_sys_mount +#define sys_ptrace compat_sys_ptrace +#define sys_times compat_sys_times +#define sys_ioctl compat_sys_ioctl +#define sys_fcntl compat_sys_fcntl +#define sys_ustat compat_sys_ustat +#define sys_sigaction compat_sys_sigaction +#define sys_sigsuspend compat_sys_sigsuspend +#define sys_sigpending compat_sys_sigpending +#define sys_setrlimit compat_sys_setrlimit +#define sys_getrusage compat_sys_getrusage +#define sys_gettimeofday compat_sys_gettimeofday +#define sys_settimeofday compat_sys_settimeofday +#define sys_statfs compat_sys_statfs +#define sys_fstatfs compat_sys_fstatfs +#define sys_setitimer compat_sys_setitimer +#define sys_getitimer compat_sys_getitimer +#define sys_newstat compat_sys_newstat +#define sys_newlstat compat_sys_newlstat +#define sys_newfstat compat_sys_newfstat +#define sys_wait4 compat_sys_wait4 +#define sys_sysinfo compat_sys_sysinfo +#define sys_sigreturn compat_sys_sigreturn_wrapper +#define sys_clone compat_sys_clone_wrapper +#define sys_adjtimex compat_sys_adjtimex +#define sys_sigprocmask compat_sys_sigprocmask +#define sys_getdents compat_sys_getdents +#define sys_select compat_sys_select +#define sys_readv compat_sys_readv +#define sys_writev compat_sys_writev +#define sys_sysctl compat_sys_sysctl +#define sys_sched_rr_get_interval compat_sys_sched_rr_get_interval +#define sys_nanosleep compat_sys_nanosleep +#define sys_rt_sigreturn compat_sys_rt_sigreturn_wrapper +#define sys_rt_sigaction compat_sys_rt_sigaction +#define sys_rt_sigprocmask compat_sys_rt_sigprocmask +#define sys_rt_sigpending compat_sys_rt_sigpending +#define sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +#define sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo +#define sys_rt_sigsuspend compat_sys_rt_sigsuspend +#define sys_pread64 compat_sys_pread64_wrapper +#define sys_pwrite64 compat_sys_pwrite64_wrapper +#define sys_sigaltstack compat_sys_sigaltstack_wrapper +#define sys_sendfile compat_sys_sendfile +#define sys_vfork compat_sys_vfork_wrapper +#define sys_getrlimit compat_sys_getrlimit +#define sys_mmap2 sys_mmap_pgoff +#define sys_truncate64 compat_sys_truncate64_wrapper +#define sys_ftruncate64 compat_sys_ftruncate64_wrapper +#define sys_getdents64 compat_sys_getdents64 +#define sys_fcntl64 compat_sys_fcntl64 +#define sys_readahead compat_sys_readahead_wrapper +#define sys_futex compat_sys_futex +#define sys_sched_setaffinity compat_sys_sched_setaffinity +#define sys_sched_getaffinity compat_sys_sched_getaffinity +#define sys_io_setup compat_sys_io_setup +#define sys_io_getevents compat_sys_io_getevents +#define sys_io_submit compat_sys_io_submit +#define sys_lookup_dcookie compat_sys_lookup_dcookie +#define sys_timer_create compat_sys_timer_create +#define sys_timer_settime compat_sys_timer_settime +#define sys_timer_gettime compat_sys_timer_gettime +#define sys_clock_settime compat_sys_clock_settime +#define sys_clock_gettime compat_sys_clock_gettime +#define sys_clock_getres compat_sys_clock_getres +#define sys_clock_nanosleep compat_sys_clock_nanosleep +#define sys_statfs64 compat_sys_statfs64_wrapper +#define sys_fstatfs64 compat_sys_fstatfs64_wrapper +#define sys_utimes compat_sys_utimes +#define sys_fadvise64_64 compat_sys_fadvise64_64_wrapper +#define sys_mq_open compat_sys_mq_open +#define sys_mq_timedsend compat_sys_mq_timedsend +#define sys_mq_timedreceive compat_sys_mq_timedreceive +#define sys_mq_notify compat_sys_mq_notify +#define sys_mq_getsetattr compat_sys_mq_getsetattr +#define sys_waitid compat_sys_waitid +#define sys_recv compat_sys_recv +#define sys_recvfrom compat_sys_recvfrom +#define sys_setsockopt compat_sys_setsockopt +#define sys_getsockopt compat_sys_getsockopt +#define sys_sendmsg compat_sys_sendmsg +#define sys_recvmsg compat_sys_recvmsg +#define sys_semctl compat_sys_semctl +#define sys_msgsnd compat_sys_msgsnd +#define sys_msgrcv compat_sys_msgrcv +#define sys_msgctl compat_sys_msgctl +#define sys_shmat compat_sys_shmat +#define sys_shmctl compat_sys_shmctl +#define sys_keyctl compat_sys_keyctl +#define sys_semtimedop compat_sys_semtimedop +#define sys_mbind compat_sys_mbind +#define sys_get_mempolicy compat_sys_get_mempolicy +#define sys_set_mempolicy compat_sys_set_mempolicy +#define sys_openat compat_sys_openat +#define sys_futimesat compat_sys_futimesat +#define sys_pselect6 compat_sys_pselect6 +#define sys_ppoll compat_sys_ppoll +#define sys_set_robust_list compat_sys_set_robust_list +#define sys_get_robust_list compat_sys_get_robust_list +#define sys_sync_file_range2 compat_sys_sync_file_range2_wrapper +#define sys_vmsplice compat_sys_vmsplice +#define sys_move_pages compat_sys_move_pages +#define sys_epoll_pwait compat_sys_epoll_pwait +#define sys_kexec_load compat_sys_kexec_load +#define sys_utimensat compat_sys_utimensat +#define sys_signalfd compat_sys_signalfd +#define sys_fallocate compat_sys_fallocate_wrapper +#define sys_timerfd_settime compat_sys_timerfd_settime +#define sys_timerfd_gettime compat_sys_timerfd_gettime +#define sys_signalfd4 compat_sys_signalfd4 +#define sys_preadv compat_sys_preadv +#define sys_pwritev compat_sys_pwritev +#define sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +#define sys_recvmmsg compat_sys_recvmmsg +#define sys_fanotify_mark compat_sys_fanotify_mark_wrapper + +#undef __SYSCALL +#define __SYSCALL(x, y) .quad y // x +#define __SYSCALL_COMPAT + +/* + * The system calls table must be 4KB aligned. + */ + .align 12 +ENTRY(compat_sys_call_table) +#include <asm/unistd.h> diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c new file mode 100644 index 00000000000..967e92fdff0 --- /dev/null +++ b/arch/arm64/kernel/sys_compat.c @@ -0,0 +1,164 @@ +/* + * Based on arch/arm/kernel/sys_arm.c + * + * Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c + * Copyright (C) 1995, 1996 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/personality.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/unistd.h> + +asmlinkage int compat_sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->compat_sp, regs, 0, NULL, NULL); +} + +asmlinkage int compat_sys_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tidptr, int tls_val, + int __user *child_tidptr, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->compat_sp; + + return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); +} + +asmlinkage int compat_sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->compat_sp, + regs, 0, NULL, NULL); +} + +asmlinkage int compat_sys_execve(const char __user *filenamei, + compat_uptr_t argv, compat_uptr_t envp, + struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname(filenamei); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = compat_do_execve(filename, compat_ptr(argv), compat_ptr(envp), + regs); + putname(filename); +out: + return error; +} + +asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage int compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, + count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + return ret; +} + +static inline void +do_compat_cache_op(unsigned long start, unsigned long end, int flags) +{ + struct mm_struct *mm = current->active_mm; + struct vm_area_struct *vma; + + if (end < start || flags) + return; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (vma && vma->vm_start < end) { + if (start < vma->vm_start) + start = vma->vm_start; + if (end > vma->vm_end) + end = vma->vm_end; + up_read(&mm->mmap_sem); + __flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end)); + return; + } + up_read(&mm->mmap_sem); +} + +/* + * Handle all unrecognised system calls. + */ +long compat_arm_syscall(struct pt_regs *regs) +{ + unsigned int no = regs->regs[7]; + + switch (no) { + /* + * Flush a region from virtual address 'r0' to virtual address 'r1' + * _exclusive_. There is no alignment requirement on either address; + * user space does not need to know the hardware cache layout. + * + * r2 contains flags. It should ALWAYS be passed as ZERO until it + * is defined to be something else. For now we ignore it, but may + * the fires of hell burn in your belly if you break this rule. ;) + * + * (at a later date, we may want to allow this call to not flush + * various aspects of the cache. Passing '0' will guarantee that + * everything necessary gets flushed to maintain consistency in + * the specified region). + */ + case __ARM_NR_compat_cacheflush: + do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); + return 0; + + case __ARM_NR_compat_set_tls: + current->thread.tp_value = regs->regs[0]; + asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); + return 0; + + default: + return -ENOSYS; + } +} diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c new file mode 100644 index 00000000000..3b4b7258f49 --- /dev/null +++ b/arch/arm64/kernel/time.c @@ -0,0 +1,65 @@ +/* + * Based on arch/arm/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * Modifications for ARM (C) 1994-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/errno.h> +#include <linux/profile.h> +#include <linux/syscore_ops.h> +#include <linux/timer.h> +#include <linux/irq.h> + +#include <clocksource/arm_generic.h> + +#include <asm/thread_info.h> +#include <asm/stacktrace.h> + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + struct stackframe frame; + + if (!in_lock_functions(regs->pc)) + return regs->pc; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + do { + int ret = unwind_frame(&frame); + if (ret < 0) + return 0; + } while (in_lock_functions(frame.pc)); + + return frame.pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +void __init time_init(void) +{ + arm_generic_timer_init(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c new file mode 100644 index 00000000000..3883f842434 --- /dev/null +++ b/arch/arm64/kernel/traps.c @@ -0,0 +1,348 @@ +/* + * Based on arch/arm/kernel/traps.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/kallsyms.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/hardirq.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/syscalls.h> + +#include <asm/atomic.h> +#include <asm/traps.h> +#include <asm/stacktrace.h> +#include <asm/exception.h> +#include <asm/system_misc.h> + +static const char *handler[]= { + "Synchronous Abort", + "IRQ", + "FIQ", + "Error" +}; + +int show_unhandled_signals = 1; + +/* + * Dump out the contents of some memory nicely... + */ +static void dump_mem(const char *lvl, const char *str, unsigned long bottom, + unsigned long top) +{ + unsigned long first; + mm_segment_t fs; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + + printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top); + + for (first = bottom & ~31; first < top; first += 32) { + unsigned long p; + char str[sizeof(" 12345678") * 8 + 1]; + + memset(str, ' ', sizeof(str)); + str[sizeof(str) - 1] = '\0'; + + for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { + if (p >= bottom && p < top) { + unsigned int val; + if (__get_user(val, (unsigned int *)p) == 0) + sprintf(str + i * 9, " %08x", val); + else + sprintf(str + i * 9, " ????????"); + } + } + printk("%s%04lx:%s\n", lvl, first & 0xffff, str); + } + + set_fs(fs); +} + +static void dump_backtrace_entry(unsigned long where, unsigned long stack) +{ + print_ip_sym(where); + if (in_exception_text(where)) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs)); +} + +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + unsigned long addr = instruction_pointer(regs); + mm_segment_t fs; + char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + + for (i = -4; i < 1; i++) { + unsigned int val, bad; + + bad = __get_user(val, &((u32 *)addr)[i]); + + if (!bad) + p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); + else { + p += sprintf(p, "bad PC value"); + break; + } + } + printk("%sCode: %s\n", lvl, str); + + set_fs(fs); +} + +static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +{ + struct stackframe frame; + const register unsigned long current_sp asm ("sp"); + + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + + if (regs) { + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + } else if (tsk == current) { + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)dump_backtrace; + } else { + /* + * task blocked in __switch_to + */ + frame.fp = thread_saved_fp(tsk); + frame.sp = thread_saved_sp(tsk); + frame.pc = thread_saved_pc(tsk); + } + + printk("Call trace:\n"); + while (1) { + unsigned long where = frame.pc; + int ret; + + ret = unwind_frame(&frame); + if (ret < 0) + break; + dump_backtrace_entry(where, frame.sp); + } +} + +void dump_stack(void) +{ + dump_backtrace(NULL, NULL); +} + +EXPORT_SYMBOL(dump_stack); + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + dump_backtrace(NULL, tsk); + barrier(); +} + +#ifdef CONFIG_PREEMPT +#define S_PREEMPT " PREEMPT" +#else +#define S_PREEMPT "" +#endif +#ifdef CONFIG_SMP +#define S_SMP " SMP" +#else +#define S_SMP "" +#endif + +static int __die(const char *str, int err, struct thread_info *thread, + struct pt_regs *regs) +{ + struct task_struct *tsk = thread->task; + static int die_counter; + int ret; + + pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", + str, err, ++die_counter); + + /* trap and error numbers are mostly meaningless on ARM */ + ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); + if (ret == NOTIFY_STOP) + return ret; + + print_modules(); + __show_regs(regs); + pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem(KERN_EMERG, "Stack: ", regs->sp, + THREAD_SIZE + (unsigned long)task_stack_page(tsk)); + dump_backtrace(regs, tsk); + dump_instr(KERN_EMERG, regs); + } + + return ret; +} + +static DEFINE_RAW_SPINLOCK(die_lock); + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + struct thread_info *thread = current_thread_info(); + int ret; + + oops_enter(); + + raw_spin_lock_irq(&die_lock); + console_verbose(); + bust_spinlocks(1); + ret = __die(str, err, thread, regs); + + if (regs && kexec_should_crash(thread->task)) + crash_kexec(regs); + + bust_spinlocks(0); + add_taint(TAINT_DIE); + raw_spin_unlock_irq(&die_lock); + oops_exit(); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + if (ret != NOTIFY_STOP) + do_exit(SIGSEGV); +} + +void arm64_notify_die(const char *str, struct pt_regs *regs, + struct siginfo *info, int err) +{ + if (user_mode(regs)) + force_sig_info(info->si_signo, info, current); + else + die(str, regs, err); +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + +#ifdef CONFIG_COMPAT + /* check for AArch32 breakpoint instructions */ + if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + return; +#endif + + if (show_unhandled_signals) { + pr_info("%s[%d]: undefined instruction: pc=%p\n", + current->comm, task_pid_nr(current), pc); + dump_instr(KERN_INFO, regs); + } + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; + + arm64_notify_die("Oops - undefined instruction", regs, &info, 0); +} + +long compat_arm_syscall(struct pt_regs *regs); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT + long ret; + if (is_compat_task()) { + ret = compat_arm_syscall(regs); + if (ret != -ENOSYS) + return ret; + } +#endif + + if (show_unhandled_signals) { + pr_info("%s[%d]: syscall %d\n", current->comm, + task_pid_nr(current), (int)regs->syscallno); + dump_instr("", regs); + if (user_mode(regs)) + __show_regs(regs); + } + + return sys_ni_syscall(); +} + +/* + * bad_mode handles the impossible case in the exception vector. + */ +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +{ + console_verbose(); + + pr_crit("Bad mode in %s handler detected, code 0x%08x\n", + handler[reason], esr); + + die("Oops - bad mode", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +void __pte_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pte %016lx.\n", file, line, val); +} + +void __pmd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pmd %016lx.\n", file, line, val); +} + +void __pgd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pgd %016lx.\n", file, line, val); +} + +void __init trap_init(void) +{ + return; +} diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c new file mode 100644 index 00000000000..17948fc7d66 --- /dev/null +++ b/arch/arm64/kernel/vdso.c @@ -0,0 +1,261 @@ +/* + * VDSO implementation for AArch64 and vector page setup for AArch32. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/kernel.h> +#include <linux/clocksource.h> +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/signal32.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +extern char vdso_start, vdso_end; +static unsigned long vdso_pages; +static struct page **vdso_pagelist; + +/* + * The vDSO data page. + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +#ifdef CONFIG_COMPAT +/* + * Create and map the vectors page for AArch32 tasks. + */ +static struct page *vectors_page[1]; + +static int alloc_vectors_page(void) +{ + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + unsigned long vpage; + + vpage = get_zeroed_page(GFP_ATOMIC); + + if (!vpage) + return -ENOMEM; + + /* kuser helpers */ + memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start, + kuser_sz); + + /* sigreturn code */ + memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, + aarch32_sigret_code, sizeof(aarch32_sigret_code)); + + flush_icache_range(vpage, vpage + PAGE_SIZE); + vectors_page[0] = virt_to_page(vpage); + + return 0; +} +arch_initcall(alloc_vectors_page); + +int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long addr = AARCH32_VECTORS_BASE; + int ret; + + down_write(&mm->mmap_sem); + current->mm->context.vdso = (void *)addr; + + /* Map vectors page at the high address. */ + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + vectors_page); + + up_write(&mm->mmap_sem); + + return ret; +} +#endif /* CONFIG_COMPAT */ + +static int __init vdso_init(void) +{ + struct page *pg; + char *vbase; + int i, ret = 0; + + vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", + vdso_pages + 1, vdso_pages, 1L, &vdso_start); + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + GFP_KERNEL); + if (vdso_pagelist == NULL) { + pr_err("Failed to allocate vDSO pagelist!\n"); + return -ENOMEM; + } + + /* Grab the vDSO code pages. */ + for (i = 0; i < vdso_pages; i++) { + pg = virt_to_page(&vdso_start + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso_pagelist[i] = pg; + } + + /* Sanity check the shared object header. */ + vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); + if (vbase == NULL) { + pr_err("Failed to map vDSO pagelist!\n"); + return -ENOMEM; + } else if (memcmp(vbase, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + ret = -EINVAL; + goto unmap; + } + + /* Grab the vDSO data page. */ + pg = virt_to_page(vdso_data); + get_page(pg); + vdso_pagelist[i] = pg; + +unmap: + vunmap(vbase); + return ret; +} +arch_initcall(vdso_init); + +int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long vdso_base, vdso_mapping_len; + int ret; + + /* Be sure to map the data page */ + vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = vdso_base; + goto up_fail; + } + mm->context.vdso = (void *)vdso_base; + + ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (ret) { + mm->context.vdso = NULL; + goto up_fail; + } + +up_fail: + up_write(&mm->mmap_sem); + + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + /* + * We can re-use the vdso pointer in mm_context_t for identifying + * the vectors page for compat applications. The vDSO will always + * sit above TASK_UNMAPPED_BASE and so we don't need to worry about + * it conflicting with the vectors base. + */ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { +#ifdef CONFIG_COMPAT + if (vma->vm_start == AARCH32_VECTORS_BASE) + return "[vectors]"; +#endif + return "[vdso]"; + } + + return NULL; +} + +/* + * We define AT_SYSINFO_EHDR, so we need these function stubs to keep + * Linux happy. + */ +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +void update_vsyscall(struct timespec *ts, struct timespec *wtm, + struct clocksource *clock, u32 mult) +{ + struct timespec xtime_coarse; + u32 use_syscall = strcmp(clock->name, "arch_sys_counter"); + + ++vdso_data->tb_seq_count; + smp_wmb(); + + xtime_coarse = __current_kernel_time(); + vdso_data->use_syscall = use_syscall; + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + + if (!use_syscall) { + vdso_data->cs_cycle_last = clock->cycle_last; + vdso_data->xtime_clock_sec = ts->tv_sec; + vdso_data->xtime_clock_nsec = ts->tv_nsec; + vdso_data->cs_mult = mult; + vdso_data->cs_shift = clock->shift; + vdso_data->wtm_clock_sec = wtm->tv_sec; + vdso_data->wtm_clock_nsec = wtm->tv_nsec; + } + + smp_wmb(); + ++vdso_data->tb_seq_count; +} + +void update_vsyscall_tz(void) +{ + ++vdso_data->tb_seq_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tb_seq_count; +} diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore new file mode 100644 index 00000000000..b8cc94e9698 --- /dev/null +++ b/arch/arm64/kernel/vdso/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso-offsets.h diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile new file mode 100644 index 00000000000..d8064af42e6 --- /dev/null +++ b/arch/arm64/kernel/vdso/Makefile @@ -0,0 +1,63 @@ +# +# Building a vDSO image for AArch64. +# +# Author: Will Deacon <will.deacon@arm.com> +# Heavily based on the vDSO Makefiles for other archs. +# + +obj-vdso := gettimeofday.o note.o sigreturn.o + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +obj-y += vdso.o +extra-y += vdso.lds vdso-offsets.h +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso.o : $(obj)/vdso.so + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) + $(call if_changed,vdsold) + +# Strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ + cp $@ include/generated/ +endef + +$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Assembly rules for the .S files +$(obj-vdso): %.o: %.S + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 00000000000..01924ff071a --- /dev/null +++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S new file mode 100644 index 00000000000..dcb8c203a3b --- /dev/null +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -0,0 +1,242 @@ +/* + * Userspace implementations of gettimeofday() and friends. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#define NSEC_PER_SEC_LO16 0xca00 +#define NSEC_PER_SEC_HI16 0x3b9a + +vdso_data .req x6 +use_syscall .req w7 +seqcnt .req w8 + + .macro seqcnt_acquire +9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + tbnz seqcnt, #0, 9999b + dmb ishld + ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] + .endm + + .macro seqcnt_read, cnt + dmb ishld + ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + .endm + + .macro seqcnt_check, cnt, fail + cmp \cnt, seqcnt + b.ne \fail + .endm + + .text + +/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__kernel_gettimeofday) + .cfi_startproc + mov x2, x30 + .cfi_register x30, x2 + + /* Acquire the sequence counter and get the timespec. */ + adr vdso_data, _vdso_data +1: seqcnt_acquire + cbnz use_syscall, 4f + + /* If tv is NULL, skip to the timezone code. */ + cbz x0, 2f + bl __do_get_tspec + seqcnt_check w13, 1b + + /* Convert ns to us. */ + mov x11, #1000 + udiv x10, x10, x11 + stp x9, x10, [x0, #TVAL_TV_SEC] +2: + /* If tz is NULL, return 0. */ + cbz x1, 3f + ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] + seqcnt_read w13 + seqcnt_check w13, 1b + stp w4, w5, [x1, #TZ_MINWEST] +3: + mov x0, xzr + ret x2 +4: + /* Syscall fallback. */ + mov x8, #__NR_gettimeofday + svc #0 + ret x2 + .cfi_endproc +ENDPROC(__kernel_gettimeofday) + +/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__kernel_clock_gettime) + .cfi_startproc + cmp w0, #CLOCK_REALTIME + ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + b.ne 2f + + mov x2, x30 + .cfi_register x30, x2 + + /* Get kernel timespec. */ + adr vdso_data, _vdso_data +1: seqcnt_acquire + cbnz use_syscall, 7f + + bl __do_get_tspec + seqcnt_check w13, 1b + + cmp w0, #CLOCK_MONOTONIC + b.ne 6f + + /* Get wtm timespec. */ + ldp x14, x15, [vdso_data, #VDSO_WTM_CLK_SEC] + + /* Check the sequence counter. */ + seqcnt_read w13 + seqcnt_check w13, 1b + b 4f +2: + cmp w0, #CLOCK_REALTIME_COARSE + ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne + b.ne 8f + + /* Get coarse timespec. */ + adr vdso_data, _vdso_data +3: seqcnt_acquire + ldp x9, x10, [vdso_data, #VDSO_XTIME_CRS_SEC] + + cmp w0, #CLOCK_MONOTONIC_COARSE + b.ne 6f + + /* Get wtm timespec. */ + ldp x14, x15, [vdso_data, #VDSO_WTM_CLK_SEC] + + /* Check the sequence counter. */ + seqcnt_read w13 + seqcnt_check w13, 3b +4: + /* Add on wtm timespec. */ + add x9, x9, x14 + add x10, x10, x15 + + /* Normalise the new timespec. */ + mov x14, #NSEC_PER_SEC_LO16 + movk x14, #NSEC_PER_SEC_HI16, lsl #16 + cmp x10, x14 + b.lt 5f + sub x10, x10, x14 + add x9, x9, #1 +5: + cmp x10, #0 + b.ge 6f + add x10, x10, x14 + sub x9, x9, #1 + +6: /* Store to the user timespec. */ + stp x9, x10, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret x2 +7: + mov x30, x2 +8: /* Syscall fallback. */ + mov x8, #__NR_clock_gettime + svc #0 + ret + .cfi_endproc +ENDPROC(__kernel_clock_gettime) + +/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__kernel_clock_getres) + .cfi_startproc + cbz w1, 3f + + cmp w0, #CLOCK_REALTIME + ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + b.ne 1f + + ldr x2, 5f + b 2f +1: + cmp w0, #CLOCK_REALTIME_COARSE + ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne + b.ne 4f + ldr x2, 6f +2: + stp xzr, x2, [x1] + +3: /* res == NULL. */ + mov w0, wzr + ret + +4: /* Syscall fallback. */ + mov x8, #__NR_clock_getres + svc #0 + ret +5: + .quad CLOCK_REALTIME_RES +6: + .quad CLOCK_COARSE_RES + .cfi_endproc +ENDPROC(__kernel_clock_getres) + +/* + * Read the current time from the architected counter. + * Expects vdso_data to be initialised. + * Clobbers the temporary registers (x9 - x15). + * Returns: + * - (x9, x10) = (ts->tv_sec, ts->tv_nsec) + * - (x11, x12) = (xtime->tv_sec, xtime->tv_nsec) + * - w13 = vDSO sequence counter + */ +ENTRY(__do_get_tspec) + .cfi_startproc + + /* Read from the vDSO data page. */ + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + ldp x11, x12, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp w14, w15, [vdso_data, #VDSO_CS_MULT] + seqcnt_read w13 + + /* Read the physical counter. */ + isb + mrs x9, cntpct_el0 + + /* Calculate cycle delta and convert to ns. */ + sub x10, x9, x10 + /* We can only guarantee 56 bits of precision. */ + movn x9, #0xff0, lsl #48 + and x10, x9, x10 + mul x10, x10, x14 + lsr x10, x10, x15 + + /* Use the kernel time to calculate the new timespec. */ + add x10, x12, x10 + mov x14, #NSEC_PER_SEC_LO16 + movk x14, #NSEC_PER_SEC_HI16, lsl #16 + udiv x15, x10, x14 + add x9, x15, x11 + mul x14, x14, x15 + sub x10, x10, x14 + + ret + .cfi_endproc +ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S new file mode 100644 index 00000000000..b82c85e5d97 --- /dev/null +++ b/arch/arm64/kernel/vdso/note.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S new file mode 100644 index 00000000000..20d98effa7d --- /dev/null +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -0,0 +1,37 @@ +/* + * Sigreturn trampoline for returning from a signal when the SA_RESTORER + * flag is not set. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text + + nop +ENTRY(__kernel_rt_sigreturn) + .cfi_startproc + .cfi_signal_frame + .cfi_def_cfa x29, 0 + .cfi_offset x29, 0 * 8 + .cfi_offset x30, 1 * 8 + mov x8, #__NR_rt_sigreturn + svc #0 + .cfi_endproc +ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S new file mode 100644 index 00000000000..60c1db54b41 --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.S @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/arm64/kernel/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S new file mode 100644 index 00000000000..8154b8d1c82 --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -0,0 +1,100 @@ +/* + * GNU linker script for the VDSO library. +* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) + +SECTIONS +{ + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + + .text : { *(.text*) } :text =0xd503201f + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + . = ALIGN(PAGE_SIZE); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6.39 { + global: + __kernel_rt_sigreturn; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp = __kernel_rt_sigreturn; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S new file mode 100644 index 00000000000..3fae2be8b01 --- /dev/null +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -0,0 +1,126 @@ +/* + * ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + */ + +#include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/memory.h> +#include <asm/page.h> + +#define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x + +OUTPUT_ARCH(aarch64) +ENTRY(stext) + +jiffies = jiffies_64; + +SECTIONS +{ + /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + */ + /DISCARD/ : { + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL + *(.discard) + *(.discard.*) + } + + . = PAGE_OFFSET + TEXT_OFFSET; + + .head.text : { + _text = .; + HEAD_TEXT + } + .text : { /* Real text segment */ + _stext = .; /* Text and read-only data */ + *(.smp.pen.text) + __exception_text_start = .; + *(.exception.text) + __exception_text_end = .; + IRQENTRY_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + . = ALIGN(16); + *(.got) /* Global offset table */ + } + + RO_DATA(PAGE_SIZE) + + _etext = .; /* End of text and rodata section */ + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + INIT_TEXT_SECTION(8) + .exit.text : { + ARM_EXIT_KEEP(EXIT_TEXT) + } + . = ALIGN(16); + .init.data : { + INIT_DATA + INIT_SETUP(16) + INIT_CALLS + CON_INITCALL + SECURITY_INITCALL + INIT_RAM_FS + } + .exit.data : { + ARM_EXIT_KEEP(EXIT_DATA) + } + + PERCPU_SECTION(64) + + __init_end = .; + . = ALIGN(THREAD_SIZE); + __data_loc = .; + + .data : AT(__data_loc) { + _data = .; /* address in memory */ + _sdata = .; + + /* + * first, the init task union, aligned + * to an 8192 byte boundary. + */ + INIT_TASK_DATA(THREAD_SIZE) + NOSAVE_DATA + CACHELINE_ALIGNED_DATA(64) + READ_MOSTLY_DATA(64) + + /* + * The exception fixup table (might need resorting at runtime) + */ + . = ALIGN(32); + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + + /* + * and the usual data section + */ + DATA_DATA + CONSTRUCTORS + + _edata = .; + } + _edata_loc = __data_loc + SIZEOF(.data); + + NOTES + + BSS_SECTION(0, 0, 0) + _end = .; + + STABS_DEBUG + .comment 0 : { *(.comment) } +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile new file mode 100644 index 00000000000..2fb7f6092aa --- /dev/null +++ b/arch/arm64/lib/Makefile @@ -0,0 +1,4 @@ +lib-y := bitops.o delay.o \ + strncpy_from_user.o strnlen_user.o clear_user.o \ + copy_from_user.o copy_to_user.o copy_in_user.o \ + copy_page.o clear_page.o diff --git a/arch/arm64/lib/bitops.c b/arch/arm64/lib/bitops.c new file mode 100644 index 00000000000..aa4965e60ac --- /dev/null +++ b/arch/arm64/lib/bitops.c @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> + +#ifdef CONFIG_SMP +arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { + [0 ... (ATOMIC_HASH_SIZE-1)] = __ARCH_SPIN_LOCK_UNLOCKED +}; +#endif diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S new file mode 100644 index 00000000000..ef08e905e35 --- /dev/null +++ b/arch/arm64/lib/clear_page.S @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/assembler.h> +#include <asm/page.h> + +/* + * Clear page @dest + * + * Parameters: + * x0 - dest + */ +ENTRY(clear_page) + mrs x1, dczid_el0 + and w1, w1, #0xf + mov x2, #4 + lsl x1, x2, x1 + +1: dc zva, x0 + add x0, x0, x1 + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + ret +ENDPROC(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S new file mode 100644 index 00000000000..6e0ed93d51f --- /dev/null +++ b/arch/arm64/lib/clear_user.S @@ -0,0 +1,58 @@ +/* + * Based on arch/arm/lib/clear_user.S + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Prototype: int __clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + * + * Alignment fixed up by hardware. + */ +ENTRY(__clear_user) + mov x2, x1 // save the size for fixup return + subs x1, x1, #8 + b.mi 2f +1: +USER(9f, str xzr, [x0], #8 ) + subs x1, x1, #8 + b.pl 1b +2: adds x1, x1, #4 + b.mi 3f +USER(9f, str wzr, [x0], #4 ) + sub x1, x1, #4 +3: adds x1, x1, #2 + b.mi 4f +USER(9f, strh wzr, [x0], #2 ) + sub x1, x1, #2 +4: adds x1, x1, #1 + b.mi 5f + strb wzr, [x0] +5: mov x0, #0 + ret +ENDPROC(__clear_user) + + .section .fixup,"ax" + .align 2 +9: mov x0, x2 // return the original size + ret + .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S new file mode 100644 index 00000000000..5e27add9d36 --- /dev/null +++ b/arch/arm64/lib/copy_from_user.S @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Copy from user space to a kernel buffer (alignment handled by the hardware) + * + * Parameters: + * x0 - to + * x1 - from + * x2 - n + * Returns: + * x0 - bytes not copied + */ +ENTRY(__copy_from_user) + add x4, x1, x2 // upper user buffer boundary + subs x2, x2, #8 + b.mi 2f +1: +USER(9f, ldr x3, [x1], #8 ) + subs x2, x2, #8 + str x3, [x0], #8 + b.pl 1b +2: adds x2, x2, #4 + b.mi 3f +USER(9f, ldr w3, [x1], #4 ) + sub x2, x2, #4 + str w3, [x0], #4 +3: adds x2, x2, #2 + b.mi 4f +USER(9f, ldrh w3, [x1], #2 ) + sub x2, x2, #2 + strh w3, [x0], #2 +4: adds x2, x2, #1 + b.mi 5f +USER(9f, ldrb w3, [x1] ) + strb w3, [x0] +5: mov x0, #0 + ret +ENDPROC(__copy_from_user) + + .section .fixup,"ax" + .align 2 +9: sub x2, x4, x1 + mov x3, x2 +10: strb wzr, [x0], #1 // zero remaining buffer space + subs x3, x3, #1 + b.ne 10b + mov x0, x2 // bytes not copied + ret + .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S new file mode 100644 index 00000000000..84b6c9bb9b9 --- /dev/null +++ b/arch/arm64/lib/copy_in_user.S @@ -0,0 +1,63 @@ +/* + * Copy from user space to user space + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Copy from user space to user space (alignment handled by the hardware) + * + * Parameters: + * x0 - to + * x1 - from + * x2 - n + * Returns: + * x0 - bytes not copied + */ +ENTRY(__copy_in_user) + add x4, x0, x2 // upper user buffer boundary + subs x2, x2, #8 + b.mi 2f +1: +USER(9f, ldr x3, [x1], #8 ) + subs x2, x2, #8 +USER(9f, str x3, [x0], #8 ) + b.pl 1b +2: adds x2, x2, #4 + b.mi 3f +USER(9f, ldr w3, [x1], #4 ) + sub x2, x2, #4 +USER(9f, str w3, [x0], #4 ) +3: adds x2, x2, #2 + b.mi 4f +USER(9f, ldrh w3, [x1], #2 ) + sub x2, x2, #2 +USER(9f, strh w3, [x0], #2 ) +4: adds x2, x2, #1 + b.mi 5f +USER(9f, ldrb w3, [x1] ) +USER(9f, strb w3, [x0] ) +5: mov x0, #0 + ret +ENDPROC(__copy_in_user) + + .section .fixup,"ax" + .align 2 +9: sub x0, x4, x0 // bytes not copied + ret + .previous diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S new file mode 100644 index 00000000000..512b9a7b980 --- /dev/null +++ b/arch/arm64/lib/copy_page.S @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/assembler.h> +#include <asm/page.h> + +/* + * Copy a page from src to dest (both are page aligned) + * + * Parameters: + * x0 - dest + * x1 - src + */ +ENTRY(copy_page) + /* Assume cache line size is 64 bytes. */ + prfm pldl1strm, [x1, #64] +1: ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + ldp x6, x7, [x1, #32] + ldp x8, x9, [x1, #48] + add x1, x1, #64 + prfm pldl1strm, [x1, #64] + stnp x2, x3, [x0] + stnp x4, x5, [x0, #16] + stnp x6, x7, [x0, #32] + stnp x8, x9, [x0, #48] + add x0, x0, #64 + tst x1, #(PAGE_SIZE - 1) + b.ne 1b + ret +ENDPROC(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S new file mode 100644 index 00000000000..a0aeeb9b7a2 --- /dev/null +++ b/arch/arm64/lib/copy_to_user.S @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Copy to user space from a kernel buffer (alignment handled by the hardware) + * + * Parameters: + * x0 - to + * x1 - from + * x2 - n + * Returns: + * x0 - bytes not copied + */ +ENTRY(__copy_to_user) + add x4, x0, x2 // upper user buffer boundary + subs x2, x2, #8 + b.mi 2f +1: + ldr x3, [x1], #8 + subs x2, x2, #8 +USER(9f, str x3, [x0], #8 ) + b.pl 1b +2: adds x2, x2, #4 + b.mi 3f + ldr w3, [x1], #4 + sub x2, x2, #4 +USER(9f, str w3, [x0], #4 ) +3: adds x2, x2, #2 + b.mi 4f + ldrh w3, [x1], #2 + sub x2, x2, #2 +USER(9f, strh w3, [x0], #2 ) +4: adds x2, x2, #1 + b.mi 5f + ldrb w3, [x1] +USER(9f, strb w3, [x0] ) +5: mov x0, #0 + ret +ENDPROC(__copy_to_user) + + .section .fixup,"ax" + .align 2 +9: sub x0, x4, x0 // bytes not copied + ret + .previous diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c new file mode 100644 index 00000000000..dad4ec9bbfd --- /dev/null +++ b/arch/arm64/lib/delay.c @@ -0,0 +1,55 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/timex.h> + +void __delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + unsigned long loops; + + loops = xloops * loops_per_jiffy * HZ; + __delay(loops >> 32); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x10C7UL); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/arm64/lib/strncpy_from_user.S b/arch/arm64/lib/strncpy_from_user.S new file mode 100644 index 00000000000..56e448a831a --- /dev/null +++ b/arch/arm64/lib/strncpy_from_user.S @@ -0,0 +1,50 @@ +/* + * Based on arch/arm/lib/strncpy_from_user.S + * + * Copyright (C) 1995-2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* + * Copy a string from user space to kernel space. + * x0 = dst, x1 = src, x2 = byte length + * returns the number of characters copied (strlen of copied string), + * -EFAULT on exception, or "len" if we fill the whole buffer + */ +ENTRY(__strncpy_from_user) + mov x4, x1 +1: subs x2, x2, #1 + bmi 2f +USER(9f, ldrb w3, [x1], #1 ) + strb w3, [x0], #1 + cbnz w3, 1b + sub x1, x1, #1 // take NUL character out of count +2: sub x0, x1, x4 + ret +ENDPROC(__strncpy_from_user) + + .section .fixup,"ax" + .align 0 +9: strb wzr, [x0] // null terminate + mov x0, #-EFAULT + ret + .previous diff --git a/arch/arm64/lib/strnlen_user.S b/arch/arm64/lib/strnlen_user.S new file mode 100644 index 00000000000..7f7b176a564 --- /dev/null +++ b/arch/arm64/lib/strnlen_user.S @@ -0,0 +1,47 @@ +/* + * Based on arch/arm/lib/strnlen_user.S + * + * Copyright (C) 1995-2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* Prototype: unsigned long __strnlen_user(const char *str, long n) + * Purpose : get length of a string in user memory + * Params : str - address of string in user memory + * Returns : length of string *including terminator* + * or zero on exception, or n if too long + */ +ENTRY(__strnlen_user) + mov x2, x0 +1: subs x1, x1, #1 + b.mi 2f +USER(9f, ldrb w3, [x0], #1 ) + cbnz w3, 1b +2: sub x0, x0, x2 + ret +ENDPROC(__strnlen_user) + + .section .fixup,"ax" + .align 0 +9: mov x0, #0 + ret + .previous diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile new file mode 100644 index 00000000000..3140a2abcdc --- /dev/null +++ b/arch/arm64/mm/Makefile @@ -0,0 +1,4 @@ +obj-y := dma-mapping.o extable.o fault.o init.o \ + cache.o copypage.o flush.o \ + ioremap.o mmap.o pgd.o mmu.o \ + context.o tlb.o proc.o diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S new file mode 100644 index 00000000000..abe69b80cf7 --- /dev/null +++ b/arch/arm64/mm/cache.S @@ -0,0 +1,168 @@ +/* + * Cache maintenance + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> + +#include "proc-macros.S" + +/* + * __flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: x0-x7, x9-x11 + */ +ENTRY(__flush_dcache_all) + dsb sy // ensure ordering with previous memory accesses + mrs x0, clidr_el1 // read clidr + and x3, x0, #0x7000000 // extract loc from clidr + lsr x3, x3, #23 // left align loc bit field + cbz x3, finished // if loc is 0, then no need to clean + mov x10, #0 // start clean at cache level 0 +loop1: + add x2, x10, x10, lsr #1 // work out 3x current cache level + lsr x1, x0, x2 // extract cache type bits from clidr + and x1, x1, #7 // mask of the bits for current cache only + cmp x1, #2 // see what cache we have at this level + b.lt skip // skip if no cache, or just i-cache + save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic + msr csselr_el1, x10 // select current cache level in csselr + isb // isb to sych the new cssr&csidr + mrs x1, ccsidr_el1 // read the new ccsidr + restore_irqs x9 + and x2, x1, #7 // extract the length of the cache lines + add x2, x2, #4 // add 4 (line length offset) + mov x4, #0x3ff + and x4, x4, x1, lsr #3 // find maximum number on the way size + clz x5, x4 // find bit position of way size increment + mov x7, #0x7fff + and x7, x7, x1, lsr #13 // extract max number of the index size +loop2: + mov x9, x4 // create working copy of max way size +loop3: + lsl x6, x9, x5 + orr x11, x10, x6 // factor way and cache number into x11 + lsl x6, x7, x2 + orr x11, x11, x6 // factor index number into x11 + dc cisw, x11 // clean & invalidate by set/way + subs x9, x9, #1 // decrement the way + b.ge loop3 + subs x7, x7, #1 // decrement the index + b.ge loop2 +skip: + add x10, x10, #2 // increment cache number + cmp x3, x10 + b.gt loop1 +finished: + mov x10, #0 // swith back to cache level 0 + msr csselr_el1, x10 // select current cache level in csselr + dsb sy + isb + ret +ENDPROC(__flush_dcache_all) + +/* + * flush_cache_all() + * + * Flush the entire cache system. The data cache flush is now achieved + * using atomic clean / invalidates working outwards from L1 cache. This + * is done using Set/Way based cache maintainance instructions. The + * instruction cache can still be invalidated back to the point of + * unification in a single instruction. + */ +ENTRY(flush_cache_all) + mov x12, lr + bl __flush_dcache_all + mov x0, #0 + ic ialluis // I+BTB cache invalidate + ret x12 +ENDPROC(flush_cache_all) + +/* + * flush_icache_range(start,end) + * + * Ensure that the I and D caches are coherent within specified region. + * This is typically used when code has been written to a memory region, + * and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(flush_icache_range) + /* FALLTHROUGH */ + +/* + * __flush_cache_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified region. + * This is typically used when code has been written to a memory region, + * and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(__flush_cache_user_range) + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x0, x3 +1: +USER(9f, dc cvau, x4 ) // clean D line to PoU + add x4, x4, x2 + cmp x4, x1 + b.lo 1b + dsb sy + + icache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x0, x3 +1: +USER(9f, ic ivau, x4 ) // invalidate I line PoU + add x4, x4, x2 + cmp x4, x1 + b.lo 1b +9: // ignore any faulting cache operation + dsb sy + isb + ret +ENDPROC(flush_icache_range) +ENDPROC(__flush_cache_user_range) + +/* + * __flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back to the + * page in question. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__flush_dcache_area) + dcache_line_size x2, x3 + add x1, x0, x1 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 // clean & invalidate D line / unified line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__flush_dcache_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c new file mode 100644 index 00000000000..baa758d3702 --- /dev/null +++ b/arch/arm64/mm/context.c @@ -0,0 +1,159 @@ +/* + * Based on arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/percpu.h> + +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/cachetype.h> + +#define asid_bits(reg) \ + (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) + +#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) + +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +unsigned int cpu_last_asid = ASID_FIRST_VERSION; + +/* + * We fork()ed a process, and we need a new context for the child to run in. + */ +void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + mm->context.id = 0; + raw_spin_lock_init(&mm->context.id_lock); +} + +static void flush_context(void) +{ + /* set the reserved TTBR0 before flushing the TLB */ + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + if (icache_is_aivivt()) + __flush_icache_all(); +} + +#ifdef CONFIG_SMP + +static void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + unsigned long flags; + + /* + * Locking needed for multi-threaded applications where the same + * mm->context.id could be set from different CPUs during the + * broadcast. This function is also called via IPI so the + * mm->context.id_lock has to be IRQ-safe. + */ + raw_spin_lock_irqsave(&mm->context.id_lock, flags); + if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + /* + * Old version of ASID found. Set the new one and reset + * mm_cpumask(mm). + */ + mm->context.id = asid; + cpumask_clear(mm_cpumask(mm)); + } + raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); + + /* + * Set the mm_cpumask(mm) bit for the current CPU. + */ + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); +} + +/* + * Reset the ASID on the current CPU. This function call is broadcast from the + * CPU handling the ASID rollover and holding cpu_asid_lock. + */ +static void reset_context(void *info) +{ + unsigned int asid; + unsigned int cpu = smp_processor_id(); + struct mm_struct *mm = current->active_mm; + + smp_rmb(); + asid = cpu_last_asid + cpu; + + flush_context(); + set_mm_context(mm, asid); + + /* set the new ASID */ + cpu_switch_mm(mm->pgd, mm); +} + +#else + +static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + mm->context.id = asid; + cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); +} + +#endif + +void __new_context(struct mm_struct *mm) +{ + unsigned int asid; + unsigned int bits = asid_bits(); + + raw_spin_lock(&cpu_asid_lock); +#ifdef CONFIG_SMP + /* + * Check the ASID again, in case the change was broadcast from another + * CPU before we acquired the lock. + */ + if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + raw_spin_unlock(&cpu_asid_lock); + return; + } +#endif + /* + * At this point, it is guaranteed that the current mm (with an old + * ASID) isn't active on any other CPU since the ASIDs are changed + * simultaneously via IPI. + */ + asid = ++cpu_last_asid; + + /* + * If we've used up all our ASIDs, we need to start a new version and + * flush the TLB. + */ + if (unlikely((asid & ((1 << bits) - 1)) == 0)) { + /* increment the ASID version */ + cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); + if (cpu_last_asid == 0) + cpu_last_asid = ASID_FIRST_VERSION; + asid = cpu_last_asid + smp_processor_id(); + flush_context(); +#ifdef CONFIG_SMP + smp_wmb(); + smp_call_function(reset_context, NULL, 1); +#endif + cpu_last_asid += NR_CPUS - 1; + } + + set_mm_context(mm, asid); + raw_spin_unlock(&cpu_asid_lock); +} diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c new file mode 100644 index 00000000000..9aecbace412 --- /dev/null +++ b/arch/arm64/mm/copypage.c @@ -0,0 +1,34 @@ +/* + * Based on arch/arm/mm/copypage.c + * + * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/cacheflush.h> + +void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +{ + copy_page(kto, kfrom); + __flush_dcache_area(kto, PAGE_SIZE); +} + +void __cpu_clear_user_page(void *kaddr, unsigned long vaddr) +{ + clear_page(kaddr); +} diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c new file mode 100644 index 00000000000..5eb244453a5 --- /dev/null +++ b/arch/arm64/mm/dma-mapping.c @@ -0,0 +1,79 @@ +/* + * SWIOTLB-based DMA API implementation + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/gfp.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/swiotlb.h> + +#include <asm/cacheflush.h> + +struct dma_map_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + +static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + dev->coherent_dma_mask <= DMA_BIT_MASK(32)) + flags |= GFP_DMA32; + return swiotlb_alloc_coherent(dev, size, dma_handle, flags); +} + +static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_handle); +} + +static struct dma_map_ops arm64_swiotlb_dma_ops = { + .alloc = arm64_swiotlb_alloc_coherent, + .free = arm64_swiotlb_free_coherent, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; + +void __init swiotlb_init_with_default_size(size_t default_size, int verbose); + +void __init arm64_swiotlb_init(size_t max_size) +{ + dma_ops = &arm64_swiotlb_dma_ops; + swiotlb_init_with_default_size(min((size_t)SZ_64M, max_size), 1); +} + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c new file mode 100644 index 00000000000..79444279ba8 --- /dev/null +++ b/arch/arm64/mm/extable.c @@ -0,0 +1,17 @@ +/* + * Based on arch/arm/mm/extable.c + */ + +#include <linux/module.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) + regs->pc = fixup->fixup; + + return fixup != NULL; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c new file mode 100644 index 00000000000..1909a69983c --- /dev/null +++ b/arch/arm64/mm/fault.c @@ -0,0 +1,534 @@ +/* + * Based on arch/arm/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1995-2004 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/highmem.h> +#include <linux/perf_event.h> + +#include <asm/exception.h> +#include <asm/debug-monitors.h> +#include <asm/system_misc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +/* + * Dump out the page tables associated with 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (!mm) + mm = &init_mm; + + pr_alert("pgd = %p\n", mm->pgd); + pgd = pgd_offset(mm, addr); + pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + + do { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none_or_clear_bad(pgd)) + break; + + pud = pud_offset(pgd, addr); + if (pud_none_or_clear_bad(pud)) + break; + + pmd = pmd_offset(pud, addr); + printk(", *pmd=%016llx", pmd_val(*pmd)); + if (pmd_none_or_clear_bad(pmd)) + break; + + pte = pte_offset_map(pmd, addr); + printk(", *pte=%016llx", pte_val(*pte)); + pte_unmap(pte); + } while(0); + + printk("\n"); +} + +/* + * The kernel tried to access some page that wasn't present. + */ +static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, + unsigned int esr, struct pt_regs *regs) +{ + /* + * Are we prepared to handle this kernel fault? + */ + if (fixup_exception(regs)) + return; + + /* + * No handler, we'll have to terminate things with extreme prejudice. + */ + bust_spinlocks(1); + pr_alert("Unable to handle kernel %s at virtual address %08lx\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + + show_pte(mm, addr); + die("Oops", regs, esr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + +/* + * Something tried to access memory that isn't in our memory map. User mode + * accesses just cause a SIGSEGV + */ +static void __do_user_fault(struct task_struct *tsk, unsigned long addr, + unsigned int esr, unsigned int sig, int code, + struct pt_regs *regs) +{ + struct siginfo si; + + if (show_unhandled_signals) { + pr_info("%s[%d]: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", + tsk->comm, task_pid_nr(tsk), sig, addr, esr); + show_pte(tsk->mm, addr); + show_regs(regs); + } + + tsk->thread.fault_address = addr; + si.si_signo = sig; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void __user *)addr; + force_sig_info(sig, &si, tsk); +} + +void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->active_mm; + + /* + * If we are in kernel mode at this point, we have no context to + * handle this fault with. + */ + if (user_mode(regs)) + __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs); + else + __do_kernel_fault(mm, addr, esr, regs); +} + +#define VM_FAULT_BADMAP 0x010000 +#define VM_FAULT_BADACCESS 0x020000 + +#define ESR_WRITE (1 << 6) +#define ESR_LNX_EXEC (1 << 24) + +/* + * Check that the permissions on the VMA allow for the fault which occurred. + * If we encountered a write fault, we must have write permission, otherwise + * we allow any permission. + */ +static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) +{ + unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + + if (esr & ESR_WRITE) + mask = VM_WRITE; + if (esr & ESR_LNX_EXEC) + mask = VM_EXEC; + + return vma->vm_flags & mask ? false : true; +} + +static int __do_page_fault(struct mm_struct *mm, unsigned long addr, + unsigned int esr, unsigned int flags, + struct task_struct *tsk) +{ + struct vm_area_struct *vma; + int fault; + + vma = find_vma(mm, addr); + fault = VM_FAULT_BADMAP; + if (unlikely(!vma)) + goto out; + if (unlikely(vma->vm_start > addr)) + goto check_stack; + + /* + * Ok, we have a good vm_area for this memory access, so we can handle + * it. + */ +good_area: + if (access_error(esr, vma)) { + fault = VM_FAULT_BADACCESS; + goto out; + } + + return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + +check_stack: + if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + goto good_area; +out: + return fault; +} + +static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + int fault, sig, code; + int write = esr & ESR_WRITE; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + (write ? FAULT_FLAG_WRITE : 0); + + tsk = current; + mm = tsk->mm; + + /* Enable interrupts if they were enabled in the parent context. */ + if (interrupts_enabled(regs)) + local_irq_enable(); + + /* + * If we're in an interrupt or have no user context, we must not take + * the fault. + */ + if (in_atomic() || !mm) + goto no_context; + + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, + * we can bug out early if this is from code which shouldn't. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->pc)) + goto no_context; +retry: + down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in which + * case, we'll have missed the might_sleep() from down_read(). + */ + might_sleep(); +#ifdef CONFIG_DEBUG_VM + if (!user_mode(regs) && !search_exception_tables(regs->pc)) + goto no_context; +#endif + } + + fault = __do_page_fault(mm, addr, esr, flags, tsk); + + /* + * If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because it + * would already be released in __lock_page_or_retry in mm/filemap.c. + */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + + /* + * Major/minor page fault accounting is only done on the initial + * attempt. If we go through a retry, it is extremely likely that the + * page will be found in page cache at that point. + */ + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, + addr); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, + addr); + } + if (fault & VM_FAULT_RETRY) { + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of + * starvation. + */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; + } + } + + up_read(&mm->mmap_sem); + + /* + * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + */ + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | + VM_FAULT_BADACCESS)))) + return 0; + + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to + * userspace (which will retry the fault, or kill us if we got + * oom-killed). + */ + pagefault_out_of_memory(); + return 0; + } + + /* + * If we are in kernel mode at this point, we have no context to + * handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to successfully fix up + * this page fault. + */ + sig = SIGBUS; + code = BUS_ADRERR; + } else { + /* + * Something tried to access memory that isn't in our memory + * map. + */ + sig = SIGSEGV; + code = fault == VM_FAULT_BADACCESS ? + SEGV_ACCERR : SEGV_MAPERR; + } + + __do_user_fault(tsk, addr, esr, sig, code, regs); + return 0; + +no_context: + __do_kernel_fault(mm, addr, esr, regs); + return 0; +} + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain a valid + * entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are probably + * faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant entry, we + * copy the it to this task. If not, we send the process a signal, fixup the + * exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt + * or a critical region, and should only copy the information from the master + * page table, nothing more. + */ +static int __kprobes do_translation_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + if (addr < TASK_SIZE) + return do_page_fault(addr, esr, regs); + + do_bad_area(addr, esr, regs); + return 0; +} + +/* + * Some section permission faults need to be handled gracefully. They can + * happen due to a __{get,put}_user during an oops. + */ +static int do_sect_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + do_bad_area(addr, esr, regs); + return 0; +} + +/* + * This abort handler always returns "fault". + */ +static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + return 1; +} + +static struct fault_info { + int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); + int sig; + int code; + const char *name; +} fault_info[] = { + { do_bad, SIGBUS, 0, "ttbr address size fault" }, + { do_bad, SIGBUS, 0, "level 1 address size fault" }, + { do_bad, SIGBUS, 0, "level 2 address size fault" }, + { do_bad, SIGBUS, 0, "level 3 address size fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "input address range fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "asynchronous external abort" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, "unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "unknown 61" }, + { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +/* + * Dispatch a data abort to the relevant handler. + */ +asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + const struct fault_info *inf = fault_info + (esr & 63); + struct siginfo info; + + if (!inf->fn(addr, esr, regs)) + return; + + pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); +} + +/* + * Handle stack alignment exceptions. + */ +asmlinkage void __exception do_sp_pc_abort(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + struct siginfo info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); +} + +static struct fault_info debug_fault_info[] = { + { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" }, + { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" }, + { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" }, + { do_bad, SIGTRAP, 0, "aarch32 vector catch" }, + { do_bad, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }, + { do_bad, SIGBUS, 0, "unknown 7" }, +}; + +void __init hook_debug_fault_code(int nr, + int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info)); + + debug_fault_info[nr].fn = fn; + debug_fault_info[nr].sig = sig; + debug_fault_info[nr].code = code; + debug_fault_info[nr].name = name; +} + +asmlinkage int __exception do_debug_exception(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + struct siginfo info; + + if (!inf->fn(addr, esr, regs)) + return 1; + + pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); + + return 0; +} diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c new file mode 100644 index 00000000000..c144adb1682 --- /dev/null +++ b/arch/arm64/mm/flush.c @@ -0,0 +1,135 @@ +/* + * Based on arch/arm/mm/flush.c + * + * Copyright (C) 1995-2002 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/pagemap.h> + +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +void flush_cache_mm(struct mm_struct *mm) +{ +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, + unsigned long pfn) +{ +} + +static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, + unsigned long len) +{ + if (vma->vm_flags & VM_EXEC) { + unsigned long addr = (unsigned long)kaddr; + if (icache_is_aliasing()) { + __flush_dcache_area(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } + } +} + +/* + * Copy user data from/to a page which is mapped into a different processes + * address space. Really, we want to allow our "user space" model to handle + * this. + * + * Note that this code needs to run on the current CPU. + */ +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ +#ifdef CONFIG_SMP + preempt_disable(); +#endif + memcpy(dst, src, len); + flush_ptrace_access(vma, page, uaddr, dst, len); +#ifdef CONFIG_SMP + preempt_enable(); +#endif +} + +void __flush_dcache_page(struct page *page) +{ + __flush_dcache_area(page_address(page), PAGE_SIZE); +} + +void __sync_icache_dcache(pte_t pte, unsigned long addr) +{ + unsigned long pfn; + struct page *page; + + pfn = pte_pfn(pte); + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { + __flush_dcache_page(page); + __flush_icache_all(); + } else if (icache_is_aivivt()) { + __flush_icache_all(); + } +} + +/* + * Ensure cache coherency between kernel mapping and userspace mapping of this + * page. + */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + /* + * The zero page is never written to, so never has any dirty cache + * lines, and therefore never needs to be flushed. + */ + if (page == ZERO_PAGE(0)) + return; + + mapping = page_mapping(page); + if (mapping && mapping_mapped(mapping)) { + __flush_dcache_page(page); + __flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); + } else { + clear_bit(PG_dcache_clean, &page->flags); + } +} +EXPORT_SYMBOL(flush_dcache_page); + +/* + * Additional functions defined in assembly. + */ +EXPORT_SYMBOL(flush_cache_all); +EXPORT_SYMBOL(flush_icache_range); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c new file mode 100644 index 00000000000..5f719ba949b --- /dev/null +++ b/arch/arm64/mm/init.c @@ -0,0 +1,437 @@ +/* + * Based on arch/arm/mm/init.c + * + * Copyright (C) 1995-2005 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/initrd.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/sort.h> +#include <linux/of_fdt.h> + +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sizes.h> +#include <asm/tlb.h> + +#include "mm.h" + +static unsigned long phys_initrd_start __initdata = 0; +static unsigned long phys_initrd_size __initdata = 0; + +phys_addr_t memstart_addr __read_mostly = 0; + +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + phys_initrd_start = start; + phys_initrd_size = end - start; +} + +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + phys_initrd_start = start; + phys_initrd_size = size; + } + return 0; +} +early_param("initrd", early_initrd); + +#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) + +static void __init zone_sizes_init(unsigned long min, unsigned long max) +{ + struct memblock_region *reg; + unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; + unsigned long max_dma32 = min; + + memset(zone_size, 0, sizeof(zone_size)); + +#ifdef CONFIG_ZONE_DMA32 + /* 4GB maximum for 32-bit only capable devices */ + max_dma32 = min(max, MAX_DMA32_PFN); + zone_size[ZONE_DMA32] = max_dma32 - min; +#endif + zone_size[ZONE_NORMAL] = max - max_dma32; + + memcpy(zhole_size, zone_size, sizeof(zhole_size)); + + for_each_memblock(memory, reg) { + unsigned long start = memblock_region_memory_base_pfn(reg); + unsigned long end = memblock_region_memory_end_pfn(reg); + + if (start >= max) + continue; +#ifdef CONFIG_ZONE_DMA32 + if (start < max_dma32) { + unsigned long dma_end = min(end, max_dma32); + zhole_size[ZONE_DMA32] -= dma_end - start; + } +#endif + if (end > max_dma32) { + unsigned long normal_end = min(end, max); + unsigned long normal_start = max(start, max_dma32); + zhole_size[ZONE_NORMAL] -= normal_end - normal_start; + } + } + + free_area_init_node(0, zone_size, min, zhole_size); +} + +#ifdef CONFIG_HAVE_ARCH_PFN_VALID +int pfn_valid(unsigned long pfn) +{ + return memblock_is_memory(pfn << PAGE_SHIFT); +} +EXPORT_SYMBOL(pfn_valid); +#endif + +#ifndef CONFIG_SPARSEMEM +static void arm64_memory_present(void) +{ +} +#else +static void arm64_memory_present(void) +{ + struct memblock_region *reg; + + for_each_memblock(memory, reg) + memory_present(0, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); +} +#endif + +void __init arm64_memblock_init(void) +{ + u64 *reserve_map, base, size; + + /* Register the kernel text, kernel data and initrd with memblock */ + memblock_reserve(__pa(_text), _end - _text); +#ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size) { + memblock_reserve(phys_initrd_start, phys_initrd_size); + + /* Now convert initrd to virtual addresses */ + initrd_start = __phys_to_virt(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; + } +#endif + + /* + * Reserve the page tables. These are already in use, + * and can only be in node 0. + */ + memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE); + memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE); + + /* Reserve the dtb region */ + memblock_reserve(virt_to_phys(initial_boot_params), + be32_to_cpu(initial_boot_params->totalsize)); + + /* + * Process the reserve map. This will probably overlap the initrd + * and dtb locations which are already reserved, but overlapping + * doesn't hurt anything + */ + reserve_map = ((void*)initial_boot_params) + + be32_to_cpu(initial_boot_params->off_mem_rsvmap); + while (1) { + base = be64_to_cpup(reserve_map++); + size = be64_to_cpup(reserve_map++); + if (!size) + break; + memblock_reserve(base, size); + } + + memblock_allow_resize(); + memblock_dump_all(); +} + +void __init bootmem_init(void) +{ + unsigned long min, max; + + min = PFN_UP(memblock_start_of_DRAM()); + max = PFN_DOWN(memblock_end_of_DRAM()); + + /* + * Sparsemem tries to allocate bootmem in memory_present(), so must be + * done after the fixed reservations. + */ + arm64_memory_present(); + + sparse_init(); + zone_sizes_init(min, max); + + high_memory = __va((max << PAGE_SHIFT) - 1) + 1; + max_pfn = max_low_pfn = max; +} + +static inline int free_area(unsigned long pfn, unsigned long end, char *s) +{ + unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); + + for (; pfn < end; pfn++) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + pages++; + } + + if (size && s) + pr_info("Freeing %s memory: %dK\n", s, size); + + return pages; +} + +/* + * Poison init memory with an undefined instruction (0x0). + */ +static inline void poison_init_mem(void *s, size_t count) +{ + memset(s, 0, count); +} + +#ifndef CONFIG_SPARSEMEM_VMEMMAP +static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *start_pg, *end_pg; + unsigned long pg, pgend; + + /* + * Convert start_pfn/end_pfn to a struct page pointer. + */ + start_pg = pfn_to_page(start_pfn - 1) + 1; + end_pg = pfn_to_page(end_pfn - 1) + 1; + + /* + * Convert to physical addresses, and round start upwards and end + * downwards. + */ + pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); + pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + + /* + * If there are free pages between these, free the section of the + * memmap array. + */ + if (pg < pgend) + free_bootmem(pg, pgend - pg); +} + +/* + * The mem_map array can get very big. Free the unused area of the memory map. + */ +static void __init free_unused_memmap(void) +{ + unsigned long start, prev_end = 0; + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + start = __phys_to_pfn(reg->base); + +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist due + * to SPARSEMEM sections which aren't present. + */ + start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); +#endif + /* + * If we had a previous bank, and there is a space between the + * current bank and the previous, free it. + */ + if (prev_end && prev_end < start) + free_memmap(prev_end, start); + + /* + * Align up here since the VM subsystem insists that the + * memmap entries are valid from the bank end aligned to + * MAX_ORDER_NR_PAGES. + */ + prev_end = ALIGN(start + __phys_to_pfn(reg->size), + MAX_ORDER_NR_PAGES); + } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); +#endif +} +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ + +/* + * mem_init() marks the free areas in the mem_map and tells us how much memory + * is free. This is done after various parts of the system have claimed their + * memory after the kernel image. + */ +void __init mem_init(void) +{ + unsigned long reserved_pages, free_pages; + struct memblock_region *reg; + +#if CONFIG_SWIOTLB + extern void __init arm64_swiotlb_init(size_t max_size); + arm64_swiotlb_init(max_pfn << (PAGE_SHIFT - 1)); +#endif + + max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; + +#ifndef CONFIG_SPARSEMEM_VMEMMAP + /* this will put all unused low memory onto the freelists */ + free_unused_memmap(); +#endif + + totalram_pages += free_all_bootmem(); + + reserved_pages = free_pages = 0; + + for_each_memblock(memory, reg) { + unsigned int pfn1, pfn2; + struct page *page, *end; + + pfn1 = __phys_to_pfn(reg->base); + pfn2 = pfn1 + __phys_to_pfn(reg->size); + + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; + + do { + if (PageReserved(page)) + reserved_pages++; + else if (!page_count(page)) + free_pages++; + page++; + } while (page < end); + } + + /* + * Since our memory may not be contiguous, calculate the real number + * of pages we have in this system. + */ + pr_info("Memory:"); + num_physpages = 0; + for_each_memblock(memory, reg) { + unsigned long pages = memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + num_physpages += pages; + printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); + } + printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); + + pr_notice("Memory: %luk/%luk available, %luk reserved\n", + nr_free_pages() << (PAGE_SHIFT-10), + free_pages << (PAGE_SHIFT-10), + reserved_pages << (PAGE_SHIFT-10)); + +#define MLK(b, t) b, t, ((t) - (b)) >> 10 +#define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) + + pr_notice("Virtual kernel memory layout:\n" + " vmalloc : 0x%16lx - 0x%16lx (%6ld MB)\n" +#ifdef CONFIG_SPARSEMEM_VMEMMAP + " vmemmap : 0x%16lx - 0x%16lx (%6ld MB)\n" +#endif + " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" + " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" + " .init : 0x%p" " - 0x%p" " (%6ld kB)\n" + " .text : 0x%p" " - 0x%p" " (%6ld kB)\n" + " .data : 0x%p" " - 0x%p" " (%6ld kB)\n", + MLM(VMALLOC_START, VMALLOC_END), +#ifdef CONFIG_SPARSEMEM_VMEMMAP + MLM((unsigned long)virt_to_page(PAGE_OFFSET), + (unsigned long)virt_to_page(high_memory)), +#endif + MLM(MODULES_VADDR, MODULES_END), + MLM(PAGE_OFFSET, (unsigned long)high_memory), + + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(_sdata, _edata)); + +#undef MLK +#undef MLM +#undef MLK_ROUNDUP + + /* + * Check boundaries twice: Some fundamental inconsistencies can be + * detected at build time already. + */ +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); +#endif + BUILD_BUG_ON(TASK_SIZE_64 > MODULES_VADDR); + BUG_ON(TASK_SIZE_64 > MODULES_VADDR); + + if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + extern int sysctl_overcommit_memory; + /* + * On a machine this small we won't get anywhere without + * overcommit, so turn it on by default. + */ + sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; + } +} + +void free_initmem(void) +{ + poison_init_mem(__init_begin, __init_end - __init_begin); + totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), + __phys_to_pfn(__pa(__init_end)), + "init"); +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) { + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + totalram_pages += free_area(__phys_to_pfn(__pa(start)), + __phys_to_pfn(__pa(end)), + "initrd"); + } +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c new file mode 100644 index 00000000000..1725cd6db37 --- /dev/null +++ b/arch/arm64/mm/ioremap.c @@ -0,0 +1,84 @@ +/* + * Based on arch/arm/mm/ioremap.c + * + * (C) Copyright 1995 1996 Linus Torvalds + * Hacked for ARM by Phil Blundell <philb@gnu.org> + * Hacked to allow all architectures to build, and various cleanups + * by Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/io.h> + +static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, + pgprot_t prot, void *caller) +{ + unsigned long last_addr; + unsigned long offset = phys_addr & ~PAGE_MASK; + int err; + unsigned long addr; + struct vm_struct *area; + + /* + * Page align the mapping address and size, taking account of any + * offset. + */ + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(size + offset); + + /* + * Don't allow wraparound, zero size or outside PHYS_MASK. + */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK)) + return NULL; + + /* + * Don't allow RAM to be mapped. + */ + if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr)))) + return NULL; + + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + + err = ioremap_page_range(addr, addr + size, phys_addr, prot); + if (err) { + vunmap((void *)addr); + return NULL; + } + + return (void __iomem *)(offset + addr); +} + +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) +{ + return __ioremap_caller(phys_addr, size, prot, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__ioremap); + +void __iounmap(volatile void __iomem *io_addr) +{ + void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); + + vunmap(addr); +} +EXPORT_SYMBOL(__iounmap); diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h new file mode 100644 index 00000000000..d8d6e7851c1 --- /dev/null +++ b/arch/arm64/mm/mm.h @@ -0,0 +1,2 @@ +extern void __flush_dcache_page(struct page *page); +extern void __init bootmem_init(void); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c new file mode 100644 index 00000000000..7c7be785563 --- /dev/null +++ b/arch/arm64/mm/mmap.c @@ -0,0 +1,144 @@ +/* + * Based on arch/arm/mm/mmap.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/elf.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/export.h> +#include <linux/shm.h> +#include <linux/sched.h> +#include <linux/io.h> +#include <linux/personality.h> +#include <linux/random.h> + +#include <asm/cputype.h> + +/* + * Leave enough space between the mmap area and the stack to honour ulimit in + * the face of randomisation. + */ +#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1)) +#define MAX_GAP (STACK_TOP/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +/* + * Since get_random_int() returns the same value within a 1 jiffy window, we + * will almost always get the same randomisation for the stack and mmap + * region. This will mean the relative distance between stack and mmap will be + * the same. + * + * To avoid this we can shift the randomness by 1 bit. + */ +static unsigned long mmap_rnd(void) +{ + unsigned long rnd = 0; + + if (current->flags & PF_RANDOMIZE) + rnd = (long)get_random_int() & (STACK_RND_MASK >> 1); + + return rnd << (PAGE_SHIFT + 1); +} + +static unsigned long mmap_base(void) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(STACK_TOP - gap - mmap_rnd()); +} + +/* + * This function, called very early during the creation of a new process VM + * image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality bit is set, or + * if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} +EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); + + +/* + * You really shouldn't be using read() or write() on /dev/mem. This might go + * away in the future. + */ +int valid_phys_addr_range(unsigned long addr, size_t size) +{ + if (addr < PHYS_OFFSET) + return 0; + if (addr + size > __pa(high_memory - 1) + 1) + return 0; + + return 1; +} + +/* + * Do not allow /dev/mem mappings beyond the supported physical range. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK); +} + +#ifdef CONFIG_STRICT_DEVMEM + +#include <linux/ioport.h> + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. We mimic x86 here by + * disallowing access to system RAM as well as device-exclusive MMIO regions. + * This effectively disable read()/write() on /dev/mem. + */ +int devmem_is_allowed(unsigned long pfn) +{ + if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pfn)) + return 1; + return 0; +} + +#endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c new file mode 100644 index 00000000000..a6885d896ab --- /dev/null +++ b/arch/arm64/mm/mmu.c @@ -0,0 +1,395 @@ +/* + * Based on arch/arm/mm/mmu.c + * + * Copyright (C) 1995-2005 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/memblock.h> +#include <linux/fs.h> + +#include <asm/cputype.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sizes.h> +#include <asm/tlb.h> +#include <asm/mmu_context.h> + +#include "mm.h" + +/* + * Empty_zero_page is a special page that is used for zero-initialized data + * and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +pgprot_t pgprot_default; +EXPORT_SYMBOL(pgprot_default); + +static pmdval_t prot_sect_kernel; + +struct cachepolicy { + const char policy[16]; + u64 mair; + u64 tcr; +}; + +static struct cachepolicy cache_policies[] __initdata = { + { + .policy = "uncached", + .mair = 0x44, /* inner, outer non-cacheable */ + .tcr = TCR_IRGN_NC | TCR_ORGN_NC, + }, { + .policy = "writethrough", + .mair = 0xaa, /* inner, outer write-through, read-allocate */ + .tcr = TCR_IRGN_WT | TCR_ORGN_WT, + }, { + .policy = "writeback", + .mair = 0xee, /* inner, outer write-back, read-allocate */ + .tcr = TCR_IRGN_WBnWA | TCR_ORGN_WBnWA, + } +}; + +/* + * These are useful for identifying cache coherency problems by allowing the + * cache or the cache and writebuffer to be turned off. It changes the Normal + * memory caching attributes in the MAIR_EL1 register. + */ +static int __init early_cachepolicy(char *p) +{ + int i; + u64 tmp; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { + int len = strlen(cache_policies[i].policy); + + if (memcmp(p, cache_policies[i].policy, len) == 0) + break; + } + if (i == ARRAY_SIZE(cache_policies)) { + pr_err("ERROR: unknown or unsupported cache policy: %s\n", p); + return 0; + } + + flush_cache_all(); + + /* + * Modify MT_NORMAL attributes in MAIR_EL1. + */ + asm volatile( + " mrs %0, mair_el1\n" + " bfi %0, %1, #%2, #8\n" + " msr mair_el1, %0\n" + " isb\n" + : "=&r" (tmp) + : "r" (cache_policies[i].mair), "i" (MT_NORMAL * 8)); + + /* + * Modify TCR PTW cacheability attributes. + */ + asm volatile( + " mrs %0, tcr_el1\n" + " bic %0, %0, %2\n" + " orr %0, %0, %1\n" + " msr tcr_el1, %0\n" + " isb\n" + : "=&r" (tmp) + : "r" (cache_policies[i].tcr), "r" (TCR_IRGN_MASK | TCR_ORGN_MASK)); + + flush_cache_all(); + + return 0; +} +early_param("cachepolicy", early_cachepolicy); + +/* + * Adjust the PMD section entries according to the CPU in use. + */ +static void __init init_mem_pgprot(void) +{ + pteval_t default_pgprot; + int i; + + default_pgprot = PTE_ATTRINDX(MT_NORMAL); + prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL); + +#ifdef CONFIG_SMP + /* + * Mark memory with the "shared" attribute for SMP systems + */ + default_pgprot |= PTE_SHARED; + prot_sect_kernel |= PMD_SECT_S; +#endif + + for (i = 0; i < 16; i++) { + unsigned long v = pgprot_val(protection_map[i]); + protection_map[i] = __pgprot(v | default_pgprot); + } + + pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot); +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +static void __init *early_alloc(unsigned long sz) +{ + void *ptr = __va(memblock_alloc(sz, sz)); + memset(ptr, 0, sz); + return ptr; +} + +static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn) +{ + pte_t *pte; + + if (pmd_none(*pmd)) { + pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + } + BUG_ON(pmd_bad(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + do { + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys) +{ + pmd_t *pmd; + unsigned long next; + + /* + * Check for initial section mappings in the pgd/pud and remove them. + */ + if (pud_none(*pud) || pud_bad(*pud)) { + pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* try section mapping first */ + if (((addr | next | phys) & ~SECTION_MASK) == 0) + set_pmd(pmd, __pmd(phys | prot_sect_kernel)); + else + alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + phys += next - addr; + } while (pmd++, addr = next, addr != end); +} + +static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, + unsigned long end, unsigned long phys) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_pmd(pud, addr, next, phys); + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + +/* + * Create the page directory entries and any necessary page tables for the + * mapping specified by 'md'. + */ +static void __init create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size) +{ + unsigned long addr, length, end, next; + pgd_t *pgd; + + if (virt < VMALLOC_START) { + pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n", + phys, virt); + return; + } + + addr = virt & PAGE_MASK; + length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + next = pgd_addr_end(addr, end); + alloc_init_pud(pgd, addr, next, phys); + phys += next - addr; + } while (pgd++, addr = next, addr != end); +} + +static void __init map_mem(void) +{ + struct memblock_region *reg; + + /* map all the memory banks */ + for_each_memblock(memory, reg) { + phys_addr_t start = reg->base; + phys_addr_t end = start + reg->size; + + if (start >= end) + break; + + create_mapping(start, __phys_to_virt(start), end - start); + } +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps and sets up the zero page. + */ +void __init paging_init(void) +{ + void *zero_page; + + /* + * Maximum PGDIR_SIZE addressable via the initial direct kernel + * mapping in swapper_pg_dir. + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + + init_mem_pgprot(); + map_mem(); + + /* + * Finally flush the caches and tlb to ensure that we're in a + * consistent state. + */ + flush_cache_all(); + flush_tlb_all(); + + /* allocate the zero page. */ + zero_page = early_alloc(PAGE_SIZE); + + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + __flush_dcache_page(empty_zero_page); + + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_set_reserved_ttbr0(); + flush_tlb_all(); +} + +/* + * Enable the identity mapping to allow the MMU disabling. + */ +void setup_mm_for_reboot(void) +{ + cpu_switch_mm(idmap_pg_dir, &init_mm); + flush_tlb_all(); +} + +/* + * Check whether a kernel address is valid (derived from arch/x86/). + */ +int kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((((long)addr) >> VA_BITS) != -1UL) + return 0; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_ARM64_64K_PAGES +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + return vmemmap_populate_basepages(start_page, size, node); +} +#else /* !CONFIG_ARM64_64K_PAGES */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + do { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = NULL; + + p = vmemmap_alloc_block_buf(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel)); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } while (addr = next, addr != end); + + return 0; +} +#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c new file mode 100644 index 00000000000..7083cdada65 --- /dev/null +++ b/arch/arm64/mm/pgd.c @@ -0,0 +1,54 @@ +/* + * PGD allocation/freeing + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/slab.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *new_pgd; + + if (PGD_SIZE == PAGE_SIZE) + new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + else + new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); + + if (!new_pgd) + return NULL; + + return new_pgd; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (PGD_SIZE == PAGE_SIZE) + free_page((unsigned long)pgd); + else + kfree(pgd); +} diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S new file mode 100644 index 00000000000..8957b822010 --- /dev/null +++ b/arch/arm64/mm/proc-macros.S @@ -0,0 +1,55 @@ +/* + * Based on arch/arm/mm/proc-macros.S + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S new file mode 100644 index 00000000000..f1d8b9bbfda --- /dev/null +++ b/arch/arm64/mm/proc.S @@ -0,0 +1,175 @@ +/* + * Based on arch/arm/mm/proc.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +#ifndef CONFIG_SMP +/* PTWs cacheable, inner/outer WBWA not shareable */ +#define TCR_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA +#else +/* PTWs cacheable, inner/outer WBWA shareable */ +#define TCR_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED +#endif + +#define MAIR(attr, mt) ((attr) << ((mt) * 8)) + +/* + * cpu_cache_off() + * + * Turn the CPU D-cache off. + */ +ENTRY(cpu_cache_off) + mrs x0, sctlr_el1 + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb + ret +ENDPROC(cpu_cache_off) + +/* + * cpu_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the same state + * as it would be if it had been reset, and branch to what would be the + * reset vector. It must be executed with the flat identity mapping. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_reset) + mrs x1, sctlr_el1 + bic x1, x1, #1 + msr sctlr_el1, x1 // disable the MMU + isb + ret x0 +ENDPROC(cpu_reset) + +/* + * cpu_do_idle() + * + * Idle the processor (wait for interrupt). + */ +ENTRY(cpu_do_idle) + dsb sy // WFI may enter a low-power mode + wfi + ret +ENDPROC(cpu_do_idle) + +/* + * cpu_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys. + * + * - pgd_phys - physical address of new TTB + */ +ENTRY(cpu_do_switch_mm) + mmid w1, x1 // get mm->context.id + bfi x0, x1, #48, #16 // set the ASID + msr ttbr0_el1, x0 // set TTBR0 + isb + ret +ENDPROC(cpu_do_switch_mm) + +cpu_name: + .ascii "AArch64 Processor" + .align + + .section ".text.init", #alloc, #execinstr + +/* + * __cpu_setup + * + * Initialise the processor for turning the MMU on. Return in x0 the + * value of the SCTLR_EL1 register. + */ +ENTRY(__cpu_setup) + /* + * Preserve the link register across the function call. + */ + mov x28, lr + bl __flush_dcache_all + mov lr, x28 + ic iallu // I+BTB cache invalidate + dsb sy + + mov x0, #3 << 20 + msr cpacr_el1, x0 // Enable FP/ASIMD + mov x0, #1 + msr oslar_el1, x0 // Set the debug OS lock + tlbi vmalle1is // invalidate I + D TLBs + /* + * Memory region attributes for LPAE: + * + * n = AttrIndx[2:0] + * n MAIR + * DEVICE_nGnRnE 000 00000000 + * DEVICE_nGnRE 001 00000100 + * DEVICE_GRE 010 00001100 + * NORMAL_NC 011 01000100 + * NORMAL 100 11111111 + */ + ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ + MAIR(0x04, MT_DEVICE_nGnRE) | \ + MAIR(0x0c, MT_DEVICE_GRE) | \ + MAIR(0x44, MT_NORMAL_NC) | \ + MAIR(0xff, MT_NORMAL) + msr mair_el1, x5 + /* + * Prepare SCTLR + */ + adr x5, crval + ldp w5, w6, [x5] + mrs x0, sctlr_el1 + bic x0, x0, x5 // clear bits + orr x0, x0, x6 // set bits + /* + * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for + * both user and kernel. + */ + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ + TCR_ASID16 | (1 << 31) +#ifdef CONFIG_ARM64_64K_PAGES + orr x10, x10, TCR_TG0_64K + orr x10, x10, TCR_TG1_64K +#endif + msr tcr_el1, x10 + ret // return to head.S +ENDPROC(__cpu_setup) + + /* + * n n T + * U E WT T UD US IHBS + * CE0 XWHW CZ ME TEEA S + * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM + * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved + * .... .100 .... 01.1 11.1 ..01 0001 1101 < software settings + */ + .type crval, #object +crval: + .word 0x030802e2 // clear + .word 0x0405d11d // set diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S new file mode 100644 index 00000000000..8ae80a18e8e --- /dev/null +++ b/arch/arm64/mm/tlb.S @@ -0,0 +1,71 @@ +/* + * Based on arch/arm/mm/tlb.S + * + * Copyright (C) 1997-2002 Russell King + * Copyright (C) 2012 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +/* + * __cpu_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_struct describing address range + */ +ENTRY(__cpu_flush_user_tlb_range) + vma_vm_mm x3, x2 // get vma->vm_mm + mmid x3, x3 // get vm_mm->context.id + dsb sy + lsr x0, x0, #12 // align address + lsr x1, x1, #12 + bfi x0, x3, #48, #16 // start VA and ASID + bfi x1, x3, #48, #16 // end VA and ASID +1: tlbi vae1is, x0 // TLB invalidate by address and ASID + add x0, x0, #1 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__cpu_flush_user_tlb_range) + +/* + * __cpu_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(__cpu_flush_kern_tlb_range) + dsb sy + lsr x0, x0, #12 // align address + lsr x1, x1, #12 +1: tlbi vaae1is, x0 // TLB invalidate by address + add x0, x0, #1 + cmp x0, x1 + b.lo 1b + dsb sy + isb + ret +ENDPROC(__cpu_flush_kern_tlb_range) diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 3af601e31e6..f08e89183cd 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm generic-y += atomic.h generic-y += auxvec.h +generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += cputime.h diff --git a/arch/c6x/include/asm/barrier.h b/arch/c6x/include/asm/barrier.h deleted file mode 100644 index 538240e8590..00000000000 --- a/arch/c6x/include/asm/barrier.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_BARRIER_H -#define _ASM_C6X_BARRIER_H - -#define nop() asm("NOP\n"); - -#define mb() barrier() -#define rmb() barrier() -#define wmb() barrier() -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) - -#endif /* _ASM_C6X_BARRIER_H */ diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 66fd0172879..7f65be6f7f1 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/elfcore.h> #include <linux/mqueue.h> #include <linux/reboot.h> +#include <linux/rcupdate.h> //#define DEBUG @@ -74,6 +75,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); /* @@ -86,6 +88,7 @@ void cpu_idle (void) idle = default_idle; idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index ff95f50efea..2eb7fa5bf9d 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/pagemap.h> +#include <linux/rcupdate.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> @@ -69,12 +70,14 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); if (!frv_dma_inprogress && idle) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/frv/mb93090-mb00/pci-vdk.c b/arch/frv/mb93090-mb00/pci-vdk.c index d04ed14bbf0..71e9bcf5810 100644 --- a/arch/frv/mb93090-mb00/pci-vdk.c +++ b/arch/frv/mb93090-mb00/pci-vdk.c @@ -330,10 +330,8 @@ void __init pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); if (bus->number == 0) { - struct list_head *ln; struct pci_dev *dev; - for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) { - dev = pci_dev_b(ln); + list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->devfn == 0) { dev->resource[0].start = 0; dev->resource[0].end = 0; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0e9c315be10..f153ed1a4c0 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/reboot.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -78,8 +79,10 @@ void (*idle)(void) = default_idle; void cpu_idle(void) { while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 310cf5781fa..3c720ef6c32 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,6 +25,7 @@ config IA64 select HAVE_GENERIC_HARDIRQS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select HAVE_VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP @@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER default "17" if HUGETLB_PAGE default "11" -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - default n - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. - If in doubt, say N here. - config SMP bool "Symmetric multi-processing support" select USE_GENERIC_SMP_HELPERS diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index c34785dca92..ec536e4e36c 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -338,7 +338,7 @@ static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && - !(tty->termios->c_cflag & CRTSCTS)) { + !(tty->termios.c_cflag & CRTSCTS)) { tty->hw_stopped = 0; } } @@ -545,6 +545,7 @@ static int __init simrs_init(void) /* the port is imaginary */ printk(KERN_INFO "ttyS0 at 0x03f8 (irq = %d) is a 16550\n", state->irq); + tty_port_link_device(&state->port, hp_simserial_driver, 0); retval = tty_register_driver(hp_simserial_driver); if (retval) { printk(KERN_ERR "Couldn't register simserial driver\n"); diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h index cb2412fcd17..d38c7ea5eea 100644 --- a/arch/ia64/include/asm/switch_to.h +++ b/arch/ia64/include/asm/switch_to.h @@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task); extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next); -# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n) -#else -# define IA64_ACCOUNT_ON_SWITCH(p,n) -#endif - #ifdef CONFIG_PERFMON DECLARE_PER_CPU(unsigned long, pfm_syst_info); # define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) @@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct || PERFMON_IS_SYSWIDE()) #define __switch_to(prev,next,last) do { \ - IA64_ACCOUNT_ON_SWITCH(prev, next); \ if (IA64_HAS_EXTRA_STATE(prev)) \ ia64_save_extra(prev); \ if (IA64_HAS_EXTRA_STATE(next)) \ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index dd6fc144974..3e316ec0b83 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <linux/utsname.h> #include <linux/tracehook.h> +#include <linux/rcupdate.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -279,6 +280,7 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); if (can_do_pal_halt) { current_thread_info()->status &= ~TS_POLLING; /* @@ -309,6 +311,7 @@ cpu_idle (void) normal_xtp(); #endif } + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); if (cpu_is_offline(cpu)) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index ecc904b33c5..80ff9acc5ed 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -83,32 +83,36 @@ static struct clocksource *itc_clocksource; extern cputime_t cycle_to_cputime(u64 cyc); +static void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_utime; + struct thread_info *ti = task_thread_info(tsk); + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(tsk, delta_utime, delta_utime); + ti->ac_utime = 0; + } +} + /* * Called from the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on * the next process. */ -void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *pi = task_thread_info(prev); - struct thread_info *ni = task_thread_info(next); - cputime_t delta_stime, delta_utime; - __u64 now; + struct thread_info *ni = task_thread_info(current); - now = ia64_get_itc(); - - delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); if (idle_task(smp_processor_id()) != prev) - account_system_time(prev, 0, delta_stime, delta_stime); + vtime_account_system(prev); else - account_idle_time(delta_stime); + vtime_account_idle(prev); - if (pi->ac_utime) { - delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime, delta_utime); - } + vtime_account_user(prev); - pi->ac_stamp = ni->ac_stamp = now; + pi->ac_stamp = ni->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } @@ -116,29 +120,32 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) * Account time for a transition between system, hard irq or soft irq state. * Note that this function is called with interrupts enabled. */ -void account_system_vtime(struct task_struct *tsk) +static cputime_t vtime_delta(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - unsigned long flags; cputime_t delta_stime; __u64 now; - local_irq_save(flags); - now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - if (irq_count() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta_stime, delta_stime); - else - account_idle_time(delta_stime); ti->ac_stime = 0; - ti->ac_stamp = now; - local_irq_restore(flags); + return delta_stime; +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta = vtime_delta(tsk); + + account_system_time(tsk, 0, delta, delta); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + account_idle_time(vtime_delta(tsk)); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Called from the timer interrupt handler to charge accumulated user time @@ -146,14 +153,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime); */ void account_process_tick(struct task_struct *p, int user_tick) { - struct thread_info *ti = task_thread_info(p); - cputime_t delta_utime; - - if (ti->ac_utime) { - delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime, delta_utime); - ti->ac_utime = 0; - } + vtime_account_user(p); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 81acc7a57f3..5faa66c5c2a 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -295,7 +295,6 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) window->resource.flags = flags; window->resource.start = addr.minimum + offset; window->resource.end = window->resource.start + addr.address_length - 1; - window->resource.child = NULL; window->offset = offset; if (insert_resource(root, &window->resource)) { @@ -357,7 +356,7 @@ pci_acpi_scan_root(struct acpi_pci_root *root) &windows); if (windows) { controller->window = - kmalloc_node(sizeof(*controller->window) * windows, + kzalloc_node(sizeof(*controller->window) * windows, GFP_KERNEL, controller->node); if (!controller->window) goto out2; @@ -461,14 +460,6 @@ void pcibios_set_master (struct pci_dev *dev) /* No special bus mastering setup handling */ } -void __devinit -pcibios_update_irq (struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); - - /* ??? FIXME -- record old value for shutdown. */ -} - int pcibios_enable_device (struct pci_dev *dev, int mask) { diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index fbb5f2f87ee..8630875e74b 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -229,7 +229,6 @@ void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info, { int segment = pci_domain_nr(dev->bus); struct pcibus_bussoft *bs; - struct pci_bus *host_pci_bus; struct pci_dev *host_pci_dev; unsigned int bus_no, devfn; @@ -245,8 +244,7 @@ void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *pcidev_info, bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; - host_pci_bus = pci_find_bus(segment, bus_no); - host_pci_dev = pci_get_slot(host_pci_bus, devfn); + host_pci_dev = pci_get_domain_bus_and_slot(segment, bus_no, devfn); pcidev_info->host_pci_dev = host_pci_dev; pcidev_info->pdi_linux_pcidev = dev; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 3a4a32b2720..384e63f3a4c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/hardirq.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -82,6 +83,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void) = pm_idle; @@ -90,6 +92,7 @@ void cpu_idle (void) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index 8db25e80694..16d170f53bf 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -19,6 +19,7 @@ #include <asm/natfeat.h> static int stderr_id; +static struct tty_port nfcon_tty_port; static struct tty_driver *nfcon_tty_driver; static void nfputs(const char *str, unsigned int count) @@ -119,6 +120,8 @@ static int __init nfcon_init(void) { int res; + tty_port_init(&nfcon_tty_port); + stderr_id = nf_get_id("NF_STDERR"); if (!stderr_id) return -ENODEV; @@ -135,6 +138,7 @@ static int __init nfcon_init(void) nfcon_tty_driver->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(nfcon_tty_driver, &nfcon_tty_ops); + tty_port_link_device(&nfcon_tty_port, nfcon_tty_driver, 0); res = tty_register_driver(nfcon_tty_driver); if (res) { pr_err("failed to register nfcon tty driver\n"); diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c index b2988aa1840..73fa0b56a06 100644 --- a/arch/m68k/kernel/pcibios.c +++ b/arch/m68k/kernel/pcibios.c @@ -87,11 +87,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - void __devinit pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev; diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index c488e3cfab5..ac2892e49c7 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/init_task.h> #include <linux/mqueue.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -75,8 +76,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c index 75f9ee967ea..9cd13b4ce42 100644 --- a/arch/m68k/platform/coldfire/clk.c +++ b/arch/m68k/platform/coldfire/clk.c @@ -146,9 +146,3 @@ struct clk_ops clk_ops1 = { }; #endif /* MCFPM_PPMCR1 */ #endif /* MCFPM_PPMCR0 */ - -struct clk *devm_clk_get(struct device *dev, const char *id) -{ - return NULL; -} -EXPORT_SYMBOL(devm_clk_get); diff --git a/arch/mips/cavium-octeon/serial.c b/arch/mips/cavium-octeon/serial.c index 138b2216b4f..569f41bdcc4 100644 --- a/arch/mips/cavium-octeon/serial.c +++ b/arch/mips/cavium-octeon/serial.c @@ -47,40 +47,40 @@ static int __devinit octeon_serial_probe(struct platform_device *pdev) { int irq, res; struct resource *res_mem; - struct uart_port port; + struct uart_8250_port up; /* All adaptors have an irq. */ irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; - memset(&port, 0, sizeof(port)); + memset(&up, 0, sizeof(up)); - port.flags = ASYNC_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; - port.type = PORT_OCTEON; - port.iotype = UPIO_MEM; - port.regshift = 3; - port.dev = &pdev->dev; + up.port.flags = ASYNC_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; + up.port.type = PORT_OCTEON; + up.port.iotype = UPIO_MEM; + up.port.regshift = 3; + up.port.dev = &pdev->dev; if (octeon_is_simulation()) /* Make simulator output fast*/ - port.uartclk = 115200 * 16; + up.port.uartclk = 115200 * 16; else - port.uartclk = octeon_get_io_clock_rate(); + up.port.uartclk = octeon_get_io_clock_rate(); - port.serial_in = octeon_serial_in; - port.serial_out = octeon_serial_out; - port.irq = irq; + up.port.serial_in = octeon_serial_in; + up.port.serial_out = octeon_serial_out; + up.port.irq = irq; res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res_mem == NULL) { dev_err(&pdev->dev, "found no memory resource\n"); return -ENXIO; } - port.mapbase = res_mem->start; - port.membase = ioremap(res_mem->start, resource_size(res_mem)); + up.port.mapbase = res_mem->start; + up.port.membase = ioremap(res_mem->start, resource_size(res_mem)); - res = serial8250_register_port(&port); + res = serial8250_register_8250_port(&up); return res >= 0 ? 0 : res; } diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index e7e03ecf549..afc379ca375 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -102,7 +102,7 @@ static void cmp_init_secondary(void) c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE; #endif #ifdef CONFIG_MIPS_MT_SMTC - c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC; + c->tc_id = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT; #endif } diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 33aadbcf170..dcfd573871c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -152,6 +152,8 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 7b13a4caeea..fea823f1847 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -273,16 +273,19 @@ asmlinkage void plat_irq_dispatch(void) unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; + if (unlikely(!pending)) { + spurious_interrupt(); + return; + } + irq = irq_ffs(pending); if (irq == MIPSCPU_INT_I8259A) malta_hw0_irqdispatch(); else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()])) malta_ipi_irqdispatch(); - else if (irq >= 0) - do_IRQ(MIPS_CPU_IRQ_BASE + irq); else - spurious_interrupt(); + do_IRQ(MIPS_CPU_IRQ_BASE + irq); } #ifdef CONFIG_MIPS_MT_SMP diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index 4c35301720e..80562b81f0f 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -138,11 +138,6 @@ static int __init malta_add_devices(void) if (err) return err; - /* - * Set RTC to BCD mode to support current alarm code. - */ - CMOS_WRITE(CMOS_READ(RTC_CONTROL) & ~RTC_DM_BINARY, RTC_CONTROL); - return 0; } diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 52a1ba70b3b..c5dfb2c87d4 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -117,16 +117,11 @@ int pcibios_plat_dev_init(struct pci_dev *dev) } /* Enable the PCIe normal error reporting */ - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pos) { - /* Update Device Control */ - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &config); - config |= PCI_EXP_DEVCTL_CERE; /* Correctable Error Reporting */ - config |= PCI_EXP_DEVCTL_NFERE; /* Non-Fatal Error Reporting */ - config |= PCI_EXP_DEVCTL_FERE; /* Fatal Error Reporting */ - config |= PCI_EXP_DEVCTL_URRE; /* Unsupported Request */ - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, config); - } + config = PCI_EXP_DEVCTL_CERE; /* Correctable Error Reporting */ + config |= PCI_EXP_DEVCTL_NFERE; /* Non-Fatal Error Reporting */ + config |= PCI_EXP_DEVCTL_FERE; /* Fatal Error Reporting */ + config |= PCI_EXP_DEVCTL_URRE; /* Unsupported Request */ + pcie_capability_set_word(dev, PCI_EXP_DEVCTL, config); /* Find the Advanced Error Reporting capability */ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 690356808f8..04e35bcde07 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -313,12 +313,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } } -void __init -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - #ifdef CONFIG_HOTPLUG EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c index c48194c3073..b2d4f492d78 100644 --- a/arch/mips/sni/a20r.c +++ b/arch/mips/sni/a20r.c @@ -133,6 +133,38 @@ static struct platform_device sc26xx_pdev = { } }; +#warning "Please try migrate to use new driver SCCNXP and report the status" \ + "in the linux-serial mailing list." + +/* The code bellow is a replacement of SC26XX to SCCNXP */ +#if 0 +#include <linux/platform_data/sccnxp.h> + +static struct sccnxp_pdata sccnxp_data = { + .reg_shift = 2, + .frequency = 3686400, + .mctrl_cfg[0] = MCTRL_SIG(DTR_OP, LINE_OP7) | + MCTRL_SIG(RTS_OP, LINE_OP3) | + MCTRL_SIG(DSR_IP, LINE_IP5) | + MCTRL_SIG(DCD_IP, LINE_IP6), + .mctrl_cfg[1] = MCTRL_SIG(DTR_OP, LINE_OP2) | + MCTRL_SIG(RTS_OP, LINE_OP1) | + MCTRL_SIG(DSR_IP, LINE_IP0) | + MCTRL_SIG(CTS_IP, LINE_IP1) | + MCTRL_SIG(DCD_IP, LINE_IP2) | + MCTRL_SIG(RNG_IP, LINE_IP3), +}; + +static struct platform_device sc2681_pdev = { + .name = "sc2681", + .resource = sc2xxx_rsrc, + .num_resources = ARRAY_SIZE(sc2xxx_rsrc), + .dev = { + .platform_data = &sccnxp_data, + }, +}; +#endif + static u32 a20r_ack_hwint(void) { u32 status = read_c0_status(); diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 7dab0cd3646..e9cceba193b 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -107,6 +108,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ for (;;) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); @@ -121,6 +123,7 @@ void cpu_idle(void) } idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 47341aa208f..88238638aee 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -202,6 +202,7 @@ static int __init pdc_console_tty_driver_init(void) pdc_console_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS; tty_set_operations(pdc_console_tty_driver, &pdc_console_tty_ops); + tty_port_link_device(&tty_port, pdc_console_tty_driver, 0); err = tty_register_driver(pdc_console_tty_driver); if (err) { diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 2c05a9292a8..8c6b6b6561f 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -48,6 +48,7 @@ #include <linux/unistd.h> #include <linux/kallsyms.h> #include <linux/uaccess.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/asm-offsets.h> @@ -69,8 +70,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); } diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1c1aadc8c48..c32ae5ce9ff 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -1,10 +1,6 @@ addnote empty.c hack-coff -infblock.c -infblock.h -infcodes.c -infcodes.h inffast.c inffast.h inffixed.h diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 42ce570812c..f7706d722b3 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -214,6 +214,9 @@ struct machdep_calls { /* Called after scan and before resource survey */ void (*pcibios_fixup_phb)(struct pci_controller *hose); + /* Called during PCI resource reassignment */ + resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type); + /* Called to shutdown machine specific hardware not already controlled * by other drivers. */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 3b4b4a8da92..c1f267694ac 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -197,12 +197,6 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) -#define account_process_vtime(tsk) account_process_tick(tsk, 0) -#else -#define account_process_vtime(tsk) do { } while (0) -#endif - extern void secondary_cpu_time_init(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2aa04f29e1d..43fea543d68 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -99,6 +99,26 @@ void pcibios_free_controller(struct pci_controller *phb) kfree(phb); } +/* + * The function is used to return the minimal alignment + * for memory or I/O windows of the associated P2P bridge. + * By default, 4KiB alignment for I/O windows and 1MiB for + * memory windows. + */ +resource_size_t pcibios_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + if (ppc_md.pcibios_window_alignment) + return ppc_md.pcibios_window_alignment(bus, type); + + /* + * PCI core will figure out the default + * alignment: 4KiB for I/O and 1MiB for + * memory window. + */ + return 1; +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1a1f2ddfb58..e9cb51f5f80 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev, local_irq_save(flags); - account_system_vtime(current); - account_process_vtime(current); - /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e49e93191b6..eaa9d0e6abc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -291,13 +291,12 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. */ -void account_system_vtime(struct task_struct *tsk) +static u64 vtime_delta(struct task_struct *tsk, + u64 *sys_scaled, u64 *stolen) { - u64 now, nowscaled, delta, deltascaled; - unsigned long flags; - u64 stolen, udelta, sys_scaled, user_scaled; + u64 now, nowscaled, deltascaled; + u64 udelta, delta, user_scaled; - local_irq_save(flags); now = mftb(); nowscaled = read_spurr(now); get_paca()->system_time += now - get_paca()->starttime; @@ -305,7 +304,7 @@ void account_system_vtime(struct task_struct *tsk) deltascaled = nowscaled - get_paca()->startspurr; get_paca()->startspurr = nowscaled; - stolen = calculate_stolen_time(now); + *stolen = calculate_stolen_time(now); delta = get_paca()->system_time; get_paca()->system_time = 0; @@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk) * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - sys_scaled = delta; + *sys_scaled = delta; user_scaled = udelta; if (deltascaled != delta + udelta) { if (udelta) { - sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - sys_scaled; + *sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - *sys_scaled; } else { - sys_scaled = deltascaled; + *sys_scaled = deltascaled; } } get_paca()->user_time_scaled += user_scaled; - if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { - account_system_time(tsk, 0, delta, sys_scaled); - if (stolen) - account_steal_time(stolen); - } else { - account_idle_time(delta + stolen); - } - local_irq_restore(flags); + return delta; +} + +void vtime_account_system(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_idle_time(delta + stolen); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Transfer the user and system times accumulated in the paca * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. - * Assumes that account_system_vtime() has been called recently + * Assumes that vtime_account() has been called recently * (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ @@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick) account_user_time(tsk, utime, utimescaled); } +void vtime_task_switch(struct task_struct *prev) +{ + vtime_account(prev); + account_process_tick(prev, 0); +} + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 30fd01de6be..72afd2888ca 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,6 +1,7 @@ config PPC64 bool "64-bit kernel" default n + select HAVE_VIRT_CPU_ACCOUNTING help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -337,21 +338,6 @@ config PPC_MM_SLICES default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES) default n -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - depends on PPC64 - default y - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. This also enables accounting of - stolen time on logically-partitioned systems running on - IBM POWER5-based machines. - - If in doubt, say Y here. - config PPC_HAVE_PMU_SUPPORT bool diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9cda6a1ad0c..0e7eccc0f88 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -855,7 +855,7 @@ static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus) if (pe == NULL) continue; /* Leaving the PCIe domain ... single PE# */ - if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) pnv_ioda_setup_bus_PE(dev, pe); else if (dev->subordinate) pnv_ioda_setup_PEs(dev->subordinate); @@ -1139,6 +1139,44 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose) } } +/* + * Returns the alignment for I/O or memory windows for P2P + * bridges. That actually depends on how PEs are segmented. + * For now, we return I/O or M32 segment size for PE sensitive + * P2P bridges. Otherwise, the default values (4KiB for I/O, + * 1MiB for memory) will be returned. + * + * The current PCI bus might be put into one PE, which was + * create against the parent PCI bridge. For that case, we + * needn't enlarge the alignment so that we can save some + * resources. + */ +static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + struct pci_dev *bridge; + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + int num_pci_bridges = 0; + + bridge = bus->self; + while (bridge) { + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { + num_pci_bridges++; + if (num_pci_bridges >= 2) + return 1; + } + + bridge = bridge->bus->self; + } + + /* We need support prefetchable memory window later */ + if (type & IORESOURCE_MEM) + return phb->ioda.m32_segsize; + + return phb->ioda.io_segsize; +} + /* Prevent enabling devices for which we couldn't properly * assign a PE */ @@ -1306,6 +1344,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) */ ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; + ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 9858476fa0f..cc45d25487b 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -5,3 +5,4 @@ obj-$(CONFIG_CRYPTO_HW) += crypto/ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-$(CONFIG_MATHEMU) += math-emu/ +obj-y += net/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 107610e01a2..f9acddd9ace 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -49,10 +49,13 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config VIRT_CPU_ACCOUNTING +config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_SUPPORTS_DEBUG_PAGEALLOC +config KEXEC + def_bool y + +config AUDIT_ARCH def_bool y config S390 @@ -84,11 +87,15 @@ config S390 select HAVE_KERNEL_XZ select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_BPF_JIT if 64BIT && PACK_STACK select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_CMPXCHG_LOCAL + select HAVE_CMPXCHG_DOUBLE + select HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select BUILDTIME_EXTABLE_SORT select ARCH_INLINE_SPIN_TRYLOCK @@ -133,9 +140,79 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" -menu "Base setup" +menu "Processor type and features" + +config HAVE_MARCH_Z900_FEATURES + def_bool n + +config HAVE_MARCH_Z990_FEATURES + def_bool n + select HAVE_MARCH_Z900_FEATURES + +config HAVE_MARCH_Z9_109_FEATURES + def_bool n + select HAVE_MARCH_Z990_FEATURES + +config HAVE_MARCH_Z10_FEATURES + def_bool n + select HAVE_MARCH_Z9_109_FEATURES + +config HAVE_MARCH_Z196_FEATURES + def_bool n + select HAVE_MARCH_Z10_FEATURES + +choice + prompt "Processor type" + default MARCH_G5 + +config MARCH_G5 + bool "System/390 model G5 and G6" + depends on !64BIT + help + Select this to build a 31 bit kernel that works + on all ESA/390 and z/Architecture machines. -comment "Processor type and features" +config MARCH_Z900 + bool "IBM zSeries model z800 and z900" + select HAVE_MARCH_Z900_FEATURES if 64BIT + help + Select this to enable optimizations for model z800/z900 (2064 and + 2066 series). This will enable some optimizations that are not + available on older ESA/390 (31 Bit) only CPUs. + +config MARCH_Z990 + bool "IBM zSeries model z890 and z990" + select HAVE_MARCH_Z990_FEATURES if 64BIT + help + Select this to enable optimizations for model z890/z990 (2084 and + 2086 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z9_109 + bool "IBM System z9" + select HAVE_MARCH_Z9_109_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z9 (2094 and + 2096 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z10 + bool "IBM System z10" + select HAVE_MARCH_Z10_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z10 (2097 and + 2098 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z196 + bool "IBM zEnterprise 114 and 196" + select HAVE_MARCH_Z196_FEATURES if 64BIT + help + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. + +endchoice config 64BIT def_bool y @@ -147,6 +224,24 @@ config 64BIT config 32BIT def_bool y if !64BIT +config COMPAT + def_bool y + prompt "Kernel support for 31 bit emulation" + depends on 64BIT + select COMPAT_BINFMT_ELF if BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC + help + Select this option if you want to enable your system kernel to + handle system-calls from ELF binaries for 31 bit ESA. This option + (and some other stuff like libraries and such) is needed for + executing 31 bit applications. It is safe to say "Y". + +config SYSVIPC_COMPAT + def_bool y if COMPAT && SYSVIPC + +config KEYS_COMPAT + def_bool y if COMPAT && KEYS + config SMP def_bool y prompt "Symmetric multi-processing support" @@ -202,6 +297,8 @@ config SCHED_BOOK Book scheduler support improves the CPU scheduler's decision making when dealing with machines that have several books. +source kernel/Kconfig.preempt + config MATHEMU def_bool y prompt "IEEE FPU emulation" @@ -211,100 +308,35 @@ config MATHEMU on older ESA/390 machines. Say Y unless you know your machine doesn't need this. -config COMPAT - def_bool y - prompt "Kernel support for 31 bit emulation" - depends on 64BIT - select COMPAT_BINFMT_ELF if BINFMT_ELF - select ARCH_WANT_OLD_COMPAT_IPC - help - Select this option if you want to enable your system kernel to - handle system-calls from ELF binaries for 31 bit ESA. This option - (and some other stuff like libraries and such) is needed for - executing 31 bit applications. It is safe to say "Y". +source kernel/Kconfig.hz -config SYSVIPC_COMPAT - def_bool y if COMPAT && SYSVIPC +endmenu -config KEYS_COMPAT - def_bool y if COMPAT && KEYS +menu "Memory setup" -config AUDIT_ARCH +config ARCH_SPARSEMEM_ENABLE def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_VMEMMAP + select SPARSEMEM_STATIC if !64BIT -config HAVE_MARCH_Z900_FEATURES - def_bool n - -config HAVE_MARCH_Z990_FEATURES - def_bool n - select HAVE_MARCH_Z900_FEATURES - -config HAVE_MARCH_Z9_109_FEATURES - def_bool n - select HAVE_MARCH_Z990_FEATURES - -config HAVE_MARCH_Z10_FEATURES - def_bool n - select HAVE_MARCH_Z9_109_FEATURES - -config HAVE_MARCH_Z196_FEATURES - def_bool n - select HAVE_MARCH_Z10_FEATURES - -comment "Code generation options" - -choice - prompt "Processor type" - default MARCH_G5 - -config MARCH_G5 - bool "System/390 model G5 and G6" - depends on !64BIT - help - Select this to build a 31 bit kernel that works - on all ESA/390 and z/Architecture machines. - -config MARCH_Z900 - bool "IBM zSeries model z800 and z900" - select HAVE_MARCH_Z900_FEATURES if 64BIT - help - Select this to enable optimizations for model z800/z900 (2064 and - 2066 series). This will enable some optimizations that are not - available on older ESA/390 (31 Bit) only CPUs. +config ARCH_SPARSEMEM_DEFAULT + def_bool y -config MARCH_Z990 - bool "IBM zSeries model z890 and z990" - select HAVE_MARCH_Z990_FEATURES if 64BIT - help - Select this to enable optimizations for model z890/z990 (2084 and - 2086 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_SELECT_MEMORY_MODEL + def_bool y -config MARCH_Z9_109 - bool "IBM System z9" - select HAVE_MARCH_Z9_109_FEATURES if 64BIT - help - Select this to enable optimizations for IBM System z9 (2094 and - 2096 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y if SPARSEMEM -config MARCH_Z10 - bool "IBM System z10" - select HAVE_MARCH_Z10_FEATURES if 64BIT - help - Select this to enable optimizations for IBM System z10 (2097 and - 2098 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y -config MARCH_Z196 - bool "IBM zEnterprise 114 and 196" - select HAVE_MARCH_Z196_FEATURES if 64BIT - help - Select this to enable optimizations for IBM zEnterprise 114 and 196 - (2818 and 2817 series). The kernel will be slightly faster but will - not work on older machines. +config FORCE_MAX_ZONEORDER + int + default "9" -endchoice +source "mm/Kconfig" config PACK_STACK def_bool y @@ -368,34 +400,9 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -comment "Kernel preemption" - -source "kernel/Kconfig.preempt" - -config ARCH_SPARSEMEM_ENABLE - def_bool y - select SPARSEMEM_VMEMMAP_ENABLE - select SPARSEMEM_VMEMMAP - select SPARSEMEM_STATIC if !64BIT - -config ARCH_SPARSEMEM_DEFAULT - def_bool y - -config ARCH_SELECT_MEMORY_MODEL - def_bool y - -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y if SPARSEMEM - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - -config ARCH_HIBERNATION_POSSIBLE - def_bool y if 64BIT - -source "mm/Kconfig" +endmenu -comment "I/O subsystem configuration" +menu "I/O subsystem" config QDIO def_tristate y @@ -426,13 +433,102 @@ config CHSC_SCH If unsure, say N. -comment "Misc" +config SCM_BUS + def_bool y + depends on 64BIT + prompt "SCM bus driver" + help + Bus driver for Storage Class Memory. + +config EADM_SCH + def_tristate m + prompt "Support for EADM subchannels" + depends on SCM_BUS + help + This driver allows usage of EADM subchannels. EADM subchannels act + as a communication vehicle for SCM increments. + + To compile this driver as a module, choose M here: the + module will be called eadm_sch. + +endmenu + +menu "Dump support" + +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT && SMP + select KEXEC + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + +config ZFCPDUMP + def_bool n + prompt "zfcpdump support" + select SMP + help + Select this option if you want to build an zfcpdump enabled kernel. + Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + +endmenu + +menu "Executable file formats / Emulations" source "fs/Kconfig.binfmt" -config FORCE_MAX_ZONEORDER - int - default "9" +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. + +endmenu + +menu "Power Management" + +config ARCH_HIBERNATION_POSSIBLE + def_bool y if 64BIT + +source "kernel/power/Kconfig" + +endmenu + +source "net/Kconfig" + +config PCMCIA + def_bool n + +config CCW + def_bool y + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/s390/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +menu "Virtualization" config PFAULT def_bool y @@ -448,8 +544,8 @@ config PFAULT this option. config SHARED_KERNEL - def_bool y - prompt "VM shared kernel support" + bool "VM shared kernel support" + depends on !JUMP_LABEL help Select this option, if you want to share the text segment of the Linux kernel between different VM guests. This reduces memory @@ -544,8 +640,6 @@ config APPLDATA_NET_SUM This can also be compiled as a module, which will be called appldata_net_sum.o. -source kernel/Kconfig.hz - config S390_HYPFS_FS def_bool y prompt "s390 hypervisor file system support" @@ -554,90 +648,21 @@ config S390_HYPFS_FS This is a virtual file system intended to provide accounting information in an s390 hypervisor environment. -config KEXEC - def_bool n - prompt "kexec system call" - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but is independent of hardware/microcode support. - -config CRASH_DUMP - bool "kernel crash dumps" - depends on 64BIT && SMP - select KEXEC - help - Generate crash dump after being started by kexec. - Crash dump kernels are loaded in the main kernel with kexec-tools - into a specially reserved region and then later executed after - a crash by kdump/kexec. - For more details see Documentation/kdump/kdump.txt - -config ZFCPDUMP - def_bool n - prompt "zfcpdump support" - select SMP - help - Select this option if you want to build an zfcpdump enabled kernel. - Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. +source "arch/s390/kvm/Kconfig" config S390_GUEST def_bool y - prompt "s390 guest support for KVM (EXPERIMENTAL)" + prompt "s390 support for virtio devices (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTUALIZATION select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE help - Select this option if you want to run the kernel as a guest under - the KVM hypervisor. This will add detection for KVM as well as a - virtio transport. If KVM is detected, the virtio console will be - the default console. - -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc/<pid>/seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. - -endmenu + Enabling this option adds support for virtio based paravirtual device + drivers on s390. -menu "Power Management" - -source "kernel/power/Kconfig" + Select this option if you want to run the kernel as a guest under + the KVM hypervisor. endmenu - -source "net/Kconfig" - -config PCMCIA - def_bool n - -config CCW - def_bool y - -source "drivers/Kconfig" - -source "fs/Kconfig" - -source "arch/s390/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" - -source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 10e22c4ec4a..3ad8f61c998 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 465eca756fe..c4c6a1cf221 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -71,34 +71,37 @@ void *memset(void *s, int c, size_t n) { char *xs; - if (c == 0) - return __builtin_memset(s, 0, n); - - xs = (char *) s; - if (n > 0) - do { - *xs++ = c; - } while (--n > 0); + xs = s; + while (n--) + *xs++ = c; return s; } -void *memcpy(void *__dest, __const void *__src, size_t __n) +void *memcpy(void *dest, const void *src, size_t n) { - return __builtin_memcpy(__dest, __src, __n); + const char *s = src; + char *d = dest; + + while (n--) + *d++ = *s++; + return dest; } -void *memmove(void *__dest, __const void *__src, size_t __n) +void *memmove(void *dest, const void *src, size_t n) { - char *d; - const char *s; - - if (__dest <= __src) - return __builtin_memcpy(__dest, __src, __n); - d = __dest + __n; - s = __src + __n; - while (__n--) - *--d = *--s; - return __dest; + const char *s = src; + char *d = dest; + + if (d <= s) { + while (n--) + *d++ = *s++; + } else { + d += n; + s += n; + while (n--) + *--d = *--s; + } + return dest; } static void error(char *x) diff --git a/arch/s390/defconfig b/arch/s390/defconfig index f39cd710980..b74400e3e03 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -16,8 +16,8 @@ CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y @@ -32,20 +32,19 @@ CONFIG_EXPERT=y CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_PREEMPT=y +CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y -CONFIG_BINFMT_MISC=m -CONFIG_CMM=m -CONFIG_HZ_100=y CONFIG_CRASH_DUMP=y +CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -75,6 +74,7 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y +CONFIG_BPF_JIT=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m @@ -121,7 +121,6 @@ CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_TRACE=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_BLK_DEV_IO_TRACE=y @@ -173,3 +172,4 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRC7=m +CONFIG_CMM=m diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index f328294faea..32a70598715 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -70,7 +70,7 @@ static inline int appldata_asm(struct appldata_product_id *id, int ry; if (!MACHINE_IS_VM) - return -ENOSYS; + return -EOPNOTSUPP; parm_list.diag = 0xdc; parm_list.function = fn; parm_list.parlist_length = sizeof(parm_list); diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h index bf115b49f44..aea451fd182 100644 --- a/arch/s390/include/asm/chsc.h +++ b/arch/s390/include/asm/chsc.h @@ -125,32 +125,4 @@ struct chsc_cpd_info { #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) -#ifdef __KERNEL__ - -struct css_general_char { - u64 : 12; - u32 dynio : 1; /* bit 12 */ - u32 : 28; - u32 aif : 1; /* bit 41 */ - u32 : 3; - u32 mcss : 1; /* bit 45 */ - u32 fcs : 1; /* bit 46 */ - u32 : 1; - u32 ext_mb : 1; /* bit 48 */ - u32 : 7; - u32 aif_tdd : 1; /* bit 56 */ - u32 : 1; - u32 qebsm : 1; /* bit 58 */ - u32 : 8; - u32 aif_osa : 1; /* bit 67 */ - u32 : 14; - u32 cib : 1; /* bit 82 */ - u32 : 5; - u32 fcx : 1; /* bit 88 */ - u32 : 7; -}__attribute__((packed)); - -extern struct css_general_char css_general_characteristics; - -#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 77043aa44d6..55bde603521 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -80,6 +80,18 @@ struct erw { } __attribute__ ((packed)); /** + * struct erw_eadm - EADM Subchannel extended report word + * @b: aob error + * @r: arsb error + */ +struct erw_eadm { + __u32 : 16; + __u32 b : 1; + __u32 r : 1; + __u32 : 14; +} __packed; + +/** * struct sublog - subchannel logout area * @res0: reserved * @esf: extended status flags @@ -170,9 +182,22 @@ struct esw3 { } __attribute__ ((packed)); /** + * struct esw_eadm - EADM Subchannel Extended Status Word (ESW) + * @sublog: subchannel logout + * @erw: extended report word + */ +struct esw_eadm { + __u32 sublog; + struct erw_eadm erw; + __u32 : 32; + __u32 : 32; + __u32 : 32; +} __packed; + +/** * struct irb - interruption response block * @scsw: subchannel status word - * @esw: extened status word, 4 formats + * @esw: extened status word * @ecw: extended control word * * The irb that is handed to the device driver when an interrupt occurs. For @@ -191,6 +216,7 @@ struct irb { struct esw1 esw1; struct esw2 esw2; struct esw3 esw3; + struct esw_eadm eadm; } esw; __u8 ecw[32]; } __attribute__ ((packed,aligned(4))); diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 8d798e962b6..0f636cbdf34 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -7,7 +7,9 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H +#include <linux/mmdebug.h> #include <linux/types.h> +#include <linux/bug.h> extern void __xchg_called_with_bad_pointer(void); @@ -203,6 +205,65 @@ static inline unsigned long long __cmpxchg64(void *ptr, }) #endif /* CONFIG_64BIT */ +#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ +({ \ + register __typeof__(*(p1)) __old1 asm("2") = (o1); \ + register __typeof__(*(p2)) __old2 asm("3") = (o2); \ + register __typeof__(*(p1)) __new1 asm("4") = (n1); \ + register __typeof__(*(p2)) __new2 asm("5") = (n2); \ + int cc; \ + asm volatile( \ + insn " %[old],%[new],%[ptr]\n" \ + " ipm %[cc]\n" \ + " srl %[cc],28" \ + : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \ + : [new] "d" (__new1), "d" (__new2), \ + [ptr] "Q" (*(p1)), "Q" (*(p2)) \ + : "memory", "cc"); \ + !cc; \ +}) + +#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds") + +#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg") + +extern void __cmpxchg_double_called_with_bad_pointer(void); + +#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + switch (sizeof(*(p1))) { \ + case 4: \ + __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \ + break; \ + case 8: \ + __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \ + break; \ + default: \ + __cmpxchg_double_called_with_bad_pointer(); \ + } \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + __typeof__(p1) __p1 = (p1); \ + __typeof__(p2) __p2 = (p2); \ + int __ret; \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ + if (sizeof(long) == 4) \ + __ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2); \ + else \ + __ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \ + __ret; \ +}) + +#define system_has_cmpxchg_double() 1 + #include <asm-generic/cmpxchg-local.h> static inline unsigned long __cmpxchg_local(void *ptr, diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index a3afecdae14..35f0020b7ba 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -21,11 +21,15 @@ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ +#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */ +#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program + buffer full */ #define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) +#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL) /* CPU measurement facility support */ static inline int cpum_cf_avail(void) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8709bdef233..023d5ae2448 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -12,6 +12,9 @@ #include <linux/spinlock.h> #include <asm/div64.h> + +#define __ARCH_HAS_VTIME_ACCOUNT + /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h new file mode 100644 index 00000000000..a06ebc2623f --- /dev/null +++ b/arch/s390/include/asm/css_chars.h @@ -0,0 +1,39 @@ +#ifndef _ASM_CSS_CHARS_H +#define _ASM_CSS_CHARS_H + +#include <linux/types.h> + +#ifdef __KERNEL__ + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 4; + u32 eadm : 1; /* bit 17 */ + u32 : 23; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 12; + u32 eadm_rf : 1; /* bit 80 */ + u32 : 1; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 19; + u32 alt_ssi : 1; /* bit 108 */ +} __packed; + +extern struct css_general_char css_general_characteristics; + +#endif /* __KERNEL__ */ +#endif diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h new file mode 100644 index 00000000000..8d4847191ec --- /dev/null +++ b/arch/s390/include/asm/eadm.h @@ -0,0 +1,124 @@ +#ifndef _ASM_S390_EADM_H +#define _ASM_S390_EADM_H + +#include <linux/types.h> +#include <linux/device.h> + +struct arqb { + u64 data; + u16 fmt:4; + u16:12; + u16 cmd_code; + u16:16; + u16 msb_count; + u32 reserved[12]; +} __packed; + +#define ARQB_CMD_MOVE 1 + +struct arsb { + u16 fmt:4; + u32:28; + u8 ef; + u8:8; + u8 ecbi; + u8:8; + u8 fvf; + u16:16; + u8 eqc; + u32:32; + u64 fail_msb; + u64 fail_aidaw; + u64 fail_ms; + u64 fail_scm; + u32 reserved[4]; +} __packed; + +struct msb { + u8 fmt:4; + u8 oc:4; + u8 flags; + u16:12; + u16 bs:4; + u32 blk_count; + u64 data_addr; + u64 scm_addr; + u64:64; +} __packed; + +struct aidaw { + u8 flags; + u32 :24; + u32 :32; + u64 data_addr; +} __packed; + +#define MSB_OC_CLEAR 0 +#define MSB_OC_READ 1 +#define MSB_OC_WRITE 2 +#define MSB_OC_RELEASE 3 + +#define MSB_FLAG_BNM 0x80 +#define MSB_FLAG_IDA 0x40 + +#define MSB_BS_4K 0 +#define MSB_BS_1M 1 + +#define AOB_NR_MSB 124 + +struct aob { + struct arqb request; + struct arsb response; + struct msb msb[AOB_NR_MSB]; +} __packed __aligned(PAGE_SIZE); + +struct aob_rq_header { + struct scm_device *scmdev; + char data[0]; +}; + +struct scm_device { + u64 address; + u64 size; + unsigned int nr_max_block; + struct device dev; + struct { + unsigned int persistence:4; + unsigned int oper_state:4; + unsigned int data_state:4; + unsigned int rank:4; + unsigned int release:1; + unsigned int res_id:8; + } __packed attrs; +}; + +#define OP_STATE_GOOD 1 +#define OP_STATE_TEMP_ERR 2 +#define OP_STATE_PERM_ERR 3 + +struct scm_driver { + struct device_driver drv; + int (*probe) (struct scm_device *scmdev); + int (*remove) (struct scm_device *scmdev); + void (*notify) (struct scm_device *scmdev); + void (*handler) (struct scm_device *scmdev, void *data, int error); +}; + +int scm_driver_register(struct scm_driver *scmdrv); +void scm_driver_unregister(struct scm_driver *scmdrv); + +int scm_start_aob(struct aob *aob); +void scm_irq_handler(struct aob *aob, int error); + +struct eadm_ops { + int (*eadm_start) (struct aob *aob); + struct module *owner; +}; + +int scm_get_ref(void); +void scm_put_ref(void); + +void register_eadm_ops(struct eadm_ops *ops); +void unregister_eadm_ops(struct eadm_ops *ops); + +#endif /* _ASM_S390_EADM_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9b94a160fe7..178ff966a8b 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -101,6 +101,7 @@ #define HWCAP_S390_HPAGE 128 #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 +#define HWCAP_S390_TE 1024 /* * These are used to set parameters in the core dumps. @@ -212,4 +213,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + #endif diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index a24b03b9fb6..629b79a9316 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -140,7 +140,7 @@ struct etr_ptff_qto { /* Inline assembly helper functions */ static inline int etr_setr(struct etr_eacr *ctrl) { - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2160000,%1\n" @@ -154,7 +154,7 @@ static inline int etr_setr(struct etr_eacr *ctrl) /* Stores a format 1 aib with 64 bytes */ static inline int etr_stetr(struct etr_aib *aib) { - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2170000,%1\n" @@ -169,7 +169,7 @@ static inline int etr_stetr(struct etr_aib *aib) static inline int etr_steai(struct etr_aib *aib, unsigned int func) { register unsigned int reg0 asm("0") = func; - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2b30000,%1\n" @@ -190,7 +190,7 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) { register unsigned int reg0 asm("0") = func; register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .word 0x0104\n" diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 799ed0f1643..2d6e6e38056 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -66,16 +66,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return pte; } -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - mm->context.flush_mm = 1; - pmd_clear((pmd_t *) ptep); - return pte; -} - static inline void __pmd_csp(pmd_t *pmdp) { register unsigned long reg2 asm("2") = pmd_val(*pmdp); @@ -117,6 +107,15 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, __pmd_csp(pmdp); } +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(ptep); + + huge_ptep_invalidate(mm, addr, ptep); + return pte; +} + #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ @@ -131,10 +130,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, ({ \ pte_t __pte = huge_ptep_get(__ptep); \ if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ + huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ } \ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 2b9d41899d2..6703dd986fd 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -19,6 +19,7 @@ enum interruption_class { EXTINT_IUC, EXTINT_CMS, EXTINT_CMC, + EXTINT_CMR, IOINT_CIO, IOINT_QAI, IOINT_DAS, @@ -30,6 +31,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_ADM, IOINT_CSC, NMI_NMI, NR_IRQS, diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 1420a111594..5ae606456b0 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h @@ -14,6 +14,7 @@ /* Regular I/O interrupts. */ #define IO_SCH_ISC 3 /* regular I/O subchannels */ #define CONSOLE_ISC 1 /* console I/O subchannel */ +#define EADM_SCH_ISC 4 /* EADM subchannels */ #define CHSC_SCH_ISC 7 /* CHSC subchannels */ /* Adapter interrupts. */ #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index aab5555bbbd..bbf8141408c 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -329,9 +329,13 @@ struct _lowcore { __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ __u64 cregs_save_area[16]; /* 0x1380 */ + __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ + + /* Transaction abort diagnostic block */ + __u8 pgm_tdb[256]; /* 0x1800 */ /* align to the top of the prefix area */ - __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index b749c573365..084e7755ed9 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -57,7 +57,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) pgd_t *pgd = mm->pgd; S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - if (addressing_mode != HOME_SPACE_MODE) { + if (s390_user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 6537e72e085..86fe0ee2cee 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -20,7 +20,7 @@ #endif #define arch_this_cpu_to_op(pcp, val, op) \ -do { \ +({ \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ @@ -39,13 +39,19 @@ do { \ } \ } while (prev__ != old__); \ preempt_enable(); \ -} while (0) + new__; \ +}) #define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) #define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) #define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) #define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) + #define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &) #define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &) #define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &) @@ -61,7 +67,7 @@ do { \ #define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^) #define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^) -#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ ({ \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ @@ -84,6 +90,44 @@ do { \ #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define arch_this_cpu_xchg(pcp, nval) \ +({ \ + typeof(pcp) *ptr__; \ + typeof(pcp) ret__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + ret__ = xchg(ptr__, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#ifdef CONFIG_64BIT +#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#endif + +#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + typeof(pcp1) o1__ = (o1), n1__ = (n1); \ + typeof(pcp2) o2__ = (o2), n2__ = (n2); \ + typeof(pcp1) *p1__; \ + typeof(pcp2) *p2__; \ + int ret__; \ + preempt_disable(); \ + p1__ = __this_cpu_ptr(&(pcp1)); \ + p2__ = __this_cpu_ptr(&(pcp2)); \ + ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double +#ifdef CONFIG_64BIT +#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double +#endif + #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 11e4e323693..f3e0aabfc6b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -11,12 +11,15 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H +#ifndef __ASSEMBLY__ + #include <linux/linkage.h> #include <linux/irqflags.h> #include <asm/cpu.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/setup.h> +#include <asm/runtime_instr.h> /* * Default implementation of macro that returns current @@ -75,11 +78,20 @@ struct thread_struct { unsigned long gmap_addr; /* address of last gmap fault. */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; + /* cpu runtime instrumentation */ + struct runtime_instr_cb *ri_cb; + int ri_signum; +#ifdef CONFIG_64BIT + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ +#endif }; +#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */ + typedef struct thread_struct thread_struct; /* @@ -130,6 +142,12 @@ struct task_struct; struct mm_struct; struct seq_file; +#ifdef CONFIG_64BIT +extern void show_cacheinfo(struct seq_file *m); +#else +static inline void show_cacheinfo(struct seq_file *m) { } +#endif + /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); @@ -140,6 +158,7 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); extern unsigned long thread_saved_pc(struct task_struct *t); extern void show_code(struct pt_regs *regs); +extern void print_fn_code(unsigned char *code, unsigned long len); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ @@ -331,23 +350,6 @@ extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -/* - * Helper macro for exception table entries - */ -#ifndef CONFIG_64BIT -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long " #_fault "," #_target "\n" \ - ".previous\n" -#else -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad " #_fault "," #_target "\n" \ - ".previous\n" -#endif - extern int memcpy_real(void *, void *, size_t); extern void memcpy_absolute(void *, void *, size_t); @@ -358,4 +360,25 @@ extern void memcpy_absolute(void *, void *, size_t); memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ } -#endif /* __ASM_S390_PROCESSOR_H */ +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".long (" #_fault ") - .\n" \ + ".long (" #_target ") - .\n" \ + ".previous\n" + +#else /* __ASSEMBLY__ */ + +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + .align 4 ; \ + .long (_fault) - . ; \ + .long (_target) - . ; \ + .previous + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index d5f08ea566e..ce20a53afe9 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -235,6 +235,7 @@ typedef struct #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL #define PSW_MASK_EA 0x00000000UL #define PSW_MASK_BA 0x00000000UL @@ -264,10 +265,11 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL -#define PSW_MASK_USER 0x00003F0180000000UL +#define PSW_MASK_USER 0x00003F8180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL @@ -359,17 +361,19 @@ struct per_struct_kernel { unsigned char access_id; /* PER trap access identification */ }; -#define PER_EVENT_MASK 0xE9000000UL +#define PER_EVENT_MASK 0xEB000000UL #define PER_EVENT_BRANCH 0x80000000UL #define PER_EVENT_IFETCH 0x40000000UL #define PER_EVENT_STORE 0x20000000UL #define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_TRANSACTION_END 0x02000000UL #define PER_EVENT_NULLIFICATION 0x01000000UL -#define PER_CONTROL_MASK 0x00a00000UL +#define PER_CONTROL_MASK 0x00e00000UL #define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL #endif @@ -483,6 +487,8 @@ typedef struct #define PTRACE_GET_LAST_BREAK 0x5006 #define PTRACE_PEEK_SYSTEM_CALL 0x5007 #define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h new file mode 100644 index 00000000000..830da737ff8 --- /dev/null +++ b/arch/s390/include/asm/runtime_instr.h @@ -0,0 +1,98 @@ +#ifndef _RUNTIME_INSTR_H +#define _RUNTIME_INSTR_H + +#define S390_RUNTIME_INSTR_START 0x1 +#define S390_RUNTIME_INSTR_STOP 0x2 + +struct runtime_instr_cb { + __u64 buf_current; + __u64 buf_origin; + __u64 buf_limit; + + __u32 valid : 1; + __u32 pstate : 1; + __u32 pstate_set_buf : 1; + __u32 home_space : 1; + __u32 altered : 1; + __u32 : 3; + __u32 pstate_sample : 1; + __u32 sstate_sample : 1; + __u32 pstate_collect : 1; + __u32 sstate_collect : 1; + __u32 : 1; + __u32 halted_int : 1; + __u32 int_requested : 1; + __u32 buffer_full_int : 1; + __u32 key : 4; + __u32 : 9; + __u32 rgs : 3; + + __u32 mode : 4; + __u32 next : 1; + __u32 mae : 1; + __u32 : 2; + __u32 call_type_br : 1; + __u32 return_type_br : 1; + __u32 other_type_br : 1; + __u32 bc_other_type : 1; + __u32 emit : 1; + __u32 tx_abort : 1; + __u32 : 2; + __u32 bp_xn : 1; + __u32 bp_xt : 1; + __u32 bp_ti : 1; + __u32 bp_ni : 1; + __u32 suppr_y : 1; + __u32 suppr_z : 1; + + __u32 dc_miss_extra : 1; + __u32 lat_lev_ignore : 1; + __u32 ic_lat_lev : 4; + __u32 dc_lat_lev : 4; + + __u64 reserved1; + __u64 scaling_factor; + __u64 rsic; + __u64 reserved2; +} __packed __aligned(8); + +extern struct runtime_instr_cb runtime_instr_empty_cb; + +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ + : : "Q" (*cb)); +} + +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ + : "=Q" (*cb) : : "cc"); +} + +static inline void save_ri_cb(struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_prev) + store_runtime_instr_cb(cb_prev); +#endif +} + +static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, + struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_next) + load_runtime_instr_cb(cb_next); + else if (cb_prev) + load_runtime_instr_cb(&runtime_instr_empty_cb); +#endif +} + +#ifdef CONFIG_64BIT +extern void exit_thread_runtime_instr(void); +#else +static inline void exit_thread_runtime_instr(void) { } +#endif + +#endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index 4071d00978c..4af99cdaddf 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -1,7 +1,7 @@ /* * Helper functions for scsw access. * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2012 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> */ @@ -9,7 +9,7 @@ #define _ASM_S390_SCSW_H_ #include <linux/types.h> -#include <asm/chsc.h> +#include <asm/css_chars.h> #include <asm/cio.h> /** @@ -100,14 +100,46 @@ struct tm_scsw { } __attribute__ ((packed)); /** + * struct eadm_scsw - subchannel status word for eadm subchannels + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @ectl: extended control + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @aob: AOB address + * @dstat: device status + * @cstat: subchannel status + */ +struct eadm_scsw { + u32 key:4; + u32:1; + u32 eswf:1; + u32 cc:2; + u32:6; + u32 ectl:1; + u32:2; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 aob; + u32 dstat:8; + u32 cstat:8; + u32:16; +} __packed; + +/** * union scsw - subchannel status word * @cmd: command-mode SCSW * @tm: transport-mode SCSW + * @eadm: eadm SCSW */ union scsw { struct cmd_scsw cmd; struct tm_scsw tm; -} __attribute__ ((packed)); + struct eadm_scsw eadm; +} __packed; #define SCSW_FCTL_CLEAR_FUNC 0x1 #define SCSW_FCTL_HALT_FUNC 0x2 diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e6859d16ee2..87b47ca954f 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -60,7 +60,7 @@ void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, #define SECONDARY_SPACE_MODE 2 #define HOME_SPACE_MODE 3 -extern unsigned int addressing_mode; +extern unsigned int s390_user_mode; /* * Machine features detected in head.S @@ -80,6 +80,7 @@ extern unsigned int addressing_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_TE (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -98,6 +99,7 @@ extern unsigned int addressing_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_TE (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -109,6 +111,7 @@ extern unsigned int addressing_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #endif /* CONFIG_64BIT */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index ce26ac3cb16..b64f15c3b4c 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -30,6 +30,8 @@ extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_yield(void); extern void smp_stop_cpu(void); +extern void smp_cpu_set_polarization(int cpu, int val); +extern int smp_cpu_get_polarization(int cpu); #else /* CONFIG_SMP */ @@ -43,7 +45,7 @@ static inline void smp_call_online_cpu(void (*func)(void *), void *data) func(data); } -static inline int smp_find_processor_id(int address) { return 0; } +static inline int smp_find_processor_id(u16 address) { return 0; } static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 1bd1352fa3b..7e2dcd7c57e 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -96,7 +96,6 @@ static inline char *strcat(char *dst, const char *src) static inline char *strcpy(char *dst, const char *src) { -#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dst; @@ -106,14 +105,10 @@ static inline char *strcpy(char *dst, const char *src) : "+&a" (dst), "+&a" (src) : "d" (r0) : "cc", "memory"); return ret; -#else - return __builtin_strcpy(dst, src); -#endif } static inline size_t strlen(const char *s) { -#if __GNUC__ < 4 register unsigned long r0 asm("0") = 0; const char *tmp = s; @@ -122,9 +117,6 @@ static inline size_t strlen(const char *s) " jo 0b" : "+d" (r0), "+a" (tmp) : : "cc"); return r0 - (unsigned long) s; -#else - return __builtin_strlen(s); -#endif } static inline size_t strnlen(const char * s, size_t n) diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f223068b782..f3a9e0f9270 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -80,21 +80,19 @@ static inline void restore_access_regs(unsigned int *acrs) if (prev->mm) { \ save_fp_regs(&prev->thread.fp_regs); \ save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ restore_fp_regs(&next->thread.fp_regs); \ restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ update_per_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *, struct task_struct *); -extern void account_tick_vtime(struct task_struct *); - #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev, current); \ } while (0) #endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 282ee36f616..f92428e459f 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -17,7 +17,10 @@ #include <asm/bitsperlong.h> struct sysinfo_1_1_1 { - unsigned short :16; + unsigned char p:1; + unsigned char :6; + unsigned char t:1; + unsigned char :8; unsigned char ccr; unsigned char cai; char reserved_0[28]; @@ -30,9 +33,14 @@ struct sysinfo_1_1_1 { char model[16]; char model_perm_cap[16]; char model_temp_cap[16]; - char model_cap_rating[4]; - char model_perm_cap_rating[4]; - char model_temp_cap_rating[4]; + unsigned int model_cap_rating; + unsigned int model_perm_cap_rating; + unsigned int model_temp_cap_rating; + unsigned char typepct[5]; + unsigned char reserved_2[3]; + unsigned int ncr; + unsigned int npr; + unsigned int ntr; }; struct sysinfo_1_2_1 { @@ -47,8 +55,9 @@ struct sysinfo_1_2_2 { char format; char reserved_0[1]; unsigned short acc_offset; - char reserved_1[24]; - unsigned int secondary_capability; + char reserved_1[20]; + unsigned int nominal_cap; + unsigned int secondary_cap; unsigned int capability; unsigned short cpus_total; unsigned short cpus_configured; @@ -109,6 +118,8 @@ struct sysinfo_3_2_2 { char reserved_544[3552]; }; +extern int topology_max_mnest; + #define TOPOLOGY_CPU_BITS 64 #define TOPOLOGY_NR_MAG 6 @@ -142,21 +153,7 @@ struct sysinfo_15_1_x { union topology_entry tle[0]; }; -static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - register int r0 asm("0") = (fc << 28) | sel1; - register int r1 asm("1") = sel2; - - asm volatile( - " stsi 0(%2)\n" - "0: jz 2f\n" - "1: lhi %0,%3\n" - "2:\n" - EX_TABLE(0b, 1b) - : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS) - : "cc", "memory"); - return r0; -} +int stsi(void *sysinfo, int fc, int sel1, int sel2); /* * Service level reporting interface. diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9fde315f3a7..1d8fe2b17ef 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -90,12 +90,10 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - spin_lock(&mm->page_table_lock); if (mm->context.flush_mm) { __tlb_flush_mm(mm); mm->context.flush_mm = 0; } - spin_unlock(&mm->page_table_lock); } /* diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 0837de80c35..9ca30538376 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -2,8 +2,8 @@ #define _ASM_S390_TOPOLOGY_H #include <linux/cpumask.h> -#include <asm/sysinfo.h> +struct sysinfo_15_1_x; struct cpu; #ifdef CONFIG_SCHED_BOOK @@ -51,24 +51,6 @@ static inline void topology_expect_change(void) { } #define POLARIZATION_VM (2) #define POLARIZATION_VH (3) -extern int cpu_polarization[]; - -static inline void cpu_set_polarization(int cpu, int val) -{ -#ifdef CONFIG_SCHED_BOOK - cpu_polarization[cpu] = val; -#endif -} - -static inline int cpu_read_polarization(int cpu) -{ -#ifdef CONFIG_SCHED_BOOK - return cpu_polarization[cpu]; -#else - return POLARIZATION_HRZ; -#endif -} - #ifdef CONFIG_SCHED_BOOK void s390_init_cpu_topology(void); #else diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a8ab18b18b5..34268df959a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -76,9 +76,22 @@ static inline int __range_ok(unsigned long addr, unsigned long size) struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + struct uaccess_ops { size_t (*copy_from_user)(size_t, const void __user *, void *); size_t (*copy_from_user_small)(size_t, const void __user *, void *); diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 6756e78f480..4e64b5cd155 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,9 @@ #define __NR_setns 339 #define __NR_process_vm_readv 340 #define __NR_process_vm_writev 341 -#define NR_syscalls 342 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define NR_syscalls 344 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9733b3f0eb6..4da52fe3174 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,10 +23,11 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ - sysinfo.o jump_label.o lgr.o os_info.o + sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) +obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) extra-y += head.o vmlinux.lds extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) @@ -48,12 +49,11 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o -# Kexec part -S390_KEXEC_OBJS := machine_kexec.o crash.o -S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) +ifdef CONFIG_64BIT +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o +obj-y += runtime_instr.o cache.o +endif # vdso obj-$(CONFIG_64BIT) += vdso64/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 45ef1a7b08f..fface87056e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -157,6 +157,8 @@ int main(void) DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); + DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c new file mode 100644 index 00000000000..8df8d8a19c9 --- /dev/null +++ b/arch/s390/kernel/cache.c @@ -0,0 +1,385 @@ +/* + * Extract CPU cache information and expose them via sysfs. + * + * Copyright IBM Corp. 2012 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/notifier.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/facility.h> + +struct cache { + unsigned long size; + unsigned int line_size; + unsigned int associativity; + unsigned int nr_sets; + unsigned int level : 3; + unsigned int type : 2; + unsigned int private : 1; + struct list_head list; +}; + +struct cache_dir { + struct kobject *kobj; + struct cache_index_dir *index; +}; + +struct cache_index_dir { + struct kobject kobj; + int cpu; + struct cache *cache; + struct cache_index_dir *next; +}; + +enum { + CACHE_SCOPE_NOTEXISTS, + CACHE_SCOPE_PRIVATE, + CACHE_SCOPE_SHARED, + CACHE_SCOPE_RESERVED, +}; + +enum { + CACHE_TYPE_SEPARATE, + CACHE_TYPE_DATA, + CACHE_TYPE_INSTRUCTION, + CACHE_TYPE_UNIFIED, +}; + +enum { + EXTRACT_TOPOLOGY, + EXTRACT_LINE_SIZE, + EXTRACT_SIZE, + EXTRACT_ASSOCIATIVITY, +}; + +enum { + CACHE_TI_UNIFIED = 0, + CACHE_TI_INSTRUCTION = 0, + CACHE_TI_DATA, +}; + +struct cache_info { + unsigned char : 4; + unsigned char scope : 2; + unsigned char type : 2; +}; + +#define CACHE_MAX_LEVEL 8 + +union cache_topology { + struct cache_info ci[CACHE_MAX_LEVEL]; + unsigned long long raw; +}; + +static const char * const cache_type_string[] = { + "Data", + "Instruction", + "Unified", +}; + +static struct cache_dir *cache_dir_cpu[NR_CPUS]; +static LIST_HEAD(cache_list); + +void show_cacheinfo(struct seq_file *m) +{ + struct cache *cache; + int index = 0; + + list_for_each_entry(cache, &cache_list, list) { + seq_printf(m, "cache%-11d: ", index); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); + seq_printf(m, "size=%luK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->line_size); + seq_printf(m, "associativity=%d", cache->associativity); + seq_puts(m, "\n"); + index++; + } +} + +static inline unsigned long ecag(int ai, int li, int ti) +{ + unsigned long cmd, val; + + cmd = ai << 4 | li << 1 | ti; + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (cmd)); + return val; +} + +static int __init cache_add(int level, int private, int type) +{ + struct cache *cache; + int ti; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + ti = type == CACHE_TYPE_DATA ? CACHE_TI_DATA : CACHE_TI_UNIFIED; + cache->size = ecag(EXTRACT_SIZE, level, ti); + cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + cache->nr_sets = cache->size / cache->associativity; + cache->nr_sets /= cache->line_size; + cache->private = private; + cache->level = level + 1; + cache->type = type - 1; + list_add_tail(&cache->list, &cache_list); + return 0; +} + +static void __init cache_build_info(void) +{ + struct cache *cache, *next; + union cache_topology ct; + int level, private, rc; + + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (level = 0; level < CACHE_MAX_LEVEL; level++) { + switch (ct.ci[level].scope) { + case CACHE_SCOPE_NOTEXISTS: + case CACHE_SCOPE_RESERVED: + return; + case CACHE_SCOPE_SHARED: + private = 0; + break; + case CACHE_SCOPE_PRIVATE: + private = 1; + break; + } + if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { + rc = cache_add(level, private, CACHE_TYPE_DATA); + rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); + } else { + rc = cache_add(level, private, ct.ci[level].type); + } + if (rc) + goto error; + } + return; +error: + list_for_each_entry_safe(cache, next, &cache_list, list) { + list_del(&cache->list); + kfree(cache); + } +} + +static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu) +{ + struct cache_dir *cache_dir; + struct kobject *kobj = NULL; + struct device *dev; + + dev = get_cpu_device(cpu); + if (!dev) + goto out; + kobj = kobject_create_and_add("cache", &dev->kobj); + if (!kobj) + goto out; + cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); + if (!cache_dir) + goto out; + cache_dir->kobj = kobj; + cache_dir_cpu[cpu] = cache_dir; + return cache_dir; +out: + kobject_put(kobj); + return NULL; +} + +static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) +{ + return container_of(kobj, struct cache_index_dir, kobj); +} + +static void cache_index_release(struct kobject *kobj) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(kobj); + kfree(index); +} + +static ssize_t cache_index_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(kobj, kobj_attr, buf); +} + +#define DEFINE_CACHE_ATTR(_name, _format, _value) \ +static ssize_t cache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct cache_index_dir *index; \ + \ + index = kobj_to_cache_index_dir(kobj); \ + return sprintf(buf, _format, _value); \ +} \ +static struct kobj_attribute cache_##_name##_attr = \ + __ATTR(_name, 0444, cache_##_name##_show, NULL); + +DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); +DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); +DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); +DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); +DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); +DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); + +static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) +{ + struct cache_index_dir *index; + int len; + + index = kobj_to_cache_index_dir(kobj); + len = type ? + cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : + cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); + len += sprintf(&buf[len], "\n"); + return len; +} + +static ssize_t shared_cpu_map_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 0, buf); +} +static struct kobj_attribute cache_shared_cpu_map_attr = + __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); + +static ssize_t shared_cpu_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 1, buf); +} +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + +static struct attribute *cache_index_default_attrs[] = { + &cache_type_attr.attr, + &cache_size_attr.attr, + &cache_number_of_sets_attr.attr, + &cache_ways_of_associativity_attr.attr, + &cache_level_attr.attr, + &cache_coherency_line_size_attr.attr, + &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, + NULL, +}; + +static const struct sysfs_ops cache_index_ops = { + .show = cache_index_show, +}; + +static struct kobj_type cache_index_type = { + .sysfs_ops = &cache_index_ops, + .release = cache_index_release, + .default_attrs = cache_index_default_attrs, +}; + +static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, + int cpu) +{ + struct cache_index_dir *index_dir; + int rc; + + index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); + if (!index_dir) + return -ENOMEM; + index_dir->cache = cache; + index_dir->cpu = cpu; + rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, + cache_dir->kobj, "index%d", index); + if (rc) + goto out; + index_dir->next = cache_dir->index; + cache_dir->index = index_dir; + return 0; +out: + kfree(index_dir); + return rc; +} + +static int __cpuinit cache_add_cpu(int cpu) +{ + struct cache_dir *cache_dir; + struct cache *cache; + int rc, index = 0; + + if (list_empty(&cache_list)) + return 0; + cache_dir = cache_create_cache_dir(cpu); + if (!cache_dir) + return -ENOMEM; + list_for_each_entry(cache, &cache_list, list) { + if (!cache->private) + break; + rc = cache_create_index_dir(cache_dir, cache, index, cpu); + if (rc) + return rc; + index++; + } + return 0; +} + +static void __cpuinit cache_remove_cpu(int cpu) +{ + struct cache_index_dir *index, *next; + struct cache_dir *cache_dir; + + cache_dir = cache_dir_cpu[cpu]; + if (!cache_dir) + return; + index = cache_dir->index; + while (index) { + next = index->next; + kobject_put(&index->kobj); + index = next; + } + kobject_put(cache_dir->kobj); + kfree(cache_dir); + cache_dir_cpu[cpu] = NULL; +} + +static int __cpuinit cache_hotplug(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + int rc = 0; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cache_add_cpu(cpu); + if (rc) + cache_remove_cpu(cpu); + break; + case CPU_DEAD: + cache_remove_cpu(cpu); + break; + } + return rc ? NOTIFY_BAD : NOTIFY_OK; +} + +static int __init cache_init(void) +{ + int cpu; + + if (!test_facility(34)) + return 0; + cache_build_info(); + for_each_online_cpu(cpu) + cache_add_cpu(cpu); + hotcpu_notifier(cache_hotplug, 0); + return 0; +} +device_initcall(cache_init); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 2d82cfcbce5..3afba804fe9 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1646,3 +1646,16 @@ ENTRY(compat_sys_process_vm_writev_wrapper) llgf %r0,164(%r15) # unsigned long stg %r0,160(%r15) jg compat_sys_process_vm_writev + +ENTRY(sys_s390_runtime_instr_wrapper) + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_s390_runtime_instr + +ENTRY(sys_kcmp_wrapper) + lgfr %r2,%r2 # pid_t + lgfr %r3,%r3 # pid_t + lgfr %r4,%r4 # int + llgfr %r5,%r5 # unsigned long + llgfr %r6,%r6 # unsigned long + jg sys_kcmp diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c deleted file mode 100644 index 3819153de8b..00000000000 --- a/arch/s390/kernel/crash.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright IBM Corp. 2005 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/threads.h> -#include <linux/kexec.h> -#include <linux/reboot.h> - -void machine_crash_shutdown(struct pt_regs *regs) -{ -} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index cc1172b2687..fb8d8781a01 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,8 +13,9 @@ #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/elf.h> -#include <asm/ipl.h> #include <asm/os_info.h> +#include <asm/elf.h> +#include <asm/ipl.h> #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 619c5d35072..cc84a24c023 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -315,6 +315,11 @@ enum { LONG_INSN_POPCNT, LONG_INSN_RISBHG, LONG_INSN_RISBLG, + LONG_INSN_RINEXT, + LONG_INSN_RIEMIT, + LONG_INSN_TABORT, + LONG_INSN_TBEGIN, + LONG_INSN_TBEGINC, }; static char *long_insn_name[] = { @@ -329,7 +334,12 @@ static char *long_insn_name[] = { [LONG_INSN_LLGHRL] = "llghrl", [LONG_INSN_POPCNT] = "popcnt", [LONG_INSN_RISBHG] = "risbhg", - [LONG_INSN_RISBLG] = "risblk", + [LONG_INSN_RISBLG] = "risblg", + [LONG_INSN_RINEXT] = "rinext", + [LONG_INSN_RIEMIT] = "riemit", + [LONG_INSN_TABORT] = "tabort", + [LONG_INSN_TBEGIN] = "tbegin", + [LONG_INSN_TBEGINC] = "tbeginc", }; static struct insn opcode[] = { @@ -582,6 +592,17 @@ static struct insn opcode_a7[] = { { "", 0, INSTR_INVALID } }; +static struct insn opcode_aa[] = { +#ifdef CONFIG_64BIT + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, + { "rion", 0x01, INSTR_RI_RI }, + { "tric", 0x02, INSTR_RI_RI }, + { "rioff", 0x03, INSTR_RI_RI }, + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + static struct insn opcode_b2[] = { #ifdef CONFIG_64BIT { "sske", 0x2b, INSTR_RRF_M0RR }, @@ -594,6 +615,9 @@ static struct insn opcode_b2[] = { { "lpswe", 0xb2, INSTR_S_RD }, { "srnmt", 0xb9, INSTR_S_RD }, { "lfas", 0xbd, INSTR_S_RD }, + { "etndg", 0xec, INSTR_RRE_R0 }, + { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, + { "tend", 0xf8, INSTR_S_RD }, #endif { "stidp", 0x02, INSTR_S_RD }, { "sck", 0x04, INSTR_S_RD }, @@ -1150,6 +1174,7 @@ static struct insn opcode_e3[] = { { "stfh", 0xcb, INSTR_RXY_RRRD }, { "chf", 0xcd, INSTR_RXY_RRRD }, { "clhf", 0xcf, INSTR_RXY_RRRD }, + { "ntstg", 0x25, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1173,6 +1198,8 @@ static struct insn opcode_e5[] = { { "mvhhi", 0x44, INSTR_SIL_RDI }, { "mvhi", 0x4c, INSTR_SIL_RDI }, { "mvghi", 0x48, INSTR_SIL_RDI }, + { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, #endif { "lasp", 0x00, INSTR_SSE_RDRD }, { "tprot", 0x01, INSTR_SSE_RDRD }, @@ -1210,6 +1237,9 @@ static struct insn opcode_eb[] = { { "cliy", 0x55, INSTR_SIY_URD }, { "oiy", 0x56, INSTR_SIY_URD }, { "xiy", 0x57, INSTR_SIY_URD }, + { "lric", 0x60, INSTR_RSY_RDRM }, + { "stric", 0x61, INSTR_RSY_RDRM }, + { "mric", 0x62, INSTR_RSY_RDRM }, { "icmh", 0x80, INSTR_RSE_RURD }, { "icmh", 0x80, INSTR_RSY_RURD }, { "icmy", 0x81, INSTR_RSY_RURD }, @@ -1408,6 +1438,9 @@ static struct insn *find_insn(unsigned char *code) case 0xa7: table = opcode_a7; break; + case 0xaa: + table = opcode_aa; + break; case 0xb2: table = opcode_b2; break; @@ -1601,3 +1634,26 @@ void show_code(struct pt_regs *regs) } printk("\n"); } + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[64], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + ptr += sprintf(ptr, "%p: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk(buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 83c3271c442..7f4717675c1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -215,36 +215,54 @@ static noinline __init void init_kernel_storage_key(void) PAGE_DEFAULT_KEY, 0); } -static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); +static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); static noinline __init void detect_machine_type(void) { + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + /* Check current-configuration-level */ - if ((stsi(NULL, 0, 0, 0) >> 28) <= 2) { + if (stsi(NULL, 0, 0, 0) <= 2) { S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; return; } /* Get virtual-machine cpu information. */ - if (stsi(&vmms, 3, 2, 2) == -ENOSYS || !vmms.count) + if (stsi(vmms, 3, 2, 2) || !vmms->count) return; /* Running under KVM? If not we assume z/VM */ - if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; else S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static __init void setup_topology(void) +{ +#ifdef CONFIG_64BIT + int max_mnest; + + if (!test_facility(11)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) + break; + } + topology_max_mnest = max_mnest; +#endif +} + static void early_pgm_check_handler(void) { - unsigned long addr; const struct exception_table_entry *fixup; + unsigned long addr; addr = S390_lowcore.program_old_psw.addr; fixup = search_exception_tables(addr & PSW_ADDR_INSN); if (!fixup) disabled_wait(0); - S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; + S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; } static noinline __init void setup_lowcore_early(void) @@ -267,12 +285,10 @@ static noinline __init void setup_facility_list(void) static noinline __init void setup_hpage(void) { -#ifndef CONFIG_DEBUG_PAGEALLOC if (!test_facility(2) || !test_facility(8)) return; S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; __ctl_set_bit(0, 23); -#endif } static __init void detect_mvpg(void) @@ -366,12 +382,12 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(8)) S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; - if (test_facility(11)) - S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; if (test_facility(27)) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(50) && test_facility(73)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; #endif } @@ -441,7 +457,6 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } - /* * Save ipl parameters, clear bss memory, initialize storage keys * and create a kernel NSS at startup if the SAVESYS= parm is defined @@ -468,6 +483,7 @@ void __init startup_init(void) detect_diag44(); detect_machine_facilities(); setup_hpage(); + setup_topology(); sclp_facilities_detect(); detect_memory_layout(memory_chunk); #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 349b7eeb348..7549985402f 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -412,6 +413,11 @@ ENTRY(pgm_check_handler) 1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER LAST_BREAK %r14 lg %r15,__LC_KERNEL_STACK + lg %r14,__TI_task(%r12) + lghi %r13,__LC_PGM_TDB + tm __LC_PGM_ILC+2,0x02 # check for transaction abort + jz 2f + mvc __THREAD_trap_tdb(256,%r14),0(%r13) 2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -422,13 +428,12 @@ ENTRY(pgm_check_handler) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 0f - lg %r1,__TI_task(%r12) tmhh %r8,0x0001 # kernel per event ? jz pgm_kprobe oi __TI_flags+7(%r12),_TIF_PER_TRAP - mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE - mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID + mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE + mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID 0: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table @@ -1004,9 +1009,7 @@ sie_fault: .Lhost_id: .quad 0 - .section __ex_table,"a" - .quad sie_loop,sie_fault - .previous + EX_TABLE(sie_loop,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index dd7630d8aab..6cdc55b26d6 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -30,33 +30,35 @@ struct irq_class { }; static const struct irq_class intrclass_names[] = { - {.name = "EXT" }, - {.name = "I/O" }, - {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "EXC", .desc = "[EXT] External Call" }, - {.name = "EMS", .desc = "[EXT] Emergency Signal" }, - {.name = "TMR", .desc = "[EXT] CPU Timer" }, - {.name = "TAL", .desc = "[EXT] Timing Alert" }, - {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, - {.name = "DSD", .desc = "[EXT] DASD Diag" }, - {.name = "VRT", .desc = "[EXT] Virtio" }, - {.name = "SCP", .desc = "[EXT] Service Call" }, - {.name = "IUC", .desc = "[EXT] IUCV" }, - {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling" }, - {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter" }, - {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, - {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "DAS", .desc = "[I/O] DASD" }, - {.name = "C15", .desc = "[I/O] 3215" }, - {.name = "C70", .desc = "[I/O] 3270" }, - {.name = "TAP", .desc = "[I/O] Tape" }, - {.name = "VMR", .desc = "[I/O] Unit Record Devices" }, - {.name = "LCS", .desc = "[I/O] LCS" }, - {.name = "CLW", .desc = "[I/O] CLAW" }, - {.name = "CTC", .desc = "[I/O] CTC" }, - {.name = "APB", .desc = "[I/O] AP Bus" }, - {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, - {.name = "NMI", .desc = "[NMI] Machine Check" }, + [EXTERNAL_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [EXTINT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, + [EXTINT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, + [EXTINT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, + [EXTINT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, + [EXTINT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, + [EXTINT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + [EXTINT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, + [EXTINT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, + [EXTINT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, + [EXTINT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, + [EXTINT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + [EXTINT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + [EXTINT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + [IOINT_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + [IOINT_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + [IOINT_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, + [IOINT_C15] = {.name = "C15", .desc = "[I/O] 3215"}, + [IOINT_C70] = {.name = "C70", .desc = "[I/O] 3270"}, + [IOINT_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, + [IOINT_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, + [IOINT_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, + [IOINT_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, + [IOINT_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, + [IOINT_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, + [IOINT_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, + [IOINT_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, }; /* diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8aa634f5944..d1c7214e157 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -547,7 +547,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) */ entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (entry) { - regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; return 1; } diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index eca94e74d19..6ea6d69339b 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -51,16 +51,6 @@ static struct lgr_info lgr_info_cur; static struct debug_info *lgr_dbf; /* - * Return number of valid stsi levels - */ -static inline int stsi_0(void) -{ - int rc = stsi(NULL, 0, 0, 0); - - return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); -} - -/* * Copy buffer and then convert it to ASCII */ static void cpascii(char *dst, char *src, int size) @@ -76,7 +66,7 @@ static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) { struct sysinfo_1_1_1 *si = (void *) lgr_page; - if (stsi(si, 1, 1, 1) == -ENOSYS) + if (stsi(si, 1, 1, 1)) return; cpascii(lgr_info->manufacturer, si->manufacturer, sizeof(si->manufacturer)); @@ -93,7 +83,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) { struct sysinfo_2_2_2 *si = (void *) lgr_page; - if (stsi(si, 2, 2, 2) == -ENOSYS) + if (stsi(si, 2, 2, 2)) return; cpascii(lgr_info->name, si->name, sizeof(si->name)); memcpy(&lgr_info->lpar_number, &si->lpar_number, @@ -108,7 +98,7 @@ static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) struct sysinfo_3_2_2 *si = (void *) lgr_page; int i; - if (stsi(si, 3, 2, 2) == -ENOSYS) + if (stsi(si, 3, 2, 2)) return; for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { cpascii(lgr_info->vm[i].name, si->vm[i].name, @@ -124,16 +114,17 @@ static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) */ static void lgr_info_get(struct lgr_info *lgr_info) { + int level; + memset(lgr_info, 0, sizeof(*lgr_info)); stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); - lgr_info->level = stsi_0(); - if (lgr_info->level == -ENOSYS) - return; - if (lgr_info->level >= 1) + level = stsi(NULL, 0, 0, 0); + lgr_info->level = level; + if (level >= 1) lgr_stsi_1_1_1(lgr_info); - if (lgr_info->level >= 2) + if (level >= 2) lgr_stsi_2_2_2(lgr_info); - if (lgr_info->level >= 3) + if (level >= 3) lgr_stsi_3_2_2(lgr_info); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 493304bdf1c..b3de2770001 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -21,6 +21,7 @@ #include <asm/reset.h> #include <asm/ipl.h> #include <asm/diag.h> +#include <asm/elf.h> #include <asm/asm-offsets.h> #include <asm/os_info.h> @@ -31,8 +32,6 @@ extern const unsigned long long relocate_kernel_len; #ifdef CONFIG_CRASH_DUMP -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); - /* * Create ELF notes for one CPU */ @@ -159,7 +158,7 @@ int machine_kexec_prepare(struct kimage *image) /* Can't replace kernel image since it is read-only. */ if (ipl_flags & IPL_NSS_VALID) - return -ENOSYS; + return -EOPNOTSUPP; if (image->type == KEXEC_TYPE_CRASH) return machine_kexec_prepare_kdump(); @@ -191,6 +190,10 @@ void machine_shutdown(void) { } +void machine_crash_shutdown(struct pt_regs *regs) +{ +} + /* * Do normal kexec */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 733175373a4..5024be27df4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -26,10 +26,12 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/vtimer.h> +#include <asm/exec.h> #include <asm/irq.h> #include <asm/nmi.h> #include <asm/smp.h> #include <asm/switch_to.h> +#include <asm/runtime_instr.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -132,6 +134,7 @@ EXPORT_SYMBOL(kernel_thread); */ void exit_thread(void) { + exit_thread_runtime_instr(); } void flush_thread(void) @@ -170,6 +173,11 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); + /* Don't copy runtime instrumentation info */ + p->thread.ri_cb = NULL; + p->thread.ri_signum = 0; + frame->childregs.psw.mask &= ~PSW_MASK_RI; + #ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 572d4c9cb33..753c41d0ffd 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -39,9 +39,9 @@ void __cpuinit cpu_init(void) */ static int show_cpuinfo(struct seq_file *m, void *v) { - static const char *hwcap_str[10] = { + static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs" + "edat", "etf3eh", "highgprs", "te" }; unsigned long n = (unsigned long) v - 1; int i; @@ -54,10 +54,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) num_online_cpus(), loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ))%100); seq_puts(m, "features\t: "); - for (i = 0; i < 10; i++) + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); + show_cacheinfo(m); } get_online_cpus(); if (cpu_online(n)) { diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e4be113fbac..a314c57f4e9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,6 +42,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_TDB, REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; @@ -52,6 +53,22 @@ void update_per_regs(struct task_struct *task) struct thread_struct *thread = &task->thread; struct per_regs old, new; +#ifdef CONFIG_64BIT + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + unsigned long cr0, cr0_new; + + __ctl_store(cr0, 0, 0); + /* set or clear transaction execution bits 8 and 9. */ + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr0_new = cr0 & ~(3UL << 54); + else + cr0_new = cr0 | (3UL << 54); + /* Only load control register 0 if necessary. */ + if (cr0 != cr0_new) + __ctl_load(cr0_new, 0, 0); + } +#endif /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -60,6 +77,10 @@ void update_per_regs(struct task_struct *task) /* merge TIF_SINGLE_STEP into user specified PER registers. */ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { new.control |= PER_EVENT_IFETCH; +#ifdef CONFIG_64BIT + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; +#endif new.start = 0; new.end = PSW_ADDR_INSN; } @@ -100,6 +121,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); clear_tsk_thread_flag(task, TIF_PER_TRAP); + task->thread.per_flags = 0; } #ifndef CONFIG_64BIT @@ -416,6 +438,16 @@ long arch_ptrace(struct task_struct *child, long request, put_user(task_thread_info(child)->last_break, (unsigned long __user *) data); return 0; + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + return 0; default: /* Removing high order bit from addr (only for 31 bit). */ addr &= PSW_ADDR_INSN; @@ -903,6 +935,28 @@ static int s390_last_break_set(struct task_struct *target, return 0; } +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned char *data; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + data = target->thread.trap_tdb; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -951,6 +1005,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, .set = s390_last_break_set, }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, #endif [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, @@ -1148,6 +1210,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, .n = 1, diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 00000000000..61066f6f71a --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,150 @@ +/* + * Copyright IBM Corp. 2012 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <asm/runtime_instr.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +static int runtime_instr_avail(void) +{ + return test_facility(64); +} + +static void disable_runtime_instr(void) +{ + struct pt_regs *regs = task_pt_regs(current); + + load_runtime_instr_cb(&runtime_instr_empty_cb); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->buf_limit = 0xfff; + if (s390_user_mode == HOME_SPACE_MODE) + cb->home_space = 1; + cb->int_requested = 1; + cb->pstate = 1; + cb->pstate_set_buf = 1; + cb->pstate_sample = 1; + cb->pstate_collect = 1; + cb->key = PAGE_DEFAULT_KEY; + cb->valid = 1; +} + +void exit_thread_runtime_instr(void) +{ + struct task_struct *task = current; + + if (!task->thread.ri_cb) + return; + disable_runtime_instr(); + kfree(task->thread.ri_cb); + task->thread.ri_signum = 0; + task->thread.ri_cb = NULL; +} + +static void runtime_instr_int_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct siginfo info; + + if (!(param32 & CPU_MF_INT_RI_MASK)) + return; + + kstat_cpu(smp_processor_id()).irqs[EXTINT_CMR]++; + + if (!current->thread.ri_cb) + return; + if (current->thread.ri_signum < SIGRTMIN || + current->thread.ri_signum > SIGRTMAX) { + WARN_ON_ONCE(1); + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = current->thread.ri_signum; + info.si_code = SI_QUEUE; + if (param32 & CPU_MF_INT_RI_BUF_FULL) + info.si_int = ENOBUFS; + else if (param32 & CPU_MF_INT_RI_HALTED) + info.si_int = ECANCELED; + else + return; /* unknown reason */ + + send_sig_info(current->thread.ri_signum, &info, current); +} + +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!runtime_instr_avail()) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + preempt_disable(); + exit_thread_runtime_instr(); + preempt_enable(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START || + (signum < SIGRTMIN || signum > SIGRTMAX)) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + current->thread.ri_signum = signum; + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} + +static int __init runtime_instr_init(void) +{ + int rc; + + if (!runtime_instr_avail()) + return 0; + + measurement_alert_subclass_register(); + rc = register_external_interrupt(0x1407, runtime_instr_int_handler); + if (rc) + measurement_alert_subclass_unregister(); + else + pr_info("Runtime instrumentation facility initialized\n"); + return rc; +} +device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 57b536649b0..9bdbcef1da9 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -8,3 +8,5 @@ EXPORT_SYMBOL(_mcount); #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) EXPORT_SYMBOL(sie64a); #endif +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f86c81e13c3..afa9fdba200 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -302,10 +302,10 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -unsigned int addressing_mode = HOME_SPACE_MODE; -EXPORT_SYMBOL_GPL(addressing_mode); +unsigned int s390_user_mode = PRIMARY_SPACE_MODE; +EXPORT_SYMBOL_GPL(s390_user_mode); -static int set_amode_primary(void) +static void __init set_user_mode_primary(void) { psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; @@ -313,48 +313,30 @@ static int set_amode_primary(void) psw32_user_bits = (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - - if (MACHINE_HAS_MVCOS) { - memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); - return 1; - } else { - memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); - return 0; - } -} - -/* - * Switch kernel/user addressing modes? - */ -static int __init early_parse_switch_amode(char *p) -{ - addressing_mode = PRIMARY_SPACE_MODE; - return 0; + uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos_switch : uaccess_pt; } -early_param("switch_amode", early_parse_switch_amode); static int __init early_parse_user_mode(char *p) { if (p && strcmp(p, "primary") == 0) - addressing_mode = PRIMARY_SPACE_MODE; + s390_user_mode = PRIMARY_SPACE_MODE; else if (!p || strcmp(p, "home") == 0) - addressing_mode = HOME_SPACE_MODE; + s390_user_mode = HOME_SPACE_MODE; else return 1; return 0; } early_param("user_mode", early_parse_user_mode); -static void setup_addressing_mode(void) +static void __init setup_addressing_mode(void) { - if (addressing_mode == PRIMARY_SPACE_MODE) { - if (set_amode_primary()) - pr_info("Address spaces switched, " - "mvcos available\n"); - else - pr_info("Address spaces switched, " - "mvcos not available\n"); - } + if (s390_user_mode != PRIMARY_SPACE_MODE) + return; + set_user_mode_primary(); + if (MACHINE_HAS_MVCOS) + pr_info("Address spaces switched, mvcos available\n"); + else + pr_info("Address spaces switched, mvcos not available\n"); } void *restart_stack __attribute__((__section__(".data"))); @@ -602,9 +584,7 @@ static void __init setup_memory_end(void) static void __init setup_vmcoreinfo(void) { -#ifdef CONFIG_KEXEC mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); -#endif } #ifdef CONFIG_CRASH_DUMP @@ -974,12 +954,20 @@ static void __init setup_hwcaps(void) if (MACHINE_HAS_HPAGE) elf_hwcap |= HWCAP_S390_HPAGE; +#if defined(CONFIG_64BIT) /* * 64-bit register support for 31-bit processes * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; + /* + * Transactional execution support HWCAP_S390_TE is bit 10. + */ + if (test_facility(50) && test_facility(73)) + elf_hwcap |= HWCAP_S390_TE; +#endif + get_cpu_id(&cpu_id); switch (cpu_id.machine) { case 0x9672: diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 720fda1620f..ea431e551c6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -66,7 +66,7 @@ struct pcpu { unsigned long panic_stack; /* panic stack for the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ int state; /* physical cpu state */ - u32 status; /* last status received via sigp */ + int polarization; /* physical polarization */ u16 address; /* physical cpu address */ }; @@ -74,6 +74,10 @@ static u8 boot_cpu_type; static u16 boot_cpu_address; static struct pcpu pcpu_devices[NR_CPUS]; +/* + * The smp_cpu_state_mutex must be held when changing the state or polarization + * member of a pcpu data structure within the pcpu_devices arreay. + */ DEFINE_MUTEX(smp_cpu_state_mutex); /* @@ -99,7 +103,7 @@ static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) int cc; while (1) { - cc = __pcpu_sigp(addr, order, parm, status); + cc = __pcpu_sigp(addr, order, parm, NULL); if (cc != SIGP_CC_BUSY) return cc; cpu_relax(); @@ -111,7 +115,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) int cc, retry; for (retry = 0; ; retry++) { - cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status); + cc = __pcpu_sigp(pcpu->address, order, parm, NULL); if (cc != SIGP_CC_BUSY) break; if (retry >= 3) @@ -122,16 +126,18 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) static inline int pcpu_stopped(struct pcpu *pcpu) { + u32 uninitialized_var(status); + if (__pcpu_sigp(pcpu->address, SIGP_SENSE, - 0, &pcpu->status) != SIGP_CC_STATUS_STORED) + 0, &status) != SIGP_CC_STATUS_STORED) return 0; - return !!(pcpu->status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); + return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); } static inline int pcpu_running(struct pcpu *pcpu) { if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, - 0, &pcpu->status) != SIGP_CC_STATUS_STORED) + 0, NULL) != SIGP_CC_STATUS_STORED) return 1; /* Status stored condition code is equivalent to cpu not running. */ return 0; @@ -586,6 +592,16 @@ static inline void smp_get_save_area(int cpu, u16 address) { } #endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +void smp_cpu_set_polarization(int cpu, int val) +{ + pcpu_devices[cpu].polarization = val; +} + +int smp_cpu_get_polarization(int cpu) +{ + return pcpu_devices[cpu].polarization; +} + static struct sclp_cpu_info *smp_get_cpu_info(void) { static int use_sigp_detection; @@ -628,7 +644,7 @@ static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info, pcpu->address = info->cpu[i].address; pcpu->state = (cpu >= info->configured) ? CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); set_cpu_present(cpu, true); if (sysfs_add && smp_add_present_cpu(cpu) != 0) set_cpu_present(cpu, false); @@ -796,7 +812,7 @@ void __init smp_prepare_boot_cpu(void) pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE; S390_lowcore.percpu_offset = __per_cpu_offset[0]; - cpu_set_polarization(0, POLARIZATION_UNKNOWN); + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); set_cpu_online(0, true); } @@ -862,7 +878,7 @@ static ssize_t cpu_configure_store(struct device *dev, if (rc) break; pcpu->state = CPU_STATE_STANDBY; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); topology_expect_change(); break; case 1: @@ -872,7 +888,7 @@ static ssize_t cpu_configure_store(struct device *dev, if (rc) break; pcpu->state = CPU_STATE_CONFIGURED; - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); topology_expect_change(); break; default: @@ -959,23 +975,17 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, struct device *s = &c->dev; int err = 0; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: sysfs_remove_group(&s->kobj, &cpu_online_attr_group); break; } return notifier_from_errno(err); } -static struct notifier_block __cpuinitdata smp_cpu_nb = { - .notifier_call = smp_cpu_notify, -}; - static int __devinit smp_add_present_cpu(int cpu) { struct cpu *c = &pcpu_devices[cpu].cpu; @@ -1050,7 +1060,7 @@ static int __init s390_smp_init(void) { int cpu, rc; - register_cpu_notifier(&smp_cpu_nb); + hotcpu_notifier(smp_cpu_notify, 0); #ifdef CONFIG_HOTPLUG_CPU rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index bcab2f04ba5..48174850f3b 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -350,3 +350,5 @@ SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper) +SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index fa0eb238dac..62f89d98e88 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -22,17 +22,41 @@ #include <math-emu/soft-fp.h> #include <math-emu/single.h> -static inline int stsi_0(void) +int topology_max_mnest; + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) { - int rc = stsi(NULL, 0, 0, 0); - return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; + int rc = 0; + + asm volatile( + " stsi 0(%3)\n" + "0: jz 2f\n" + "1: lhi %1,%4\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (r0), "+d" (rc) + : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) + : "cc", "memory"); + if (rc) + return rc; + return fc ? 0 : ((unsigned int) r0) >> 28; } +EXPORT_SYMBOL(stsi); -static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) +static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) { - if (stsi(info, 1, 1, 1) == -ENOSYS) - return len; + int i; + if (stsi(info, 1, 1, 1)) + return; EBCASC(info->manufacturer, sizeof(info->manufacturer)); EBCASC(info->type, sizeof(info->type)); EBCASC(info->model, sizeof(info->model)); @@ -41,242 +65,197 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) EBCASC(info->model_capacity, sizeof(info->model_capacity)); EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); - len += sprintf(page + len, "Manufacturer: %-16.16s\n", - info->manufacturer); - len += sprintf(page + len, "Type: %-4.4s\n", - info->type); + seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); + seq_printf(m, "Type: %-4.4s\n", info->type); + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + seq_printf(m, "Model: %-16.16s", info->model_capacity); if (info->model[0] != '\0') - /* - * Sigh: the model field has been renamed with System z9 - * to model_capacity and a new model field has been added - * after the plant field. To avoid confusing older programs - * the "Model:" prints "model_capacity model" or just - * "model_capacity" if the model string is empty . - */ - len += sprintf(page + len, - "Model: %-16.16s %-16.16s\n", - info->model_capacity, info->model); - else - len += sprintf(page + len, "Model: %-16.16s\n", - info->model_capacity); - len += sprintf(page + len, "Sequence Code: %-16.16s\n", - info->sequence); - len += sprintf(page + len, "Plant: %-4.4s\n", - info->plant); - len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n", - info->model_capacity, *(u32 *) info->model_cap_rating); - if (info->model_perm_cap[0] != '\0') - len += sprintf(page + len, - "Model Perm. Capacity: %-16.16s %08u\n", - info->model_perm_cap, - *(u32 *) info->model_perm_cap_rating); - if (info->model_temp_cap[0] != '\0') - len += sprintf(page + len, - "Model Temp. Capacity: %-16.16s %08u\n", - info->model_temp_cap, - *(u32 *) info->model_temp_cap_rating); + seq_printf(m, " %-16.16s", info->model); + seq_putc(m, '\n'); + seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence); + seq_printf(m, "Plant: %-4.4s\n", info->plant); + seq_printf(m, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, info->model_cap_rating); + if (info->model_perm_cap_rating) + seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + info->model_perm_cap_rating); + if (info->model_temp_cap_rating) + seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + info->model_temp_cap_rating); + if (info->ncr) + seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); + if (info->npr) + seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr); + if (info->ntr) + seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); if (info->cai) { - len += sprintf(page + len, - "Capacity Adj. Ind.: %d\n", - info->cai); - len += sprintf(page + len, "Capacity Ch. Reason: %d\n", - info->ccr); + seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); + seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); + seq_printf(m, "Capacity Transient: %d\n", info->t); + } + if (info->p) { + for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) { + seq_printf(m, "Type %d Percentage: %d\n", + i, info->typepct[i - 1]); + } } - return len; } -static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len) +static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) { static int max_mnest; int i, rc; - len += sprintf(page + len, "\n"); + seq_putc(m, '\n'); if (!MACHINE_HAS_TOPOLOGY) - return len; - if (max_mnest) { - stsi(info, 15, 1, max_mnest); - } else { - for (max_mnest = 6; max_mnest > 1; max_mnest--) { - rc = stsi(info, 15, 1, max_mnest); - if (rc != -ENOSYS) - break; - } - } - len += sprintf(page + len, "CPU Topology HW: "); + return; + if (stsi(info, 15, 1, topology_max_mnest)) + return; + seq_printf(m, "CPU Topology HW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - len += sprintf(page + len, " %d", info->mag[i]); - len += sprintf(page + len, "\n"); + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); #ifdef CONFIG_SCHED_MC store_topology(info); - len += sprintf(page + len, "CPU Topology SW: "); + seq_printf(m, "CPU Topology SW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - len += sprintf(page + len, " %d", info->mag[i]); - len += sprintf(page + len, "\n"); + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); #endif - return len; } -static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) +static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) { struct sysinfo_1_2_2_extension *ext; int i; - if (stsi(info, 1, 2, 2) == -ENOSYS) - return len; + if (stsi(info, 1, 2, 2)) + return; ext = (struct sysinfo_1_2_2_extension *) ((unsigned long) info + info->acc_offset); - - len += sprintf(page + len, "CPUs Total: %d\n", - info->cpus_total); - len += sprintf(page + len, "CPUs Configured: %d\n", - info->cpus_configured); - len += sprintf(page + len, "CPUs Standby: %d\n", - info->cpus_standby); - len += sprintf(page + len, "CPUs Reserved: %d\n", - info->cpus_reserved); - - if (info->format == 1) { - /* - * Sigh 2. According to the specification the alternate - * capability field is a 32 bit floating point number - * if the higher order 8 bits are not zero. Printing - * a floating point number in the kernel is a no-no, - * always print the number as 32 bit unsigned integer. - * The user-space needs to know about the strange - * encoding of the alternate cpu capability. - */ - len += sprintf(page + len, "Capability: %u %u\n", - info->capability, ext->alt_capability); - for (i = 2; i <= info->cpus_total; i++) - len += sprintf(page + len, - "Adjustment %02d-way: %u %u\n", - i, info->adjustment[i-2], - ext->alt_adjustment[i-2]); - - } else { - len += sprintf(page + len, "Capability: %u\n", - info->capability); - for (i = 2; i <= info->cpus_total; i++) - len += sprintf(page + len, - "Adjustment %02d-way: %u\n", - i, info->adjustment[i-2]); + seq_printf(m, "CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. + */ + seq_printf(m, "Capability: %u", info->capability); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_capability); + seq_putc(m, '\n'); + if (info->nominal_cap) + seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap); + if (info->secondary_cap) + seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap); + for (i = 2; i <= info->cpus_total; i++) { + seq_printf(m, "Adjustment %02d-way: %u", + i, info->adjustment[i-2]); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_adjustment[i-2]); + seq_putc(m, '\n'); } - - if (info->secondary_capability != 0) - len += sprintf(page + len, "Secondary Capability: %d\n", - info->secondary_capability); - return len; } -static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len) +static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) { - if (stsi(info, 2, 2, 2) == -ENOSYS) - return len; - + if (stsi(info, 2, 2, 2)) + return; EBCASC(info->name, sizeof(info->name)); - - len += sprintf(page + len, "\n"); - len += sprintf(page + len, "LPAR Number: %d\n", - info->lpar_number); - - len += sprintf(page + len, "LPAR Characteristics: "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Number: %d\n", info->lpar_number); + seq_printf(m, "LPAR Characteristics: "); if (info->characteristics & LPAR_CHAR_DEDICATED) - len += sprintf(page + len, "Dedicated "); + seq_printf(m, "Dedicated "); if (info->characteristics & LPAR_CHAR_SHARED) - len += sprintf(page + len, "Shared "); + seq_printf(m, "Shared "); if (info->characteristics & LPAR_CHAR_LIMITED) - len += sprintf(page + len, "Limited "); - len += sprintf(page + len, "\n"); - - len += sprintf(page + len, "LPAR Name: %-8.8s\n", - info->name); - - len += sprintf(page + len, "LPAR Adjustment: %d\n", - info->caf); - - len += sprintf(page + len, "LPAR CPUs Total: %d\n", - info->cpus_total); - len += sprintf(page + len, "LPAR CPUs Configured: %d\n", - info->cpus_configured); - len += sprintf(page + len, "LPAR CPUs Standby: %d\n", - info->cpus_standby); - len += sprintf(page + len, "LPAR CPUs Reserved: %d\n", - info->cpus_reserved); - len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n", - info->cpus_dedicated); - len += sprintf(page + len, "LPAR CPUs Shared: %d\n", - info->cpus_shared); - return len; + seq_printf(m, "Limited "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Name: %-8.8s\n", info->name); + seq_printf(m, "LPAR Adjustment: %d\n", info->caf); + seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); + seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); + seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); } -static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len) +static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) { int i; - if (stsi(info, 3, 2, 2) == -ENOSYS) - return len; + if (stsi(info, 3, 2, 2)) + return; for (i = 0; i < info->count; i++) { EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); - len += sprintf(page + len, "\n"); - len += sprintf(page + len, "VM%02d Name: %-8.8s\n", - i, info->vm[i].name); - len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n", - i, info->vm[i].cpi); - - len += sprintf(page + len, "VM%02d Adjustment: %d\n", - i, info->vm[i].caf); - - len += sprintf(page + len, "VM%02d CPUs Total: %d\n", - i, info->vm[i].cpus_total); - len += sprintf(page + len, "VM%02d CPUs Configured: %d\n", - i, info->vm[i].cpus_configured); - len += sprintf(page + len, "VM%02d CPUs Standby: %d\n", - i, info->vm[i].cpus_standby); - len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n", - i, info->vm[i].cpus_reserved); + seq_putc(m, '\n'); + seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name); + seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi); + seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf); + seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total); + seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); + seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); + seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); } - return len; } -static int proc_read_sysinfo(char *page, char **start, - off_t off, int count, - int *eof, void *data) +static int sysinfo_show(struct seq_file *m, void *v) { - unsigned long info = get_zeroed_page(GFP_KERNEL); - int level, len; + void *info = (void *)get_zeroed_page(GFP_KERNEL); + int level; if (!info) return 0; - - len = 0; - level = stsi_0(); + level = stsi(NULL, 0, 0, 0); if (level >= 1) - len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); - + stsi_1_1_1(m, info); if (level >= 1) - len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len); - + stsi_15_1_x(m, info); if (level >= 1) - len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); - + stsi_1_2_2(m, info); if (level >= 2) - len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len); - + stsi_2_2_2(m, info); if (level >= 3) - len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len); + stsi_3_2_2(m, info); + free_page((unsigned long)info); + return 0; +} - free_page(info); - return len; +static int sysinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysinfo_show, NULL); } -static __init int create_proc_sysinfo(void) +static const struct file_operations sysinfo_fops = { + .open = sysinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init sysinfo_create_proc(void) { - create_proc_read_entry("sysinfo", 0444, NULL, - proc_read_sysinfo, NULL); + proc_create("sysinfo", 0444, NULL, &sysinfo_fops); return 0; } -device_initcall(create_proc_sysinfo); +device_initcall(sysinfo_create_proc); /* * Service levels interface. @@ -407,7 +386,7 @@ void s390_adjust_jiffies(void) if (!info) return; - if (stsi(info, 1, 2, 2) != -ENOSYS) { + if (stsi(info, 1, 2, 2) == 0) { /* * Major sigh. The cpu capability encoding is "special". * If the first 9 bits of info->capability are 0 then it diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dcec960fc72..2db1011b8b1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -329,7 +329,7 @@ static unsigned long clock_sync_flags; * The synchronous get_clock function. It will write the current clock * value to the clock pointer and return 0 if the clock is in sync with * the external time source. If the clock mode is local it will return - * -ENOSYS and -EAGAIN if the clock is not in sync with the external + * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external * reference. */ int get_sync_clock(unsigned long long *clock) @@ -347,7 +347,7 @@ int get_sync_clock(unsigned long long *clock) return 0; if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) - return -ENOSYS; + return -EOPNOTSUPP; if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return -EACCES; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 05151e06c38..54d93f4b681 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -17,6 +17,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <linux/mm.h> +#include <asm/sysinfo.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -44,9 +45,6 @@ static struct mask_info book_info; cpumask_t cpu_book_map[NR_CPUS]; unsigned char cpu_book_id[NR_CPUS]; -/* smp_cpu_state_mutex must be held when accessing this array */ -int cpu_polarization[NR_CPUS]; - static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; @@ -75,10 +73,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, { unsigned int cpu; - for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS); - cpu < TOPOLOGY_CPU_BITS; - cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) - { + for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { unsigned int rcpu; int lcpu; @@ -94,7 +89,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, } else { cpu_core_id[lcpu] = core->id; } - cpu_set_polarization(lcpu, tl_cpu->pp); + smp_cpu_set_polarization(lcpu, tl_cpu->pp); } } return core; @@ -201,7 +196,7 @@ static void topology_update_polarization_simple(void) mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) - cpu_set_polarization(cpu, POLARIZATION_HRZ); + smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); mutex_unlock(&smp_cpu_state_mutex); } @@ -231,7 +226,7 @@ int topology_set_cpu_management(int fc) if (rc) return -EBUSY; for_each_possible_cpu(cpu) - cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); return rc; } @@ -250,12 +245,10 @@ static void update_cpu_core_map(void) void store_topology(struct sysinfo_15_1_x *info) { - int rc; - - rc = stsi(info, 15, 1, 3); - if (rc != -ENOSYS) - return; - stsi(info, 15, 1, 2); + if (topology_max_mnest >= 3) + stsi(info, 15, 1, 3); + else + stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) @@ -415,7 +408,7 @@ static ssize_t cpu_polarization_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - switch (cpu_read_polarization(cpu)) { + switch (smp_cpu_get_polarization(cpu)) { case POLARIZATION_HRZ: count = sprintf(buf, "horizontal\n"); break; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 01775c04a90..3d2b0fa37db 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -57,6 +57,23 @@ static int kstack_depth_to_print = 12; static int kstack_depth_to_print = 20; #endif /* CONFIG_64BIT */ +static inline void __user *get_trap_ip(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + unsigned long address; + + if (regs->int_code & 0x200) + address = *(unsigned long *)(current->thread.trap_tdb + 24); + else + address = regs->psw.addr; + return (void __user *) + ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#else + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#endif +} + /* * For show_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown @@ -214,7 +231,6 @@ void show_registers(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { - print_modules(); printk("CPU: %d %s %s %.*s\n", task_thread_info(current)->cpu, print_tainted(), init_utsname()->release, @@ -254,6 +270,7 @@ void die(struct pt_regs *regs, const char *str) #endif printk("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); show_regs(regs); bust_spinlocks(0); add_taint(TAINT_DIE); @@ -285,12 +302,6 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static inline void __user *get_psw_address(struct pt_regs *regs) -{ - return (void __user *) - ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); -} - static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { @@ -304,14 +315,14 @@ static void __kprobes do_trap(struct pt_regs *regs, info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; - info.si_addr = get_psw_address(regs); + info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; else { enum bug_trap_type btt; @@ -381,6 +392,11 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, "translation exception") +#ifdef CONFIG_64BIT +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") +#endif + static inline void do_fp_trap(struct pt_regs *regs, int fpc) { int si_code = 0; @@ -408,7 +424,7 @@ static void __kprobes illegal_op(struct pt_regs *regs) __u16 __user *location; int signal = 0; - location = get_psw_address(regs); + location = get_trap_ip(regs); if (user_mode(regs)) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -476,7 +492,7 @@ void specification_exception(struct pt_regs *regs) __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_psw_address(regs); + location = (__u16 __user *) get_trap_ip(regs); if (user_mode(regs)) { get_user(*((__u16 *) opcode), location); @@ -525,7 +541,7 @@ static void data_exception(struct pt_regs *regs) __u16 __user *location; int signal = 0; - location = get_psw_address(regs); + location = get_trap_ip(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -641,6 +657,7 @@ void __init trap_init(void) pgm_check_table[0x12] = &translation_exception; pgm_check_table[0x13] = &special_op_exception; #ifdef CONFIG_64BIT + pgm_check_table[0x18] = &transaction_exception; pgm_check_table[0x38] = &do_asce_exception; pgm_check_table[0x39] = &do_dat_exception; pgm_check_table[0x3A] = &do_dat_exception; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 9a19ca367c1..d7776281cb6 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -85,7 +85,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; static void vdso_init_data(struct vdso_data *vd) { vd->ectg_available = - addressing_mode != HOME_SPACE_MODE && test_facility(31); + s390_user_mode != HOME_SPACE_MODE && test_facility(31); } #ifdef CONFIG_64BIT @@ -102,7 +102,7 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) lowcore->vdso_per_cpu_data = __LC_PASTE; - if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled) + if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); @@ -147,7 +147,7 @@ void vdso_free_per_cpu(struct _lowcore *lowcore) unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; - if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled) + if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) return; psal = (u32 *)(addr_t) lowcore->paste[4]; @@ -165,7 +165,7 @@ static void vdso_init_cr5(void) { unsigned long cr5; - if (addressing_mode == HOME_SPACE_MODE || !vdso_enabled) + if (s390_user_mode == HOME_SPACE_MODE || !vdso_enabled) return; cr5 = offsetof(struct _lowcore, paste); __ctl_load(cr5, 5, 5); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4fc97b40a6e..79033442789 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -99,7 +99,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) return virt_timer_forward(user + system); } -void account_vtime(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *ti; @@ -107,7 +107,7 @@ void account_vtime(struct task_struct *prev, struct task_struct *next) ti = task_thread_info(prev); ti->user_timer = S390_lowcore.user_timer; ti->system_timer = S390_lowcore.system_timer; - ti = task_thread_info(next); + ti = task_thread_info(current); S390_lowcore.user_timer = ti->user_timer; S390_lowcore.system_timer = ti->system_timer; } @@ -122,7 +122,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +void vtime_account(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(account_system_vtime); +EXPORT_SYMBOL_GPL(vtime_account); void __kprobes vtime_stop_cpu(void) { @@ -378,9 +378,8 @@ static int __cpuinit s390_nohz_notify(struct notifier_block *self, long cpu = (long) hcpu; idle = &per_cpu(s390_idle, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DYING: - case CPU_DYING_FROZEN: idle->nohz_delay = 0; default: break; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 78eb9847008..9b04a32e569 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -5,7 +5,7 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION def_bool y - prompt "Virtualization" + prompt "KVM" ---help--- Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 60da903d6f3..310be61bead 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -211,7 +211,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) spin_unlock(&fi->lock); /* deal with other level 3 hypervisors */ - if (stsi(mem, 3, 2, 2) == -ENOSYS) + if (stsi(mem, 3, 2, 2)) mem->count = 0; if (mem->count < 8) mem->count++; @@ -259,7 +259,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) mem = get_zeroed_page(GFP_KERNEL); if (!mem) goto out_fail; - if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) + if (stsi((void *) mem, fc, sel1, sel2)) goto out_mem; break; case 3: diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 761ab8b56af..6ab0d0b5cec 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -4,6 +4,7 @@ lib-y += delay.o string.o uaccess_std.o uaccess_pt.o obj-y += usercopy.o -obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o +obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o +obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_64BIT) += uaccess_mvcos.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S new file mode 100644 index 00000000000..14ca9244b61 --- /dev/null +++ b/arch/s390/lib/mem32.S @@ -0,0 +1,92 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + basr %r5,%r0 +.Lmemset_base: + ltr %r4,%r4 + bzr %r14 + ltr %r3,%r3 + jnz .Lmemset_fill + ahi %r4,-1 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + lr %r1,%r2 + je .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + ex %r4,.Lmemset_xc-.Lmemset_base(%r5) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + chi %r4,1 + lr %r1,%r2 + ber %r14 + ahi %r4,-2 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + je .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + ex %r4,.Lmemset_mvc-.Lmemset_base(%r5) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + basr %r5,%r0 +.Lmemcpy_base: + ltr %r4,%r4 + bzr %r14 + ahi %r4,-1 + lr %r0,%r4 + srl %r0,8 + ltr %r0,%r0 + lr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + ex %r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brct %r0,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem64.S new file mode 100644 index 00000000000..c6d553e85ab --- /dev/null +++ b/arch/s390/lib/mem64.S @@ -0,0 +1,88 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + ltgr %r4,%r4 + bzr %r14 + ltgr %r3,%r3 + jnz .Lmemset_fill + aghi %r4,-1 + srlg %r3,%r4,8 + ltgr %r3,%r3 + lgr %r1,%r2 + jz .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + larl %r3,.Lmemset_xc + ex %r4,0(%r3) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + cghi %r4,1 + lgr %r1,%r2 + ber %r14 + aghi %r4,-2 + srlg %r3,%r4,8 + ltgr %r3,%r3 + jz .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + larl %r3,.Lmemset_mvc + ex %r4,0(%r3) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + ltgr %r4,%r4 + bzr %r14 + aghi %r4,-1 + srlg %r5,%r4,8 + ltgr %r5,%r5 + lgr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + larl %r5,.Lmemcpy_mvc + ex %r4,0(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brctg %r5,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 846ec64ab2c..b647d5ff0ad 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -43,11 +43,7 @@ static inline char *__strnend(const char *s, size_t n) */ size_t strlen(const char *s) { -#if __GNUC__ < 4 return __strend(s) - s; -#else - return __builtin_strlen(s); -#endif } EXPORT_SYMBOL(strlen); @@ -73,7 +69,6 @@ EXPORT_SYMBOL(strnlen); */ char *strcpy(char *dest, const char *src) { -#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dest; @@ -82,9 +77,6 @@ char *strcpy(char *dest, const char *src) : "+&a" (dest), "+&a" (src) : "d" (r0) : "cc", "memory" ); return ret; -#else - return __builtin_strcpy(dest, src); -#endif } EXPORT_SYMBOL(strcpy); @@ -106,7 +98,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) if (size) { size_t len = (ret >= size) ? size-1 : ret; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return ret; } @@ -124,8 +116,8 @@ EXPORT_SYMBOL(strlcpy); char *strncpy(char *dest, const char *src, size_t n) { size_t len = __strnend(src, n) - src; - __builtin_memset(dest + len, 0, n - len); - __builtin_memcpy(dest, src, len); + memset(dest + len, 0, n - len); + memcpy(dest, src, len); return dest; } EXPORT_SYMBOL(strncpy); @@ -171,7 +163,7 @@ size_t strlcat(char *dest, const char *src, size_t n) if (len >= n) len = n - 1; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return res; } @@ -194,7 +186,7 @@ char *strncat(char *dest, const char *src, size_t n) char *p = __strend(dest); p[len] = '\0'; - __builtin_memcpy(p, src, len); + memcpy(p, src, len); return dest; } EXPORT_SYMBOL(strncat); @@ -348,41 +340,3 @@ void *memscan(void *s, int c, size_t n) return (void *) ret; } EXPORT_SYMBOL(memscan); - -/** - * memcpy - Copy one area of memory to another - * @dest: Where to copy to - * @src: Where to copy from - * @n: The size of the area. - * - * returns a pointer to @dest - */ -void *memcpy(void *dest, const void *src, size_t n) -{ - return __builtin_memcpy(dest, src, n); -} -EXPORT_SYMBOL(memcpy); - -/** - * memset - Fill a region of memory with the given value - * @s: Pointer to the start of the area. - * @c: The byte to fill the area with - * @n: The size of the area. - * - * returns a pointer to @s - */ -void *memset(void *s, int c, size_t n) -{ - char *xs; - - if (c == 0) - return __builtin_memset(s, 0, n); - - xs = (char *) s; - if (n > 0) - do { - *xs++ = c; - } while (--n > 0); - return s; -} -EXPORT_SYMBOL(memset); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 60ee2b88379..2d37bb861fa 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -2,69 +2,82 @@ * User access functions based on page table walks for enhanced * system layout without hardware support. * - * Copyright IBM Corp. 2006 + * Copyright IBM Corp. 2006, 2012 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) */ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include <asm/futex.h> #include "uaccess.h" -static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) + +/* + * Returns kernel address for user virtual address. If the returned address is + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address + * contains the (negative) exception code. + */ +static __always_inline unsigned long follow_table(struct mm_struct *mm, + unsigned long addr, int write) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; + pte_t *ptep; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return (pte_t *) 0x3a; + return -0x3aUL; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return (pte_t *) 0x3b; + return -0x3bUL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return (pte_t *) 0x10; + if (pmd_none(*pmd)) + return -0x10UL; + if (pmd_huge(*pmd)) { + if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); + } + if (unlikely(pmd_bad(*pmd))) + return -0x10UL; + + ptep = pte_offset_map(pmd, addr); + if (!pte_present(*ptep)) + return -0x11UL; + if (write && !pte_write(*ptep)) + return -0x04UL; - return pte_offset_map(pmd, addr); + return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); } static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, size; - pte_t *pte; + unsigned long offset, done, size, kaddr; void *from, *to; done = 0; retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, write_user); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } else if (write_user && !pte_write(*pte)) { - pte = (pte_t *) 0x04; - goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); + offset = uaddr & ~PAGE_MASK; size = min(n - done, PAGE_SIZE - offset); if (write_user) { - to = (void *)((pfn << PAGE_SHIFT) + offset); + to = (void *) kaddr; from = kptr + done; } else { - from = (void *)((pfn << PAGE_SHIFT) + offset); + from = (void *) kaddr; to = kptr + done; } memcpy(to, from, size); @@ -75,7 +88,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) + if (__handle_fault(uaddr, -kaddr, write_user)) return n - done; goto retry; } @@ -84,27 +97,22 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, + int write) { struct mm_struct *mm = current->mm; - unsigned long pfn; - pte_t *pte; + unsigned long kaddr; int rc; retry: - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, write); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); + return kaddr; fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(uaddr, (unsigned long) pte, 0); + rc = __handle_fault(uaddr, -kaddr, write); spin_lock(&mm->page_table_lock); if (!rc) goto retry; @@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to) static size_t strnlen_user_pt(size_t count, const char __user *src) { - char *addr; unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, len; - pte_t *pte; + unsigned long offset, done, len, kaddr; size_t len_str; if (segment_eq(get_fs(), KERNEL_DS)) @@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, 0); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE-1); - addr = (char *)(pfn << PAGE_SHIFT) + offset; + offset = uaddr & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); - len_str = strnlen(addr, len); + len_str = strnlen((char *) kaddr, len); done += len_str; uaddr += len_str; } while ((len_str == len) && (done < count)); @@ -192,7 +192,7 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, 0)) + if (__handle_fault(uaddr, -kaddr, 0)) return 0; goto retry; } @@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to, const void __user *from) { struct mm_struct *mm = current->mm; - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size, error_code; + unsigned long offset_max, uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; - pte_t *pte_from, *pte_to; + unsigned long kaddr_to, kaddr_from; int write_user; if (segment_eq(get_fs(), KERNEL_DS)) { @@ -242,38 +241,23 @@ retry: do { write_user = 0; uaddr = uaddr_from; - pte_from = follow_table(mm, uaddr_from); - error_code = (unsigned long) pte_from; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_from)) { - error_code = 0x11; + kaddr_from = follow_table(mm, uaddr_from, 0); + error_code = kaddr_from; + if (IS_ERR_VALUE(error_code)) goto fault; - } write_user = 1; uaddr = uaddr_to; - pte_to = follow_table(mm, uaddr_to); - error_code = (unsigned long) pte_to; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_to)) { - error_code = 0x11; + kaddr_to = follow_table(mm, uaddr_to, 1); + error_code = (unsigned long) kaddr_to; + if (IS_ERR_VALUE(error_code)) goto fault; - } else if (!pte_write(*pte_to)) { - error_code = 0x04; - goto fault; - } - pfn_from = pte_pfn(*pte_from); - pfn_to = pte_pfn(*pte_to); - offset_from = uaddr_from & (PAGE_SIZE-1); - offset_to = uaddr_from & (PAGE_SIZE-1); - offset_max = max(offset_from, offset_to); + offset_max = max(uaddr_from & ~PAGE_MASK, + uaddr_to & ~PAGE_MASK); size = min(n - done, PAGE_SIZE - offset_max); - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + memcpy((void *) kaddr_to, (void *) kaddr_from, size); done += size; uaddr_from += size; uaddr_to += size; @@ -282,7 +266,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, error_code, write_user)) + if (__handle_fault(uaddr, -error_code, write_user)) return n - done; goto retry; } @@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index d98fe9004a5..0f5536b0c1a 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o gup.o + page-states.o gup.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 00000000000..4d1ee88864e --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,81 @@ +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *mid; + unsigned long addr; + + while (first <= last) { + mid = ((last - first) >> 1) + first; + addr = extable_insn(mid); + if (addr < value) + first = mid + 1; + else if (addr > value) + last = mid - 1; + else + return mid; + } + return NULL; +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + * + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* This compare is only valid after normalization. */ + return x->insn - y->insn; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + struct exception_table_entry *p; + int i; + + /* Normalize entries to being relative to the start of the section */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn += i; + sort(start, finish - start, sizeof(*start), cmp_ex, NULL); + /* Denormalize all entries */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn -= i; +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /* Trim the beginning */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /* Trim the end */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6c013f54414..ac9122ca115 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -111,7 +111,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) if (trans_exc_code == 2) /* Access via secondary space, set_fs setting decides */ return current->thread.mm_segment.ar4; - if (addressing_mode == HOME_SPACE_MODE) + if (s390_user_mode == HOME_SPACE_MODE) /* User space if the access has been done via home space. */ return trans_exc_code == 3; /* @@ -163,7 +163,7 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; return; } @@ -628,9 +628,8 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, struct thread_struct *thread, *next; struct task_struct *tsk; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DEAD: - case CPU_DEAD_FROZEN: spin_lock_irq(&pfault_lock); list_for_each_entry_safe(thread, next, &pfault_list, list) { thread->pfault_wait = 0; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 65cb06e2af4..eeaf8023851 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -154,6 +154,43 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, return 1; } +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6adbc082618..81e596c65de 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,7 +42,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -static unsigned long setup_zero_pages(void) +static unsigned long __init setup_zero_pages(void) { struct cpuid cpu_id; unsigned int order; @@ -212,7 +212,7 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { free_init_pages("initrd memory", start, end); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 18df31d1f2c..b402991e43d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -609,8 +609,8 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) */ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) { - struct page *page; - unsigned long *table; + unsigned long *uninitialized_var(table); + struct page *uninitialized_var(page); unsigned int mask, bit; if (mm_has_pgste(mm)) @@ -796,7 +796,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have switched amode? If no, we cannot do sie */ - if (addressing_mode == HOME_SPACE_MODE) + if (s390_user_mode == HOME_SPACE_MODE) return -EINVAL; /* Do we have pgstes? if yes, we are done */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6f896e75ab4..c22abf900c9 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -107,7 +107,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); pm_dir = pmd_offset(pu_dir, address); -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && (address + HPAGE_SIZE <= start + size) && (address >= HPAGE_SIZE)) { diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile new file mode 100644 index 00000000000..90568c33ddb --- /dev/null +++ b/arch/s390/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S new file mode 100644 index 00000000000..7e45d13816c --- /dev/null +++ b/arch/s390/net/bpf_jit.S @@ -0,0 +1,130 @@ +/* + * BPF Jit compiler for s390, help functions. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/linkage.h> + +/* + * Calling convention: + * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved + * %r2: skb pointer + * %r3: offset parameter + * %r5: BPF A accumulator + * %r8: return address + * %r9: save register for skb pointer + * %r10: skb->data + * %r11: skb->len - skb->data_len (headlen) + * %r12: BPF X accumulator + * + * skb_copy_bits takes 4 parameters: + * %r2 = skb pointer + * %r3 = offset into skb data + * %r4 = length to copy + * %r5 = pointer to temp buffer + */ +#define SKBDATA %r8 + + /* A = *(u32 *) (skb->data+K+X) */ +ENTRY(sk_load_word_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u32 *) (skb->data+K) */ +ENTRY(sk_load_word) + llgfr %r1,%r3 # extend offset + ahi %r3,4 # offset + 4 + clr %r11,%r3 # hlen <= offset + 4 ? + jl sk_load_word_slow + l %r5,0(%r1,%r10) # get word from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_word_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,4 # 4 bytes + la %r5,160(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u16 *) (skb->data+K+X) */ +ENTRY(sk_load_half_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u16 *) (skb->data+K) */ +ENTRY(sk_load_half) + llgfr %r1,%r3 # extend offset + ahi %r3,2 # offset + 2 + clr %r11,%r3 # hlen <= offset + 2 ? + jl sk_load_half_slow + llgh %r5,0(%r1,%r10) # get half from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_half_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(2,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u8 *) (skb->data+K+X) */ +ENTRY(sk_load_byte_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u8 *) (skb->data+K) */ +ENTRY(sk_load_byte) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r5,0 + ic %r5,0(%r1,%r10) # get byte from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,1 # 1 bytes + la %r5,163(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */ +ENTRY(sk_load_byte_msh) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r12,0 + ic %r12,0(%r1,%r10) # get byte from skb + nill %r12,0x0f + sll %r12,2 + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_msh_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r12,160(%r15) # load result from temp buffer + nill %r12,0x0f + sll %r12,2 + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c new file mode 100644 index 00000000000..9b355b406af --- /dev/null +++ b/arch/s390/net/bpf_jit_comp.c @@ -0,0 +1,776 @@ +/* + * BPF Jit compiler for s390. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/moduleloader.h> +#include <linux/netdevice.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> +#include <asm/processor.h> +#include <asm/facility.h> + +/* + * Conventions: + * %r2 = skb pointer + * %r3 = offset parameter + * %r4 = scratch register / length parameter + * %r5 = BPF A accumulator + * %r8 = return address + * %r9 = save register for skb pointer + * %r10 = skb->data + * %r11 = skb->len - skb->data_len (headlen) + * %r12 = BPF X accumulator + * %r13 = literal pool pointer + * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +struct bpf_jit { + unsigned int seen; + u8 *start; + u8 *prg; + u8 *mid; + u8 *lit; + u8 *end; + u8 *base_ip; + u8 *ret0_ip; + u8 *exit_ip; + unsigned int off_load_word; + unsigned int off_load_half; + unsigned int off_load_byte; + unsigned int off_load_bmsh; + unsigned int off_load_iword; + unsigned int off_load_ihalf; + unsigned int off_load_ibyte; +}; + +#define BPF_SIZE_MAX 4096 /* Max size for program */ + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ +#define SEEN_LITERAL 16 /* code uses literals */ +#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ +#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ +#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ +#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ +#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ +#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ +#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ + +#define EMIT2(op) \ +({ \ + if (jit->prg + 2 <= jit->mid) \ + *(u16 *) jit->prg = op; \ + jit->prg += 2; \ +}) + +#define EMIT4(op) \ +({ \ + if (jit->prg + 4 <= jit->mid) \ + *(u32 *) jit->prg = op; \ + jit->prg += 4; \ +}) + +#define EMIT4_DISP(op, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT4(op | __disp); \ +}) + +#define EMIT4_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm) & 0xffff; \ + EMIT4(op | __imm); \ +}) + +#define EMIT4_PCREL(op, pcrel) \ +({ \ + long __pcrel = ((pcrel) >> 1) & 0xffff; \ + EMIT4(op | __pcrel); \ +}) + +#define EMIT6(op1, op2) \ +({ \ + if (jit->prg + 6 <= jit->mid) { \ + *(u32 *) jit->prg = op1; \ + *(u16 *) (jit->prg + 4) = op2; \ + } \ + jit->prg += 6; \ +}) + +#define EMIT6_DISP(op1, op2, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT6(op1 | __disp, op2); \ +}) + +#define EMIT6_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm); \ + EMIT6(op | (__imm >> 16), __imm & 0xffff); \ +}) + +#define EMIT_CONST(val) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 4 <= jit->end) \ + *(u32 *) jit->lit = val; \ + jit->lit += 4; \ + ret; \ +}) + +#define EMIT_FN_CONST(bit, fn) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + if (jit->seen & bit) { \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 8 <= jit->end) \ + *(void **) jit->lit = fn; \ + jit->lit += 8; \ + } \ + ret; \ +}) + +static void bpf_jit_prologue(struct bpf_jit *jit) +{ + /* Save registers and create stack frame if necessary */ + if (jit->seen & SEEN_DATAREF) { + /* stmg %r8,%r15,88(%r15) */ + EMIT6(0xeb8ff058, 0x0024); + /* lgr %r14,%r15 */ + EMIT4(0xb90400ef); + /* ahi %r15,<offset> */ + EMIT4_IMM(0xa7fa0000, (jit->seen & SEEN_MEM) ? -112 : -80); + /* stg %r14,152(%r15) */ + EMIT6(0xe3e0f098, 0x0024); + } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* stmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0024); + else if (jit->seen & SEEN_XREG) + /* stg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0024); + else if (jit->seen & SEEN_LITERAL) + /* stg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0024); + + /* Setup literal pool */ + if (jit->seen & SEEN_LITERAL) { + /* basr %r13,0 */ + EMIT2(0x0dd0); + jit->base_ip = jit->prg; + } + jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); + jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); + jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); + jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); + jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); + jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); + jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); + + /* Filter needs to access skb data */ + if (jit->seen & SEEN_DATAREF) { + /* l %r11,<len>(%r2) */ + EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); + /* s %r11,<data_len>(%r2) */ + EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); + /* lg %r10,<data>(%r2) */ + EMIT6_DISP(0xe3a02000, 0x0004, + offsetof(struct sk_buff, data)); + } +} + +static void bpf_jit_epilogue(struct bpf_jit *jit) +{ + /* Return 0 */ + if (jit->seen & SEEN_RET0) { + jit->ret0_ip = jit->prg; + /* lghi %r2,0 */ + EMIT4(0xa7290000); + } + jit->exit_ip = jit->prg; + /* Restore registers */ + if (jit->seen & SEEN_DATAREF) + /* lmg %r8,%r15,<offset>(%r15) */ + EMIT6_DISP(0xeb8ff000, 0x0004, + (jit->seen & SEEN_MEM) ? 200 : 168); + else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* lmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0004); + else if (jit->seen & SEEN_XREG) + /* lg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0004); + else if (jit->seen & SEEN_LITERAL) + /* lg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0004); + /* br %r14 */ + EMIT2(0x07fe); +} + +/* + * make sure we dont leak kernel information to user + */ +static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) +{ + /* Clear temporary memory if (seen & SEEN_MEM) */ + if (jit->seen & SEEN_MEM) + /* xc 0(64,%r15),0(%r15) */ + EMIT6(0xd73ff000, 0xf000); + /* Clear X if (seen & SEEN_XREG) */ + if (jit->seen & SEEN_XREG) + /* lhi %r12,0 */ + EMIT4(0xa7c80000); + /* Clear A if the first register does not set it. */ + switch (filter[0].code) { + case BPF_S_LD_W_ABS: + case BPF_S_LD_H_ABS: + case BPF_S_LD_B_ABS: + case BPF_S_LD_W_LEN: + case BPF_S_LD_W_IND: + case BPF_S_LD_H_IND: + case BPF_S_LD_B_IND: + case BPF_S_LDX_B_MSH: + case BPF_S_LD_IMM: + case BPF_S_LD_MEM: + case BPF_S_MISC_TXA: + case BPF_S_ANC_PROTOCOL: + case BPF_S_ANC_PKTTYPE: + case BPF_S_ANC_IFINDEX: + case BPF_S_ANC_MARK: + case BPF_S_ANC_QUEUE: + case BPF_S_ANC_HATYPE: + case BPF_S_ANC_RXHASH: + case BPF_S_ANC_CPU: + case BPF_S_RET_K: + /* first instruction sets A register */ + break; + default: /* A = 0 */ + /* lhi %r5,0 */ + EMIT4(0xa7580000); + } +} + +static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, + unsigned int *addrs, int i, int last) +{ + unsigned int K; + int offset; + unsigned int mask; + + K = filter->k; + switch (filter->code) { + case BPF_S_ALU_ADD_X: /* A += X */ + jit->seen |= SEEN_XREG; + /* ar %r5,%r12 */ + EMIT2(0x1a5c); + break; + case BPF_S_ALU_ADD_K: /* A += K */ + if (!K) + break; + if (K <= 16383) + /* ahi %r5,<K> */ + EMIT4_IMM(0xa75a0000, K); + else if (test_facility(21)) + /* alfi %r5,<K> */ + EMIT6_IMM(0xc25b0000, K); + else + /* a %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_SUB_X: /* A -= X */ + jit->seen |= SEEN_XREG; + /* sr %r5,%r12 */ + EMIT2(0x1b5c); + break; + case BPF_S_ALU_SUB_K: /* A -= K */ + if (!K) + break; + if (K <= 16384) + /* ahi %r5,-K */ + EMIT4_IMM(0xa75a0000, -K); + else if (test_facility(21)) + /* alfi %r5,-K */ + EMIT6_IMM(0xc25b0000, -K); + else + /* s %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_MUL_X: /* A *= X */ + jit->seen |= SEEN_XREG; + /* msr %r5,%r12 */ + EMIT4(0xb252005c); + break; + case BPF_S_ALU_MUL_K: /* A *= K */ + if (K <= 16383) + /* mhi %r5,K */ + EMIT4_IMM(0xa75c0000, K); + else if (test_facility(34)) + /* msfi %r5,<K> */ + EMIT6_IMM(0xc2510000, K); + else + /* ms %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x7150d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_DIV_X: /* A /= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dr %r4,%r12 */ + EMIT2(0x1d4c); + break; + case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */ + /* m %r4,<d(K)>(%r13) */ + EMIT4_DISP(0x5c40d000, EMIT_CONST(K)); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_S_ALU_AND_X: /* A &= X */ + jit->seen |= SEEN_XREG; + /* nr %r5,%r12 */ + EMIT2(0x145c); + break; + case BPF_S_ALU_AND_K: /* A &= K */ + if (test_facility(21)) + /* nilf %r5,<K> */ + EMIT6_IMM(0xc05b0000, K); + else + /* n %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5450d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_OR_X: /* A |= X */ + jit->seen |= SEEN_XREG; + /* or %r5,%r12 */ + EMIT2(0x165c); + break; + case BPF_S_ALU_OR_K: /* A |= K */ + if (test_facility(21)) + /* oilf %r5,<K> */ + EMIT6_IMM(0xc05d0000, K); + else + /* o %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5650d000, EMIT_CONST(K)); + break; + case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ + jit->seen |= SEEN_XREG; + /* xr %r5,%r12 */ + EMIT2(0x175c); + break; + case BPF_S_ALU_LSH_X: /* A <<= X; */ + jit->seen |= SEEN_XREG; + /* sll %r5,0(%r12) */ + EMIT4(0x8950c000); + break; + case BPF_S_ALU_LSH_K: /* A <<= K */ + if (K == 0) + break; + /* sll %r5,K */ + EMIT4_DISP(0x89500000, K); + break; + case BPF_S_ALU_RSH_X: /* A >>= X; */ + jit->seen |= SEEN_XREG; + /* srl %r5,0(%r12) */ + EMIT4(0x8850c000); + break; + case BPF_S_ALU_RSH_K: /* A >>= K; */ + if (K == 0) + break; + /* srl %r5,K */ + EMIT4_DISP(0x88500000, K); + break; + case BPF_S_ALU_NEG: /* A = -A */ + /* lnr %r5,%r5 */ + EMIT2(0x1155); + break; + case BPF_S_JMP_JA: /* ip += K */ + offset = addrs[i + K] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */ + mask = 0x200000; /* jh */ + goto kbranch; + case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto kbranch; + case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ + mask = 0x800000; /* je */ +kbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K <= 16383) + /* chi %r5,<K> */ + EMIT4_IMM(0xa75e0000, K); + else if (test_facility(21)) + /* clfi %r5,<K> */ + EMIT6_IMM(0xc25f0000, K); + else + /* c %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5950d000, EMIT_CONST(K)); + } +branch: if (filter->jt == filter->jf) { + if (filter->jt == 0) + break; + /* j <jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + } + if (filter->jt != 0) { + /* brc <mask>,<jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | mask, offset); + } + if (filter->jf != 0) { + /* brc <mask^15>,<jf> */ + offset = addrs[i + filter->jf] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); + } + break; + case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K > 65535) { + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* n %r4,<d(K)>(%r13) */ + EMIT4_DISP(0x5440d000, EMIT_CONST(K)); + } else + /* tmll %r5,K */ + EMIT4_IMM(0xa7510000, K); + } + goto branch; + case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ + mask = 0x200000; /* jh */ + goto xbranch; + case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto xbranch; + case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */ + mask = 0x800000; /* je */ +xbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* cr %r5,%r12 */ + EMIT2(0x195c); + } + goto branch; + case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* nr %r4,%r12 */ + EMIT2(0x144c); + } + goto branch; + case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; + offset = jit->off_load_word; + goto load_abs; + case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; + offset = jit->off_load_half; + goto load_abs; + case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; + offset = jit->off_load_byte; +load_abs: if ((int) K < 0) + goto out; +call_fn: /* lg %r1,<d(function)>(%r13) */ + EMIT6_DISP(0xe310d000, 0x0004, offset); + /* l %r3,<d(K)>(%r13) */ + EMIT4_DISP(0x5830d000, EMIT_CONST(K)); + /* basr %r8,%r1 */ + EMIT2(0x0d81); + /* jnz <ret0> */ + EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); + break; + case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; + offset = jit->off_load_iword; + goto call_fn; + case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; + offset = jit->off_load_ihalf; + goto call_fn; + case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; + offset = jit->off_load_ibyte; + goto call_fn; + case BPF_S_LDX_B_MSH: + /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ + jit->seen |= SEEN_RET0; + if ((int) K < 0) { + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); + break; + } + jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; + offset = jit->off_load_bmsh; + goto call_fn; + case BPF_S_LD_W_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* l %r5,<d(len)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); + break; + case BPF_S_LDX_W_LEN: /* X = skb->len; */ + jit->seen |= SEEN_XREG; + /* l %r12,<d(len)>(%r2) */ + EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); + break; + case BPF_S_LD_IMM: /* A = K */ + if (K <= 16383) + /* lhi %r5,K */ + EMIT4_IMM(0xa7580000, K); + else if (test_facility(21)) + /* llilf %r5,<K> */ + EMIT6_IMM(0xc05f0000, K); + else + /* l %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5850d000, EMIT_CONST(K)); + break; + case BPF_S_LDX_IMM: /* X = K */ + jit->seen |= SEEN_XREG; + if (K <= 16383) + /* lhi %r12,<K> */ + EMIT4_IMM(0xa7c80000, K); + else if (test_facility(21)) + /* llilf %r12,<K> */ + EMIT6_IMM(0xc0cf0000, K); + else + /* l %r12,<d(K)>(%r13) */ + EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); + break; + case BPF_S_LD_MEM: /* A = mem[K] */ + jit->seen |= SEEN_MEM; + /* l %r5,<K>(%r15) */ + EMIT4_DISP(0x5850f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_LDX_MEM: /* X = mem[K] */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* l %r12,<K>(%r15) */ + EMIT4_DISP(0x58c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_MISC_TAX: /* X = A */ + jit->seen |= SEEN_XREG; + /* lr %r12,%r5 */ + EMIT2(0x18c5); + break; + case BPF_S_MISC_TXA: /* A = X */ + jit->seen |= SEEN_XREG; + /* lr %r5,%r12 */ + EMIT2(0x185c); + break; + case BPF_S_RET_K: + if (K == 0) { + jit->seen |= SEEN_RET0; + if (last) + break; + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); + } else { + if (K <= 16383) + /* lghi %r2,K */ + EMIT4_IMM(0xa7290000, K); + else + /* llgf %r2,<K>(%r13) */ + EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); + /* j <exit> */ + if (last && !(jit->seen & SEEN_RET0)) + break; + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + } + break; + case BPF_S_RET_A: + /* llgfr %r2,%r5 */ + EMIT4(0xb9160025); + /* j <exit> */ + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + break; + case BPF_S_ST: /* mem[K] = A */ + jit->seen |= SEEN_MEM; + /* st %r5,<K>(%r15) */ + EMIT4_DISP(0x5050f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* st %r12,<K>(%r15) */ + EMIT4_DISP(0x50c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(protocol)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); + break; + case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* l %r5,<d(ifindex)>(%r1) */ + EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); + break; + case BPF_S_ANC_MARK: /* A = skb->mark */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + /* l %r5,<d(mark)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); + break; + case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(queue_mapping)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); + break; + case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(type)>(%r1) */ + EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); + break; + case BPF_S_ANC_RXHASH: /* A = skb->rxhash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + /* l %r5,<d(rxhash)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); + break; + case BPF_S_ANC_CPU: /* A = smp_processor_id() */ +#ifdef CONFIG_SMP + /* l %r5,<d(cpu_nr)> */ + EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); +#else + /* lhi %r5,0 */ + EMIT4(0xa7580000); +#endif + break; + default: /* too complex, give up */ + goto out; + } + addrs[i] = jit->prg - jit->start; + return 0; +out: + return -1; +} + +void bpf_jit_compile(struct sk_filter *fp) +{ + unsigned long size, prg_len, lit_len; + struct bpf_jit jit, cjit; + unsigned int *addrs; + int pass, i; + + if (!bpf_jit_enable) + return; + addrs = kmalloc(fp->len * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + memset(addrs, 0, fp->len * sizeof(*addrs)); + memset(&jit, 0, sizeof(cjit)); + memset(&cjit, 0, sizeof(cjit)); + + for (pass = 0; pass < 10; pass++) { + jit.prg = jit.start; + jit.lit = jit.mid; + + bpf_jit_prologue(&jit); + bpf_jit_noleaks(&jit, fp->insns); + for (i = 0; i < fp->len; i++) { + if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, + i == fp->len - 1)) + goto out; + } + bpf_jit_epilogue(&jit); + if (jit.start) { + WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); + if (memcmp(&jit, &cjit, sizeof(jit)) == 0) + break; + } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { + prg_len = jit.prg - jit.start; + lit_len = jit.lit - jit.mid; + size = max_t(unsigned long, prg_len + lit_len, + sizeof(struct work_struct)); + if (size >= BPF_SIZE_MAX) + goto out; + jit.start = module_alloc(size); + if (!jit.start) + goto out; + jit.prg = jit.mid = jit.start + prg_len; + jit.lit = jit.end = jit.start + prg_len + lit_len; + jit.base_ip += (unsigned long) jit.start; + jit.exit_ip += (unsigned long) jit.start; + jit.ret0_ip += (unsigned long) jit.start; + } + cjit = jit; + } + if (bpf_jit_enable > 1) { + pr_err("flen=%d proglen=%lu pass=%d image=%p\n", + fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) { + printk(KERN_ERR "JIT code:\n"); + print_fn_code(jit.start, jit.mid - jit.start); + print_hex_dump(KERN_ERR, "JIT literals:\n", + DUMP_PREFIX_ADDRESS, 16, 1, + jit.mid, jit.end - jit.mid, false); + } + } + if (jit.start) + fp->bpf_func = (void *) jit.start; +out: + kfree(addrs); +} + +static void jit_free_defer(struct work_struct *arg) +{ + module_free(NULL, arg); +} + +/* run from softirq, we must use a work_struct to call + * module_free() from process context + */ +void bpf_jit_free(struct sk_filter *fp) +{ + struct work_struct *work; + + if (fp->bpf_func == sk_run_filter) + return; + work = (struct work_struct *)fp->bpf_func; + INIT_WORK(work, jit_free_defer); + schedule_work(work); +} diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 2707023c756..637970cfd3f 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -27,6 +27,7 @@ #include <linux/reboot.h> #include <linux/elfcore.h> #include <linux/pm.h> +#include <linux/rcupdate.h> void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -50,9 +51,10 @@ void __noreturn cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); - + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 40db2d0aef3..a7e078f2e2e 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -192,11 +192,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - static void __init pcibios_bus_report_status_early(struct pci_channel *hose, int top_bus, int current_bus, diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b7cf6a547f1..7e605b95592 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -933,7 +933,7 @@ ret_with_reschedule: pta restore_all, tr1 - movi _TIF_SIGPENDING, r8 + movi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), r8 and r8, r7, r8 pta work_notifysig, tr0 bne r8, ZERO, tr0 diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index f67601cb3f1..b96489d8b27 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -139,7 +139,7 @@ work_pending: ! r8: current_thread_info ! t: result of "tst #_TIF_NEED_RESCHED, r0" bf/s work_resched - tst #_TIF_SIGPENDING, r0 + tst #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME), r0 work_notifysig: bt/s __restore_all mov r15, r4 diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 21dcda75a52..fc052116156 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ -#ifdef CONFIG_PCI_DEBUG - printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq, - pci_name(dev)); -#endif - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* in/out routines taken from pcic.c * * This probably belongs here rather than ioport.c because diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 15e0a169397..f1ddc0d2367 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -48,9 +48,7 @@ void *module_alloc(unsigned long size) return NULL; ret = module_map(size); - if (!ret) - ret = ERR_PTR(-ENOMEM); - else + if (ret) memset(ret, 0, size); return ret; @@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, v = sym->st_value + rel[i].r_addend; switch (ELF_R_TYPE(rel[i].r_info) & 0xff) { + case R_SPARC_DISP32: + v -= (Elf_Addr) location; + *loc32 = v; + break; #ifdef CONFIG_SPARC64 case R_SPARC_64: location[0] = v >> 56; @@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, location[7] = v >> 0; break; - case R_SPARC_DISP32: - v -= (Elf_Addr) location; - *loc32 = v; - break; - case R_SPARC_WDISP19: v -= (Elf_Addr) location; *loc32 = (*loc32 & ~0x7ffff) | diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 065b88c4f86..acc8c838ff7 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus) { } -void pcibios_update_irq(struct pci_dev *pdev, int irq) -{ -} - resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 932e4430f7f..c9a3c1fe729 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -412,14 +412,6 @@ config TILE_USB config NEED_BOUNCE_POOL def_bool USB_OHCI_HCD -config HOTPLUG - bool "Support for hot-pluggable devices" - ---help--- - Say Y here if you want to plug devices into your computer while - the system is running, and be able to use them quickly. In many - cases, the devices can likewise be unplugged at any time too. - One well-known example of this is USB. - source "drivers/pci/hotplug/Kconfig" endmenu diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 7a7ce390534..d5e86c9f74f 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -69,7 +69,6 @@ static inline const struct cpumask *cpumask_of_node(int node) | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 0*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index 15fb7799208..58105c31228 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -25,21 +25,23 @@ #include <linux/module.h> #include <asm/pgtable.h> -#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) -#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1402) +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) -#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) -#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140f) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) -#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1417) -#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1418) -#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1419) -#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x141a) +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) -#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141c) -#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141d) -#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 33c10864d2f..759822687e8 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -246,16 +246,13 @@ static void __devinit fixup_read_and_payload_sizes(void) /* Scan for the smallest maximum payload size. */ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int pcie_caps_offset; u32 devcap; int max_payload; - pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pcie_caps_offset == 0) + if (!pci_is_pcie(dev)) continue; - pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, - &devcap); + pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &devcap); max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; if (max_payload < smallest_max_payload) smallest_max_payload = max_payload; @@ -263,21 +260,10 @@ static void __devinit fixup_read_and_payload_sizes(void) /* Now, set the max_payload_size for all devices to that value. */ new_values = (max_read_size << 12) | (smallest_max_payload << 5); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int pcie_caps_offset; - u16 devctl; - - pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pcie_caps_offset == 0) - continue; - - pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, - &devctl); - devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); - devctl |= new_values; - pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, - devctl); - } + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) + pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ, + new_values); } @@ -404,14 +390,6 @@ void pcibios_set_master(struct pci_dev *dev) } /* - * This is called from the generic Linux layer. - */ -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -/* * Enable memory and/or address decoding, as appropriate, for the * device described by the 'dev' struct. * diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 0e213e35ffc..2ba6d052f85 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1034,14 +1034,6 @@ char __devinit *pcibios_setup(char *str) } /* - * This is called from the generic Linux layer. - */ -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -/* * Enable memory address decoding, as appropriate, for the * device described by the 'dev' struct. The I/O decoding * is disabled, though the TILE-Gx supports I/O addressing. diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index bbaf2c59830..457475f9841 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -409,7 +409,8 @@ int setup_one_line(struct line *lines, int n, char *init, line->valid = 1; err = parse_chan_pair(new, line, n, opts, error_out); if (!err) { - struct device *d = tty_register_device(driver, n, NULL); + struct device *d = tty_port_register_device(&line->port, + driver, n, NULL); if (IS_ERR(d)) { *error_out = "Failed to register device"; err = PTR_ERR(d); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 664a60e8dfb..c17de0db673 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,6 +705,7 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; + rcu_switch(from, to); switch_to(from, to, from); } diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0..33a6a2423bd 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 40db8f71dea..2df313b6a58 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); -DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); -DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); -DEFINE_STR(UM_KERN_ERR, KERN_ERR); -DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); -DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); -DEFINE_STR(UM_KERN_INFO, KERN_INFO); -DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE_STR(UM_KERN_CONT, KERN_CONT); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 4fa82c055aa..cef06856333 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -26,6 +26,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + #ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade936636..8c82786da82 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -39,34 +39,21 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { + get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; -} -EXPORT_SYMBOL(start_thread); - -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) -{ - long error; - - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; + current->ptrace &= ~PT_DTRACE; #ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); + SUBARCH_EXECVE1(regs->regs); #endif - task_unlock(current); - } - return error; } +EXPORT_SYMBOL(start_thread); long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; - err = execve1(file, argv, env); + err = do_execve(file, argv, env, ¤t->thread.regs); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -81,7 +68,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + error = do_execve(filename, argv, env, ¤t->thread.regs); putname(filename); out: return error; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714..c5f5afa5074 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc2..cc9c2350e41 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee..a4c6d8eee74 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index d50270d26b4..15889df9b46 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS:.o=.%): \ - c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o) + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of # using it directly. diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index 46cb6c9de6c..b0056f68d32 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -154,14 +154,6 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size, zhole_size[0] = 0; } -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ - if (debug_pci) - printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n", - irq, pci_name(dev)); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * If the bus contains any of these devices, then we must not turn on * parity checking of any kind. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..943667050da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -7,11 +7,13 @@ config 64BIT Say no to build a 32-bit kernel - formerly known as i386 config X86_32 - def_bool !64BIT + def_bool y + depends on !64BIT select CLKSRC_I8253 config X86_64 - def_bool 64BIT + def_bool y + depends on 64BIT select X86_DEV_DMA_OPS ### Arch settings @@ -36,6 +38,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER @@ -60,6 +63,8 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 @@ -97,9 +102,12 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_RCU_USER_QS if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS || UPROBES) + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES config OUTPUT_FORMAT string @@ -127,13 +135,15 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) + def_bool y + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG config NEED_SG_DMA_LENGTH def_bool y config GENERIC_ISA_DMA - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config GENERIC_BUG def_bool y @@ -150,13 +160,16 @@ config GENERIC_GPIO bool config ARCH_MAY_HAVE_PC_FDC - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + def_bool y + depends on !X86_XADD config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + def_bool y + depends on X86_XADD config GENERIC_CALIBRATE_DELAY def_bool y @@ -746,13 +759,14 @@ config SWIOTLB def_bool y if X86_64 ---help--- Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. + which don't have a hardware IOMMU. Using this PCI devices + which can only access 32-bits of memory can be used on systems + with more than 3 GB of memory. + If unsure, say Y. config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + def_bool y + depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" @@ -796,17 +810,6 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. -config IRQ_TIME_ACCOUNTING - bool "Fine granularity task level IRQ time accounting" - default n - ---help--- - Select this option to enable fine granularity task irq time - accounting. This is done by reading a timestamp on each - transitions between softirq and hardirq state, so there can be a - small performance impact. - - If in doubt, say N here. - source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -871,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check / overheating reporting" + default y ---help--- Machine Check support allows the processor to notify the kernel if it detects a problem (e.g. overheating, data corruption). @@ -982,25 +986,25 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - tristate "/dev/cpu/microcode - microcode support" + tristate "CPU microcode loading support" select FW_LOADER ---help--- + If you say Y here, you will be able to update the microcode on certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, - Pentium 4, Xeon etc. The AMD support is for family 0x10 and - 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra. - You will obviously need the actual microcode binary data itself - which is not shipped with the Linux kernel. + IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, + Xeon etc. The AMD support is for families 0x10 and later. You will + obviously need the actual microcode binary data itself which is not + shipped with the Linux kernel. This option selects the general module only, you need to select at least one vendor specific module as well. - To compile this driver as a module, choose M here: the - module will be called microcode. + To compile this driver as a module, choose M here: the module + will be called microcode. config MICROCODE_INTEL - bool "Intel microcode patch loading support" + bool "Intel microcode loading support" depends on MICROCODE default MICROCODE select FW_LOADER @@ -1013,7 +1017,7 @@ config MICROCODE_INTEL <http://www.urbanmyth.org/microcode/>. config MICROCODE_AMD - bool "AMD microcode patch loading support" + bool "AMD microcode loading support" depends on MICROCODE select FW_LOADER ---help--- @@ -1159,10 +1163,12 @@ config X86_PAE consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool y + depends on X86_64 || X86_PAE config ARCH_DMA_ADDR_T_64BIT - def_bool X86_64 || HIGHMEM64G + def_bool y + depends on X86_64 || HIGHMEM64G config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT @@ -1285,8 +1291,8 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG + def_bool y + depends on X86_64 && MEMORY_HOTPLUG config ARCH_PROC_KCORE_TEXT def_bool y @@ -1975,7 +1981,6 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - default n depends on PCI && EXPERIMENTAL help Read the PCI windows out of the CNB20LE host bridge. This allows @@ -2186,18 +2191,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e9984..f3b86d0df44 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 682e9c210ba..474ca35b1bc 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -archscripts: +archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs ### diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e398bb5d63b..8a84501acb1 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ $(obj)/piggy.o +$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone +$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone + ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o endif diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index b3e0227df2c..c760e073963 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, nr_gops = size / sizeof(void *); for (i = 0; i < nr_gops; i++) { struct efi_graphics_output_mode_info *info; - efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; - void *pciio; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy; void *h = gop_handle[i]; status = efi_call_phys3(sys_table->boottime->handle_protocol, @@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, if (status != EFI_SUCCESS) continue; - efi_call_phys3(sys_table->boottime->handle_protocol, - h, &pciio_proto, &pciio); + status = efi_call_phys3(sys_table->boottime->handle_protocol, + h, &conout_proto, &dummy); + + if (status == EFI_SUCCESS) + conout_found = true; status = efi_call_phys4(gop->query_mode, gop, gop->mode->mode, &size, &info); - if (status == EFI_SUCCESS && (!first_gop || pciio)) { + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* - * Apple provide GOPs that are not backed by - * real hardware (they're used to handle - * multiple displays). The workaround is to - * search for a GOP implementing the PCIIO - * protocol, and if one isn't found, to just - * fallback to the first GOP. + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. */ width = info->horizontal_resolution; height = info->vertical_resolution; @@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, pixels_per_scan_line = info->pixels_per_scan_line; /* - * Once we've found a GOP supporting PCIIO, + * Once we've found a GOP supporting ConOut, * don't bother looking any further. */ - if (pciio) + if (conout_found) break; first_gop = gop; @@ -328,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; - si->lfb_size = fb_size; si->pages = 1; if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { @@ -376,6 +378,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_pos = 0; } + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; + free_handle: efi_call_phys1(sys_table->boottime->free_pool, gop_handle); return status; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 3b6e15627c5..e5b0a8f91c5 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -14,6 +14,10 @@ #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) #define EFI_READ_CHUNK_SIZE (1024 * 1024) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ + 0x3f, 0xc1, 0x4d) + #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 #define PIXEL_BIT_MASK 2 diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd6786..2a017441b8b 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67dcb0..5598547281a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb2903809..671524d0f6c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b63d6..8c77c64fbd2 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -162,7 +162,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), + (stack_t __force __user *) &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -250,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); + err |= restore_xstate_sig(buf, 1); get_user_ex(*pax, &sc->ax); } get_user_catch(err); @@ -361,7 +362,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -381,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp = (unsigned long) ka->sa.sa_restorer; if (used_math()) { - sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; - if (save_i387_xstate_ia32(*fpstate) < 0) + unsigned long fx_aligned, math_size; + + sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_ia32 __user *) sp; + if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + math_size) < 0) return (void __user *) -1L; } @@ -448,7 +452,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -529,7 +533,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bece094..c5b938d92ea 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599..444704c8e18 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009deb5..6dfd0195bb5 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a740f69..7f8422a28a4 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with #include "dwarf2.h" /* - * 64-bit system call stack frame layout defines and helpers, for - * assembly code (note that the seemingly unnecessary parentheses - * are to prevent cpp from inserting spaces in expressions that get - * passed to macros): + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ -#define R15 (0) -#define R14 (8) -#define R13 (16) -#define R12 (24) -#define RBP (32) -#define RBX (40) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 (48) -#define R10 (56) -#define R9 (64) -#define R8 (72) -#define RAX (80) -#define RCX (88) -#define RDX (96) -#define RSI (104) -#define RDI (112) -#define ORIG_RAX (120) /* + error_code */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP (128) -#define CS (136) -#define EFLAGS (144) -#define RSP (152) -#define SS (160) +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff682..16cae425d1f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -97,6 +97,7 @@ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -209,6 +210,7 @@ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -299,12 +301,14 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) +#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d6a33..92f3c6ed817 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/regset.h> +#include <linux/compat.h> #include <linux/slab.h> #include <asm/asm.h> #include <asm/cpufeature.h> @@ -21,42 +22,74 @@ #include <asm/uaccess.h> #include <asm/xsave.h> -extern unsigned int sig_xstate_size; +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); +extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; - /* * xstateregs_active == fpregs_active. Please refer to the comment * at the definition of fpregs_active. */ #define xstateregs_active fpregs_active -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - #ifdef CONFIG_MATH_EMULATION +# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else +# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has(X86_FEATURE_EAGER_FPU); +} + static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -72,6 +105,13 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -81,131 +121,88 @@ static inline void sanitize_i387_state(struct task_struct *tsk) __sanitize_i387_state(tsk); } -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) { - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; + return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { - int err; + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); } -#else /* CONFIG_X86_32 */ - -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int frstor_checking(struct i387_fsave_struct *fx) { - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); - - return 0; + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fpu_fxsave(struct fpu *fpu) { - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } } -#endif /* CONFIG_X86_64 */ - /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact. @@ -248,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) return fpu_save_init(&tsk->thread.fpu); } -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(fpu); + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); else - return fpu_fxrstor_checking(fpu); + return frstor_checking(&fpu->state->fsave); } static inline int restore_fpu_checking(struct task_struct *tsk) @@ -310,15 +304,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_eager_fpu()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_eager_fpu()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_eager_fpu()) + drop_fpu(tsk); + else { + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); + } +} + /* * FPU state switching for scheduling. * @@ -352,7 +383,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = tsk_used_math(new) && (use_eager_fpu() || + new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -364,14 +400,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_eager_fpu()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -391,44 +427,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if (fpu.preload) { if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); + drop_init_fpu(new); } } /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); -static inline void __clear_fpu(struct task_struct *tsk) +static inline int xstate_sigframe_size(void) { - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); + return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); } + + return __restore_xstate_sig(buf, buf_fx, size); } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. */ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); @@ -437,25 +469,32 @@ static inline void user_fpu_begin(void) preempt_enable(); } +static inline void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) + xsave_state(&tsk->thread.fpu.state->xsave, -1); + else + fpu_fxsave(&tsk->thread.fpu); +} + /* * These disable preemption on their own and are safe */ static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); + + if (use_eager_fpu()) { + __save_fpu(tsk); + return; + } + preempt_disable(); __save_init_fpu(tsk); __thread_fpu_end(tsk); preempt_enable(); } -static inline void clear_fpu(struct task_struct *tsk) -{ - preempt_disable(); - __clear_fpu(tsk); - preempt_enable(); -} - /* * i387 state interaction */ @@ -510,11 +549,34 @@ static inline void fpu_free(struct fpu *fpu) } } -static inline void fpu_copy(struct fpu *dst, struct fpu *src) +static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - memcpy(dst->state, src->state, xstate_size); + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + unlazy_fpu(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } } -extern void fpu_finit(struct fpu *fpu); +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc0874..9a25b522d37 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d3895dbf4dd..81f04cee5f7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -18,6 +18,10 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; + /* + * irq_tlb_count is double-counted in irq_call_count, so it must be + * subtracted from irq_call_count when displaying irq_call_count + */ unsigned int irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 2c392d663dc..434e2106cc8 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -35,8 +35,6 @@ #define HPET_ID_NUMBER_SHIFT 8 #define HPET_ID_VENDOR_SHIFT 16 -#define HPET_ID_VENDOR_8086 0x8086 - #define HPET_CFG_ENABLE 0x001 #define HPET_CFG_LEGACY 0x002 #define HPET_LEGACY_8254 2 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 257d9cca214..ed8089d6909 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,12 +19,37 @@ struct pt_regs; struct user_i387_struct; extern int init_fpu(struct task_struct *child); +extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); extern bool irq_fpu_usable(void); -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} /* * Some instructions like VIA's padlock instructions generate a spurious diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f229b13a5f3..f42a04735a0 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -48,7 +48,7 @@ struct iommu_table_entry { #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry const \ + static const struct iommu_table_entry \ __iommu_entry_##_detect __used \ __attribute__ ((unused, __section__(".iommu_table"), \ aligned((sizeof(void *))))) \ @@ -63,10 +63,10 @@ struct iommu_table_entry { * to stop detecting the other IOMMUs after yours has been detected. */ #define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) #define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) /* * A more sophisticated version of IOMMU_INIT. This variant requires: diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 54788253915..d3ddd17405d 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67..41e08cb6a09 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7..1eaa6b05667 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a3ac52b29cb..54d73b1f00a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -116,19 +116,9 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 - -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ -#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) -#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) -#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) -#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) -#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) -#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) -#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) - +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #ifdef __KERNEL__ - extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device); #ifdef CONFIG_X86_MCE_INTEL extern int mce_cmci_disabled; extern int mce_ignore_ce; +extern int mce_bios_cmci_threshold; void mce_intel_feature_init(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 4ebe157bf73..43d921b4752 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -15,8 +15,8 @@ struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, - struct device *device); + enum ucode_state (*request_microcode_fw) (int cpu, struct device *, + bool refresh_fw); void (*microcode_fini_cpu) (int cpu); @@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); extern void __exit exit_amd_microcode(void); - -static inline void get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); -} - #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98..4fabcdf1cfa 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 00000000000..3f2207bfd17 --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 013286a10c2..db8fec6d295 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -303,11 +303,9 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(pgd_t *base); -extern void native_pagetable_setup_done(pgd_t *base); +extern void native_pagetable_init(void); #else -#define native_pagetable_setup_start x86_init_pgd_noop -#define native_pagetable_setup_done x86_init_pgd_noop +#define native_pagetable_init paging_init #endif struct seq_file; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bca..b98c0d958eb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR @@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 00000000000..d1ac07a2397 --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 598457cbd0f..323973f4abf 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,6 +31,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +#ifndef CONFIG_COMPAT +typedef sigset_t compat_sigset_t; +#endif + #else /* Here we must cater to libcs that poke about in kernel headers. */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d78..cdf5674dd23 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db4881..4ca1c611b55 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007e..c535d847e3b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,6 +89,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +128,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +144,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1d..8ff8be7835a 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,11 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb0522850b7..fddb53d6391 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb963595..36ec21c36d6 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include <linux/types.h> /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 38155f66714..57693498519 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -81,12 +81,13 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions - * @pagetable_setup_start: platform specific pre paging_init() call - * @pagetable_setup_done: platform specific post paging_init() call + * @pagetable_init: platform specific paging initialization call to setup + * the kernel pagetables and prepare accessors functions. + * Callback must call paging_init(). Called once after the + * direct mapping for phys memory is available. */ struct x86_init_paging { - void (*pagetable_setup_start)(pgd_t *base); - void (*pagetable_setup_done)(pgd_t *base); + void (*pagetable_init)(void); }; /** diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e841dd..472b9b78301 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 454570891bd..aabd5850bdb 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a @@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, like assuming they have some legal value. */ asm("" : "=r" (p4), "=r" (p5)); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_pIII_sse = { diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index b9b2323e90f..5fc06d0b7eb 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -34,41 +34,7 @@ * no advantages to be gotten from x86-64 here anyways. */ -typedef struct { - unsigned long a, b; -} __attribute__((aligned(16))) xmm_store_t; - -/* Doesn't use gcc to save the XMM registers, because there is no easy way to - tell it to do a clts before the register saving. */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - asm volatile( \ - "movq %%cr0,%0 ;\n\t" \ - "clts ;\n\t" \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - "movq %0,%%cr0 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - preempt_enable(); \ -} while (0) +#include <asm/i387.h> #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" @@ -91,10 +57,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned int lines = bytes >> 8; - unsigned long cr0; - xmm_store_t xmm_save[4]; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_sse = { diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 2510d35f480..7ea79c5fa1f 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -20,32 +20,6 @@ #include <linux/compiler.h> #include <asm/i387.h> -#define ALIGN32 __aligned(32) - -#define YMM_SAVED_REGS 4 - -#define YMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \ - asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \ - asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \ - asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \ -} while (0); - -#define YMMS_RESTORE \ -do { \ - asm volatile("sfence" : : : "memory"); \ - asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \ - asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \ - asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \ - asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0); - #define BLOCK4(i) \ BLOCK(32 * i, 0) \ BLOCK(32 * (i + 1), 1) \ @@ -60,10 +34,9 @@ do { \ static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -82,16 +55,15 @@ do { \ p1 = (unsigned long *)((uintptr_t)p1 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -113,16 +85,15 @@ do { \ p2 = (unsigned long *)((uintptr_t)p2 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -147,16 +118,15 @@ do { \ p3 = (unsigned long *)((uintptr_t)p3 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -184,7 +154,7 @@ do { \ p4 = (unsigned long *)((uintptr_t)p4 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static struct xor_block_template xor_block_avx = { diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8a1b6f9b594..2ddee1b8779 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -34,17 +34,14 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct *init_xstate_buf; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -extern int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *sw); -static inline int fpu_xrstor_checking(struct fpu *fpu) +static inline int fpu_xrstor_checking(struct xsave_struct *fx) { - struct xsave_struct *fx = &fpu->state->xsave; int err; asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" @@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf) * Clear the xsave header first, so that reserved fields are * initialized to zero. */ - err = __clear_user(&buf->xsave_hdr, - sizeof(struct xsave_hdr_struct)); + err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); if (unlikely(err)) return -EFAULT; @@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf) : [err] "=r" (err) : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ return err; } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d9..8d7a619718b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6e..e651f7a589a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* - * If mp_register_lapic successfully generates a new logical cpu + * If acpi_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ cpumask_andnot(new_map, cpu_present_mask, tmp_map); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ced4534baed..ef5ccca79a6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb308232..b17416e72fb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9d92e19039f..f7e98a2c0d1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, } #endif +static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) +{ + if (!cpu_has_invlpg) + return; + + tlb_flushall_shift = 5; + + if (c->x86 <= 0x11) + tlb_flushall_shift = 4; +} + +static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +{ + u32 ebx, eax, ecx, edx; + u16 mask = 0xfff; + + if (c->x86 < 0xf) + return; + + if (c->extended_cpuid_level < 0x80000006) + return; + + cpuid(0x80000006, &eax, &ebx, &ecx, &edx); + + tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; + tlb_lli_4k[ENTRIES] = ebx & mask; + + /* + * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB + * characteristics from the CPUID function 0x80000005 instead. + */ + if (c->x86 == 0xf) { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + mask = 0xff; + } + + /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!((eax >> 16) & mask)) { + u32 a, b, c, d; + + cpuid(0x80000005, &a, &b, &c, &d); + tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; + } else { + tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + } + + /* a 4M entry uses two 2M entries */ + tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + + /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!(eax & mask)) { + /* Erratum 658 */ + if (c->x86 == 0x15 && c->x86_model <= 0x1f) { + tlb_lli_2m[ENTRIES] = 1024; + } else { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + tlb_lli_2m[ENTRIES] = eax & 0xff; + } + } else + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + cpu_set_tlb_flushall_shift(c); +} + static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, @@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_size_cache = amd_size_cache, #endif .c_early_init = early_init_amd, + .c_detect_tlb = cpu_detect_tlb_amd, .c_bsp_init = bsp_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c97bb7b5a9f..d0e910da16c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -165,10 +165,15 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_fpu(); check_hlt(); check_popad(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); + + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + check_fpu(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..532691b6c8f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -476,7 +476,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "tlb_flushall_shift is 0x%x\n", + "tlb_flushall_shift: %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], @@ -942,8 +942,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - if (boot_cpu_data.cpuid_level >= 2) - cpu_detect_tlb(&boot_cpu_data); + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1023,14 +1022,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + printk(KERN_CONT "%s", strim(c->x86_model_id)); else printk(KERN_CONT "%d86", c->x86); + printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); else - printk(KERN_CONT "\n"); + printk(KERN_CONT ")\n"); print_cpu_msr(c); } @@ -1116,8 +1117,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1297,9 +1296,6 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); - - raw_local_save_flags(kernel_eflags); if (is_uv_system()) uv_cpu_init(); @@ -1352,6 +1348,5 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0a4ce2980a5..198e019a531 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) int i, j, n; unsigned int regs[4]; unsigned char *desc = (unsigned char *)regs; + + if (c->cpuid_level < 2) + return; + /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index fc4beb39357..ddc72f83933 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) } static cpumask_var_t mce_inject_cpumask; +static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { @@ -194,7 +195,11 @@ static void raise_mce(struct mce *m) put_online_cpus(); } else #endif + { + preempt_disable(); raise_local(); + preempt_enable(); + } } /* Error injection interface */ @@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, * so do it a jiffie or two later everywhere. */ schedule_timeout(2); + + mutex_lock(&mce_inject_mutex); raise_mce(&m); + mutex_unlock(&mce_inject_mutex); return usize; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index ed44c8a6585..6a05c1d327a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,6 +28,18 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_X86_MCE_INTEL +unsigned long mce_intel_adjust_timer(unsigned long interval); +void mce_intel_cmci_poll(void); +void mce_intel_hcpu_update(unsigned long cpu); +#else +# define mce_intel_adjust_timer mce_adjust_timer_default +static inline void mce_intel_cmci_poll(void) { } +static inline void mce_intel_hcpu_update(unsigned long cpu) { } +#endif + +void mce_timer_kick(unsigned long interval); + #ifdef CONFIG_ACPI_APEI int apei_write_mce(struct mce *m); ssize_t apei_read_mce(struct mce *m, u64 *record_id); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 292d0258311..29e87d3b284 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; +int mce_bios_cmci_threshold __read_mostly; struct mce_bank *mce_banks __read_mostly; @@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); +static unsigned long mce_adjust_timer_default(unsigned long interval) +{ + return interval; +} + +static unsigned long (*mce_adjust_timer)(unsigned long interval) = + mce_adjust_timer_default; + static void mce_timer_fn(unsigned long data) { struct timer_list *t = &__get_cpu_var(mce_timer); @@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data) if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); + mce_intel_cmci_poll(); } /* @@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data) * polling interval, otherwise increase the polling interval. */ iv = __this_cpu_read(mce_next_interval); - if (mce_notify_irq()) + if (mce_notify_irq()) { iv = max(iv / 2, (unsigned long) HZ/100); - else + } else { iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); + iv = mce_adjust_timer(iv); + } __this_cpu_write(mce_next_interval, iv); + /* Might have become 0 after CMCI storm subsided */ + if (iv) { + t->expires = jiffies + iv; + add_timer_on(t, smp_processor_id()); + } +} - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); +/* + * Ensure that the timer is firing in @interval from now. + */ +void mce_timer_kick(unsigned long interval) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned long when = jiffies + interval; + unsigned long iv = __this_cpu_read(mce_next_interval); + + if (timer_pending(t)) { + if (time_before(when, t->expires)) + mod_timer_pinned(t, when); + } else { + t->expires = round_jiffies(when); + add_timer_on(t, smp_processor_id()); + } + if (interval < iv) + __this_cpu_write(mce_next_interval, interval); } /* Must not be called in IRQ context where del_timer_sync() can deadlock */ @@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); + mce_adjust_timer = mce_intel_adjust_timer; break; case X86_VENDOR_AMD: mce_amd_feature_init(c); @@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void __mcheck_cpu_init_timer(void) +static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = check_interval * HZ; + unsigned long iv = mce_adjust_timer(check_interval * HZ); - setup_timer(t, mce_timer_fn, smp_processor_id()); + __this_cpu_write(mce_next_interval, iv); - if (mce_ignore_ce) + if (mce_ignore_ce || !iv) return; - __this_cpu_write(mce_next_interval, iv); - if (!iv) - return; t->expires = round_jiffies(jiffies + iv); add_timer_on(t, smp_processor_id()); } +static void __mcheck_cpu_init_timer(void) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_timer(t, mce_timer_fn, cpu); + mce_start_timer(cpu, t); +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { * check, or 0 to not wait * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. + * mce=bios_cmci_threshold Don't program the CMCI threshold */ static int __init mcheck_enable(char *str) { @@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str) mce_ignore_ce = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) mce_bootlog = (str[0] == 'b'); + else if (!strcmp(str, "bios_cmci_threshold")) + mce_bios_cmci_threshold = 1; else if (isdigit(str[0])) { get_option(&str, &tolerant); if (*str == ',') { @@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { &mce_cmci_disabled }; +static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { + __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), + &mce_bios_cmci_threshold +}; + static struct device_attribute *mce_device_attrs[] = { &dev_attr_tolerant.attr, &dev_attr_check_interval.attr, @@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = { &dev_attr_dont_log_ce.attr, &dev_attr_ignore_ce.attr, &dev_attr_cmci_disabled.attr, + &dev_attr_bios_cmci_threshold.attr, NULL }; @@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: mce_device_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); + mce_intel_hcpu_update(cpu); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + del_timer_sync(t); break; case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - per_cpu(mce_next_interval, cpu)); - add_timer_on(t, cpu); - } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + mce_start_timer(cpu, t); break; - case CPU_POST_DEAD: + } + + if (action == CPU_POST_DEAD) { /* intentionally ignoring frozen here */ cmci_rediscover(cpu); - break; } + return NOTIFY_OK; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 38e49bc95ff..5f88abf07e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -15,6 +15,8 @@ #include <asm/msr.h> #include <asm/mce.h> +#include "mce-internal.h" + /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. @@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); -#define CMCI_THRESHOLD 1 +#define CMCI_THRESHOLD 1 +#define CMCI_POLL_INTERVAL (30 * HZ) +#define CMCI_STORM_INTERVAL (1 * HZ) +#define CMCI_STORM_THRESHOLD 15 + +static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); +static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); +static DEFINE_PER_CPU(unsigned int, cmci_storm_state); + +enum { + CMCI_STORM_NONE, + CMCI_STORM_ACTIVE, + CMCI_STORM_SUBSIDED, +}; + +static atomic_t cmci_storm_on_cpus; static int cmci_supported(int *banks) { @@ -53,6 +70,93 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +void mce_intel_cmci_poll(void) +{ + if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) + return; + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); +} + +void mce_intel_hcpu_update(unsigned long cpu) +{ + if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) + atomic_dec(&cmci_storm_on_cpus); + + per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; +} + +unsigned long mce_intel_adjust_timer(unsigned long interval) +{ + int r; + + if (interval < CMCI_POLL_INTERVAL) + return interval; + + switch (__this_cpu_read(cmci_storm_state)) { + case CMCI_STORM_ACTIVE: + /* + * We switch back to interrupt mode once the poll timer has + * silenced itself. That means no events recorded and the + * timer interval is back to our poll interval. + */ + __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); + r = atomic_sub_return(1, &cmci_storm_on_cpus); + if (r == 0) + pr_notice("CMCI storm subsided: switching to interrupt mode\n"); + /* FALLTHROUGH */ + + case CMCI_STORM_SUBSIDED: + /* + * We wait for all cpus to go back to SUBSIDED + * state. When that happens we switch back to + * interrupt mode. + */ + if (!atomic_read(&cmci_storm_on_cpus)) { + __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); + cmci_reenable(); + cmci_recheck(); + } + return CMCI_POLL_INTERVAL; + default: + /* + * We have shiny weather. Let the poll do whatever it + * thinks. + */ + return interval; + } +} + +static bool cmci_storm_detect(void) +{ + unsigned int cnt = __this_cpu_read(cmci_storm_cnt); + unsigned long ts = __this_cpu_read(cmci_time_stamp); + unsigned long now = jiffies; + int r; + + if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) + return true; + + if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { + cnt++; + } else { + cnt = 1; + __this_cpu_write(cmci_time_stamp, now); + } + __this_cpu_write(cmci_storm_cnt, cnt); + + if (cnt <= CMCI_STORM_THRESHOLD) + return false; + + cmci_clear(); + __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); + r = atomic_add_return(1, &cmci_storm_on_cpus); + mce_timer_kick(CMCI_POLL_INTERVAL); + + if (r == 1) + pr_notice("CMCI storm detected: switching to poll mode\n"); + return true; +} + /* * The interrupt handler. This is called on every event. * Just call the poller directly to log any events. @@ -61,33 +165,28 @@ static int cmci_supported(int *banks) */ static void intel_threshold_interrupt(void) { + if (cmci_storm_detect()) + return; machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); mce_notify_irq(); } -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - /* * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks * on this CPU. Use the algorithm recommended in the SDM to discover shared * banks. */ -static void cmci_discover(int banks, int boot) +static void cmci_discover(int banks) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); unsigned long flags; - int hdr = 0; int i; + int bios_wrong_thresh = 0; raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; + int bios_zero_thresh = 0; if (test_bit(i, owned)) continue; @@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { - if (test_and_clear_bit(i, owned) && !boot) - print_update("SHD", &hdr, i); + clear_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; } - val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; - val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; + if (!mce_bios_cmci_threshold) { + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + val |= CMCI_THRESHOLD; + } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { + /* + * If bios_cmci_threshold boot option was specified + * but the threshold is zero, we'll try to initialize + * it to 1. + */ + bios_zero_thresh = 1; + val |= CMCI_THRESHOLD; + } + + val |= MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { - if (!test_and_set_bit(i, owned) && !boot) - print_update("CMCI", &hdr, i); + set_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); + /* + * We are able to set thresholds for some banks that + * had a threshold of 0. This means the BIOS has not + * set the thresholds properly or does not work with + * this boot option. Note down now and report later. + */ + if (mce_bios_cmci_threshold && bios_zero_thresh && + (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) + bios_wrong_thresh = 1; } else { WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); + if (mce_bios_cmci_threshold && bios_wrong_thresh) { + pr_info_once( + "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); + pr_info_once( + "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); + } } /* @@ -156,7 +278,7 @@ void cmci_clear(void) continue; /* Disable CMCI */ rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); + val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } @@ -186,7 +308,7 @@ void cmci_rediscover(int dying) continue; /* Recheck banks in case CPUs don't all have the same */ if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } set_cpus_allowed_ptr(current, old); @@ -200,7 +322,7 @@ void cmci_reenable(void) { int banks; if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } static void intel_init_cmci(void) @@ -211,7 +333,7 @@ static void intel_init_cmci(void) return; mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); + cmci_discover(banks); /* * For CPU #0 this runs with still disabled APIC, but that's * ok because only the vector is set up. We still do another diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba33..8b6defe7eef 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec863..eebd5ffe1bb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +static const struct perf_event_attr ibs_notsupp = { + .exclude_user = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + .exclude_idle = 1, + .exclude_host = 1, + .exclude_guest = 1, +}; + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; + if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) + return -EINVAL; + if (config & ~perf_ibs->config_mask) return -EINVAL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0d3d63afa76..6bca492b854 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2048,7 +2048,6 @@ __init int intel_pmu_init(void) case 42: /* SandyBridge */ case 45: /* SandyBridge, "Romely-EP" */ x86_add_quirk(intel_sandybridge_quirk); - case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2073,6 +2072,29 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; + case 58: /* IvyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + pr_cont("IvyBridge events, "); + break; + default: switch (x86_pmu.version) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf425..826054a4f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e7..99d96a4978b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + static struct attribute *snb_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { .constraints = snb_uncore_cbox_constraints, .ops = &snb_uncore_msr_ops, .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, }; static struct intel_uncore_type *snb_msr_uncores[] = { @@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - static struct intel_uncore_box *box; + struct intel_uncore_box *box; box = *per_cpu_ptr(pmu->box, cpu); if (box) @@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aa..e68a4550e95 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c668148..fbd89556229 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323..60c78917190 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a87..b1581527a23 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f2883747..f438a44bf8f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,17 +1109,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1135,71 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1240,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834..066334be7b7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,7 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/asm.h> +#include <asm/rcu.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -68,25 +69,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +124,78 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +216,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +229,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -342,15 +445,15 @@ ENDPROC(native_usergs_sysret64) .macro SAVE_ARGS_IRQ cld /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) /* Save rbp so that we can unwind from get_irq_regs() */ movq_cfi rbp, 0 @@ -384,7 +487,7 @@ ENDPROC(native_usergs_sysret64) .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 + PARTIAL_FRAME 1 (REST_SKIP+8) movq 5*8+16(%rsp), %r11 /* save return address */ movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 @@ -440,7 +543,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -565,7 +668,7 @@ sysret_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi jmp sysret_check @@ -678,7 +781,7 @@ int_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -974,7 +1077,7 @@ retint_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) @@ -1449,7 +1552,7 @@ paranoid_userspace: paranoid_schedule: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - call schedule + SCHEDULE_USER DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF jmp paranoid_userspace diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e..1d414029f1d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. + */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb50..675a0501244 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,24 +19,17 @@ #include <asm/fpu-internal.h> #include <asm/user.h> -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -#else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate -# define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -#endif - /* * Were we in an interrupt that interrupted kernel mode? * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * For now, with eagerfpu we will return interrupted kernel FPU + * state as not-idle. TBD: Ideally we can change the return value + * to something like __thread_has_fpu(current). But we need to + * be careful of doing __thread_clear_has_fpu() before saving + * the FPU etc for supporting nested uses etc. For now, take + * the simple route! + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that * pair does nothing at all: the thread must not have fpu (so * that we don't try to save the FPU state), and TS must * be set (so that the clts/stts pair does nothing that is @@ -44,6 +37,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (use_eager_fpu()) + return 0; + return !__thread_has_fpu(current) && (read_cr0() & X86_CR0_TS); } @@ -77,29 +73,29 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -void kernel_fpu_begin(void) +void __kernel_fpu_begin(void) { struct task_struct *me = current; - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); if (__thread_has_fpu(me)) { __save_init_fpu(me); __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ - } else { + /* We do 'stts()' in __kernel_fpu_end() */ + } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); } } -EXPORT_SYMBOL(kernel_fpu_begin); +EXPORT_SYMBOL(__kernel_fpu_begin); -void kernel_fpu_end(void) +void __kernel_fpu_end(void) { - stts(); - preempt_enable(); + if (use_eager_fpu()) + math_state_restore(); + else + stts(); } -EXPORT_SYMBOL(kernel_fpu_end); +EXPORT_SYMBOL(__kernel_fpu_end); void unlazy_fpu(struct task_struct *tsk) { @@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) } EXPORT_SYMBOL(unlazy_fpu); -#ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) -#else -# define HAVE_HWFP 1 -#endif - -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; static void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; - clts(); if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); @@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) mask = 0x0000ffbf; } mxcsr_feature_mask &= mask; - stts(); } static void __cpuinit init_thread_xstate(void) @@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) init_thread_xstate(); mxcsr_feature_mask_init(); - /* clean state in init */ - current_thread_info()->status = 0; - clear_used_math(); + xsave_init(); + eager_fpu_init(); } void fpu_finit(struct fpu *fpu) @@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) } if (cpu_has_fxsr) { - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - memset(fx, 0, xstate_size); - fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; + fx_finit(&fpu->state->fxsave); } else { struct i387_fsave_struct *fp = &fpu->state->fsave; memset(fp, 0, xstate_size); @@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) * FXSR floating point environment conversions. */ -static void +void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -static void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, } /* - * Signal frame handlers. - */ - -static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; - - fp->status = fp->swd; - if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) - return -1; - return 1; -} - -static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - struct user_i387_ia32_struct env; - int err = 0; - - convert_from_fxsr(&env, tsk); - if (__copy_to_user(buf, &env, sizeof(env))) - return -1; - - err |= __put_user(fx->swd, &buf->status); - err |= __put_user(X86_FXSR_MAGIC, &buf->magic); - if (err) - return -1; - - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) - return -1; - return 1; -} - -static int save_i387_xsave(void __user *buf) -{ - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - - sanitize_i387_state(tsk); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. - * This will enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware applications can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) -{ - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - - if (!used_math()) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; - /* - * This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if (!HAVE_HWFP) { - return fpregs_soft_get(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? -1 : 1; - } - - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); - if (cpu_has_fxsr) - return save_i387_fxsave(fp); - else - return save_i387_fsave(fp); -} - -static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - - return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, - sizeof(struct i387_fsave_struct)); -} - -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) -{ - struct task_struct *tsk = current; - struct user_i387_ia32_struct env; - int err; - - err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], - size); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - if (err || __copy_from_user(&env, buf, sizeof(env))) - return 1; - convert_to_fxsr(tsk, &env); - - return 0; -} - -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - ¤t->thread.fpu.state->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) -{ - int err; - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - - if (HAVE_HWFP) - clear_fpu(tsk); - - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); - else - err = restore_i387_fsave(fp); - } else { - err = fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; - } - set_used_math(); - - return err; -} - -/* * FPU state for core dumps. * This is only used for a.out dumps now. * It is declared generically using elf_fpregset_t (which is diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91a..9a5c460404d 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of. */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d44f7829968..e4595f10591 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - + irq_stats(j)->irq_tlb_count); seq_printf(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) @@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; - sum += irq_stats(cpu)->irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b..57916c0d3cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f942cd..7720ff5a9ee 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -75,20 +75,113 @@ struct microcode_amd { static struct equiv_cpu_entry *equiv_cpu_table; -/* page-sized ucode patch buffer */ -void *patch; +struct ucode_patch { + struct list_head plist; + void *data; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(pcache); + +static u16 find_equiv_id(unsigned int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int i = 0; + + if (!equiv_cpu_table) + return 0; + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + +static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) +{ + int i = 0; + + BUG_ON(!equiv_cpu_table); + + while (equiv_cpu_table[i].equiv_cpu != 0) { + if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) + return equiv_cpu_table[i].installed_cpu; + i++; + } + return 0; +} + +/* + * a small, trivial cache of per-family ucode patches + */ +static struct ucode_patch *cache_find_patch(u16 equiv_cpu) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) + if (p->equiv_cpu == equiv_cpu) + return p; + return NULL; +} + +static void update_cache(struct ucode_patch *new_patch) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) { + if (p->equiv_cpu == new_patch->equiv_cpu) { + if (p->patch_id >= new_patch->patch_id) + /* we already have the latest patch */ + return; + + list_replace(&p->plist, &new_patch->plist); + kfree(p->data); + kfree(p); + return; + } + } + /* no patch found, add it */ + list_add_tail(&new_patch->plist, &pcache); +} + +static void free_cache(void) +{ + struct ucode_patch *p, *tmp; + + list_for_each_entry_safe(p, tmp, &pcache, plist) { + __list_del(p->plist.prev, p->plist.next); + kfree(p->data); + kfree(p); + } +} + +static struct ucode_patch *find_patch(unsigned int cpu) +{ + u16 equiv_id; + + equiv_id = find_equiv_id(cpu); + if (!equiv_id) + return NULL; + + return cache_find_patch(equiv_id); +} static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); + csig->sig = cpuid_eax(0x00000001); csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); return 0; } -static unsigned int verify_ucode_size(int cpu, u32 patch_size, +static unsigned int verify_patch_size(int cpu, u32 patch_size, unsigned int size) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -static u16 find_equiv_id(void) +static int apply_microcode_amd(int cpu) { - unsigned int current_cpu_id, i = 0; - - BUG_ON(equiv_cpu_table == NULL); - - current_cpu_id = cpuid_eax(0x00000001); - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (current_cpu_id == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; -} + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_amd *mc_amd; + struct ucode_cpu_info *uci; + struct ucode_patch *p; + u32 rev, dummy; -/* - * we signal a good patch is found by returning its size > 0 - */ -static int get_matching_microcode(int cpu, const u8 *ucode_ptr, - unsigned int leftover_size, int rev, - unsigned int *current_size) -{ - struct microcode_header_amd *mc_hdr; - unsigned int actual_size, patch_size; - u16 equiv_cpu_id; + BUG_ON(raw_smp_processor_id() != cpu); - /* size of the current patch we're staring at */ - patch_size = *(u32 *)(ucode_ptr + 4); - *current_size = patch_size + SECTION_HDR_SIZE; + uci = ucode_cpu_info + cpu; - equiv_cpu_id = find_equiv_id(); - if (!equiv_cpu_id) + p = find_patch(cpu); + if (!p) return 0; - /* - * let's look at the patch header itself now - */ - mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); + mc_amd = p->data; + uci->mc = p->data; - if (mc_hdr->processor_rev_id != equiv_cpu_id) - return 0; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* ucode might be chipset specific -- currently we don't support this */ - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("CPU%d: chipset specific code not yet supported\n", - cpu); + /* need to apply patch? */ + if (rev >= mc_amd->hdr.patch_id) { + c->microcode = rev; return 0; } - if (mc_hdr->patch_id <= rev) - return 0; - - /* - * now that the header looks sane, verify its size - */ - actual_size = verify_ucode_size(cpu, patch_size, leftover_size); - if (!actual_size) - return 0; - - /* clear the patch buffer */ - memset(patch, 0, PAGE_SIZE); - - /* all looks ok, get the binary patch */ - get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); - - return actual_size; -} - -static int apply_microcode_amd(int cpu) -{ - u32 rev, dummy; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - struct microcode_amd *mc_amd = uci->mc; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_amd == NULL) - return 0; - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - /* get patch id after patching */ - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* check current patch id and patch's id for match */ + /* verify patch application was successful */ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev != mc_amd->hdr.patch_id) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf) return -ENOMEM; } - get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); /* add header length */ return size + CONTAINER_HDR_SZ; @@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void) equiv_cpu_table = NULL; } -static enum ucode_state -generic_load_microcode(int cpu, const u8 *data, size_t size) +static void cleanup(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header_amd *mc_hdr = NULL; - unsigned int mc_size, leftover, current_size = 0; + free_equiv_cpu_table(); + free_cache(); +} + +/* + * We return the current size even if some of the checks failed so that + * we can skip over the next patch. If we return a negative value, we + * signal a grave error like a memory allocation has failed and the + * driver cannot continue functioning normally. In such cases, we tear + * down everything we've used up so far and exit. + */ +static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_header_amd *mc_hdr; + struct ucode_patch *patch; + unsigned int patch_size, crnt_size, ret; + u32 proc_fam; + u16 proc_id; + + patch_size = *(u32 *)(fw + 4); + crnt_size = patch_size + SECTION_HDR_SIZE; + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + + proc_fam = find_cpu_family_by_equiv_cpu(proc_id); + if (!proc_fam) { + pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); + return crnt_size; + } + + /* check if patch is for the current family */ + proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); + if (proc_fam != c->x86) + return crnt_size; + + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", + mc_hdr->patch_id); + return crnt_size; + } + + ret = verify_patch_size(cpu, patch_size, leftover); + if (!ret) { + pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); + return crnt_size; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + pr_err("Patch allocation failure.\n"); + return -EINVAL; + } + + patch->data = kzalloc(patch_size, GFP_KERNEL); + if (!patch->data) { + pr_err("Patch data allocation failure.\n"); + kfree(patch); + return -EINVAL; + } + + /* All looks ok, copy patch... */ + memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); + INIT_LIST_HEAD(&patch->plist); + patch->patch_id = mc_hdr->patch_id; + patch->equiv_cpu = proc_id; + + /* ... and add to cache. */ + update_cache(patch); + + return crnt_size; +} + +static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +{ + enum ucode_state ret = UCODE_ERROR; + unsigned int leftover; + u8 *fw = (u8 *)data; + int crnt_size = 0; int offset; - const u8 *ucode_ptr = data; - void *new_mc = NULL; - unsigned int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_ERROR; - offset = install_equiv_cpu_table(ucode_ptr); + offset = install_equiv_cpu_table(data); if (offset < 0) { pr_err("failed to create equivalent cpu table\n"); - goto out; + return ret; } - ucode_ptr += offset; + fw += offset; leftover = size - offset; - if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { + if (*(u32 *)fw != UCODE_UCODE_TYPE) { pr_err("invalid type field in container file section header\n"); - goto free_table; + free_equiv_cpu_table(); + return ret; } while (leftover) { - mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, - new_rev, ¤t_size); - if (mc_size) { - mc_hdr = patch; - new_mc = patch; - new_rev = mc_hdr->patch_id; - goto out_ok; - } - - ucode_ptr += current_size; - leftover -= current_size; - } + crnt_size = verify_and_add_patch(cpu, fw, leftover); + if (crnt_size < 0) + return ret; - if (!new_mc) { - state = UCODE_NFOUND; - goto free_table; + fw += crnt_size; + leftover -= crnt_size; } -out_ok: - uci->mc = new_mc; - state = UCODE_OK; - pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", - cpu, uci->cpu_sig.rev, new_rev); - -free_table: - free_equiv_cpu_table(); - -out: - return state; + return UCODE_OK; } /* @@ -315,7 +402,7 @@ out: * * This legacy file is always smaller than 2K in size. * - * Starting at family 15h they are in family specific firmware files: + * Beginning with family 15h, they are in family-specific firmware files: * * amd-ucode/microcode_amd_fam15h.bin * amd-ucode/microcode_amd_fam16h.bin @@ -323,12 +410,17 @@ out: * * These might be larger than 2K. */ -static enum ucode_state request_microcode_amd(int cpu, struct device *device) +static enum ucode_state request_microcode_amd(int cpu, struct device *device, + bool refresh_fw) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; - const struct firmware *fw; - enum ucode_state ret = UCODE_NFOUND; struct cpuinfo_x86 *c = &cpu_data(cpu); + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ + if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + return UCODE_OK; if (c->x86 >= 0x15) snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); @@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) goto fw_release; } - ret = generic_load_microcode(cpu, fw->data, fw->size); + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = load_microcode_amd(cpu, fw->data, fw->size); + if (ret != UCODE_OK) + cleanup(); -fw_release: + fw_release: release_firmware(fw); -out: + out: return ret; } @@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) return NULL; } - patch = (void *)get_zeroed_page(GFP_KERNEL); - if (!patch) - return NULL; - return µcode_amd_ops; } void __exit exit_amd_microcode(void) { - free_page((unsigned long)patch); + cleanup(); } diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9e5bcf1e237..3a04b224d0c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -279,19 +279,18 @@ static struct platform_device *microcode_pdev; static int reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; int err = 0; - if (uci->valid) { - enum ucode_state ustate; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - } + if (!uci->valid) + return err; + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; return err; } @@ -373,18 +372,15 @@ static void microcode_fini_cpu(int cpu) static enum ucode_state microcode_resume_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->mc) - return UCODE_NFOUND; - pr_debug("CPU%d updated upon resume\n", cpu); - apply_microcode_on_target(cpu); + + if (apply_microcode_on_target(cpu)) + return UCODE_ERROR; return UCODE_OK; } -static enum ucode_state microcode_init_cpu(int cpu) +static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; @@ -395,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, + refresh_fw); if (ustate == UCODE_OK) { pr_debug("CPU%d updated upon init\n", cpu); @@ -408,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu) static enum ucode_state microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (uci->valid) - ustate = microcode_resume_cpu(cpu); - else - ustate = microcode_init_cpu(cpu); + return microcode_resume_cpu(cpu); - return ustate; + return microcode_init_cpu(cpu, false); } static int mc_device_add(struct device *dev, struct subsys_interface *sif) @@ -431,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) + if (microcode_init_cpu(cpu, true) == UCODE_ERROR) return -EINVAL; return err; @@ -480,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) struct device *dev; dev = get_cpu_device(cpu); - switch (action) { + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: microcode_update_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: pr_debug("CPU%d added\n", cpu); + /* + * "break" is missing on purpose here because we want to fall + * through in order to create the sysfs group. + */ + + case CPU_DOWN_FAILED: if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); break; + case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); break; /* + * case CPU_DEAD: + * * When a CPU goes offline, don't free up or invalidate the copy of * the microcode in kernel memory, so that we can reuse it when the * CPU comes back online without unnecessarily requesting the userspace * for it again. */ - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; } + + /* The CPU refused to come up during a system resume */ + if (action == CPU_UP_CANCELED_FROZEN) + microcode_fini_cpu(cpu); + return NOTIFY_OK; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b3c40..3544aed3933 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } -static enum ucode_state request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device, + bool refresh_fw) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f04..a7c5661f849 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 00000000000..e309cc5c276 --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e..d5f15c3f7b2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) return oprom; } -void *pci_map_biosrom(struct pci_dev *pdev) +void __iomem *pci_map_biosrom(struct pci_dev *pdev) { struct resource *oprom = find_oprom(pdev); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f71..dc3567e083f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { int ret; - unlazy_fpu(src); - *dst = *src; if (fpu_allocated(&src->thread.fpu)) { memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); ret = fpu_alloc(&dst->thread.fpu); if (ret) return ret; - fpu_copy(&dst->thread.fpu, &src->thread.fpu); + fpu_copy(dst, src); } return 0; } @@ -97,16 +95,6 @@ void arch_task_cache_init(void) SLAB_PANIC | SLAB_NOTRACK, NULL); } -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - /* * Free current thread data structures etc.. */ @@ -163,7 +151,13 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - drop_fpu(tsk); + drop_init_fpu(tsk); + /* + * Free the FPU state for non xsave platforms. They get reallocated + * lazily at the first use. + */ + if (!use_eager_fpu()) + free_thread_xstate(tsk); } static void hard_disable_TSC(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121..b9ff83c7135 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb..8a6d20ce197 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } void diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0..b00b33a1839 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + rcu_user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + rcu_user_enter(); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b9..4f165479c45 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -961,9 +961,7 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif - x86_init.paging.pagetable_setup_start(swapper_pg_dir); - paging_init(); - x86_init.paging.pagetable_setup_done(swapper_pg_dir); + x86_init.paging.pagetable_init(); if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376..3160c26db5e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); get_user_ex(*pax, &sc->ax); } get_user_catch(err); @@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { /* Default to using normal stack */ + unsigned long math_size = 0; unsigned long sp = regs->sp; + unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); -#ifdef CONFIG_X86_64 /* redzone */ - sp -= 128; -#endif /* CONFIG_X86_64 */ + if (config_enabled(CONFIG_X86_64)) + sp -= 128; if (!onsigstack) { /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (current->sas_ss_size) sp = current->sas_ss_sp + current->sas_ss_size; - } else { -#ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) + } else if (config_enabled(CONFIG_X86_32) && + (regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; -#endif /* CONFIG_X86_32 */ } } if (used_math()) { - sp -= sig_xstate_size; -#ifdef CONFIG_X86_64 - sp = round_down(sp, 64); -#endif /* CONFIG_X86_64 */ + sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (onsigstack && !likely(on_sig_stack(sp))) return (void __user *)-1L; - /* save i387 state */ - if (used_math() && save_i387_xstate(*fpstate) < 0) + /* save i387 and extended state */ + if (used_math() && + save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; @@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } #endif /* CONFIG_X86_32 */ +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs) +{ +#ifdef CONFIG_X86_X32_ABI + struct rt_sigframe_x32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, info)) + return -EFAULT; + } + + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + put_user_ex(0, &frame->uc.uc__pad0); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* could use a vstub here */ + restorer = NULL; + err |= -EFAULT; + } + put_user_ex(restorer, &frame->pretcode); + } put_user_catch(err); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* We use the x32 calling convention here... */ + regs->di = sig; + regs->si = (unsigned long) &frame->info; + regs->dx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + regs->cs = __USER_CS; + regs->ss = __USER_DS; +#endif /* CONFIG_X86_X32_ABI */ + + return 0; +} + #ifdef CONFIG_X86_32 /* * Atomically swap in the new signal mask, and wait for a signal. @@ -612,55 +678,22 @@ static int signr_convert(int sig) return sig; } -#ifdef CONFIG_X86_32 - -#define is_ia32 1 -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#else /* !CONFIG_X86_32 */ - -#ifdef CONFIG_IA32_EMULATION -#define is_ia32 test_thread_flag(TIF_IA32) -#else /* !CONFIG_IA32_EMULATION */ -#define is_ia32 0 -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_X32_ABI -#define is_x32 test_thread_flag(TIF_X32) - -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs); -#else /* !CONFIG_X86_X32_ABI */ -#define is_x32 0 -#endif /* CONFIG_X86_X32_ABI */ - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs); - -#endif /* CONFIG_X86_32 */ - static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct pt_regs *regs) { int usig = signr_convert(sig); sigset_t *set = sigmask_to_save(); + compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32) { + if (is_ia32_frame()) { if (ka->sa.sa_flags & SA_SIGINFO) - return ia32_setup_rt_frame(usig, ka, info, set, regs); + return ia32_setup_rt_frame(usig, ka, info, cset, regs); else - return ia32_setup_frame(usig, ka, set, regs); -#ifdef CONFIG_X86_X32_ABI - } else if (is_x32) { - return x32_setup_rt_frame(usig, ka, info, - (compat_sigset_t *)set, regs); -#endif + return ia32_setup_frame(usig, ka, cset, regs); + } else if (is_x32_frame()) { + return x32_setup_rt_frame(usig, ka, info, cset, regs); } else { return __setup_rt_frame(sig, ka, info, set, regs); } @@ -779,6 +812,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + rcu_user_exit(); + #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) @@ -804,6 +839,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ + + rcu_user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) @@ -824,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; - } else { - /* could use a vstub here */ - restorer = NULL; - err |= -EFAULT; - } - put_user_ex(restorer, &frame->pretcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - - /* We use the x32 calling convention here... */ - regs->di = sig; - regs->si = (unsigned long) &frame->info; - regs->dx = (unsigned long) &frame->uc; - - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); - - regs->cs = __USER_CS; - regs->ss = __USER_DS; - - return 0; -} - asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe_x32 __user *frame; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c0..c80a33bc528 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d116148..cd3b2438a98 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) return 1; } +void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); +} + /* * Enable single or block step. */ @@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c936..8276dc6794c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,6 +55,7 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> +#include <asm/rcu.h> #include <asm/mach_traps.h> @@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) +static int __kprobes +do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, + struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; + if (trapnr < X86_TRAP_UD) { + if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + return 0; + } + return -1; } #endif + if (!user_mode(regs)) { + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = trapnr; + die(str, regs, error_code); + } + return 0; + } - if (!user_mode(regs)) - goto kernel_trap; + return -1; +} -#ifdef CONFIG_X86_32 -trap_signal: -#endif +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -158,33 +174,20 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif } #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ + exception_exit(regs); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ + exception_exit(regs); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { + exception_enter(regs); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { + preempt_conditional_sti(regs); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); + } + exception_exit(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; + exception_enter(regs); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + exception_enter(regs); conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; + if (regs->flags & X86_VM_MASK) { + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + goto exit; + } #endif tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; + if (!user_mode(regs)) { + if (fixup_exception(regs)) + goto exit; + + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) + die("general protection fault", regs, error_code); + goto exit; + } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); +exit: + exception_exit(regs); } /* May run on IST stack. */ @@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + exception_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); +exit: + exception_exit(regs); } #ifdef CONFIG_X86_64 @@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; + exception_enter(regs); + get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; + goto exit; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); @@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; + goto exit; } /* @@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; +exit: + exception_exit(regs); } /* @@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; #endif - + exception_enter(regs); math_error(regs, error_code, X86_TRAP_MF); + exception_exit(regs); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + exception_enter(regs); math_error(regs, error_code, X86_TRAP_XF); + exception_exit(regs); } dotraplinkage void @@ -613,11 +628,12 @@ void math_state_restore(void) } __thread_fpu_begin(tsk); + /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); + drop_init_fpu(tsk); force_sig(SIGSEGV, tsk); return; } @@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + exception_enter(regs); + BUG_ON(use_eager_fpu()); + #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + exception_exit(regs); return; } #endif @@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif + exception_exit(regs); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + + exception_enter(regs); local_irq_enable(); info.si_signo = SIGILL; @@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_code = ILL_BADSTK; info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); + } + exception_exit(regs); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa..9538f00827a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } +} diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927..1330dd10295 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f3167e891e..7a3d075a814 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,6 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } @@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = { }, .paging = { - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, + .pagetable_init = native_pagetable_init, }, .timers = { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e2070911..4e89b3dd408 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -10,9 +10,7 @@ #include <linux/compat.h> #include <asm/i387.h> #include <asm/fpu-internal.h> -#ifdef CONFIG_IA32_EMULATION -#include <asm/sigcontext32.h> -#endif +#include <asm/sigframe.h> #include <asm/xcr.h> /* @@ -23,13 +21,9 @@ u64 pcntxt_mask; /* * Represents init state for the supported extended state. */ -static struct xsave_struct *init_xstate_buf; - -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif +struct xsave_struct *init_xstate_buf; +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; /* @@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; */ void __sanitize_i387_state(struct task_struct *tsk) { - u64 xstate_bv; - int feature_bit = 0x2; struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + int feature_bit = 0x2; + u64 xstate_bv; if (!fx) return; @@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct i387_fxsave_struct) + sizeof(struct xsave_hdr_struct); unsigned int magic2; - int err; - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - if (err) - return -EFAULT; + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -EINVAL; + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -EINVAL; - - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return err; - /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ - if (magic2 != FP_XSTATE_MAGIC2) - return -EFAULT; + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; return 0; } -#ifdef CONFIG_X86_64 /* * Signal frame handlers. */ - -int save_i387_xstate(void __user *buf) +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { - struct task_struct *tsk = current; - int err = 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; - BUG_ON(sig_xstate_size < xstate_size); + convert_from_fxsr(&env, tsk); - if ((unsigned long)buf % 64) - pr_err("%s: bad fpstate %p\n", __func__, buf); - - if (!used_math()) - return 0; - - if (user_has_fpu()) { - if (use_xsave()) - err = xsave_user(buf); - else - err = fxsave_user(buf); - - if (err) - return err; - user_fpu_end(); + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, - xstate_size)) + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return -1; } - clear_used_math(); /* trigger finit */ + return 0; +} - if (use_xsave()) { - struct _fpstate __user *fx = buf; - struct _xstate __user *x = buf; - u64 xstate_bv; +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xstate_bv; + int err; - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); + if (!use_xsave()) + return err; - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers and - * set the FP/SSE bits. - */ - err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xstate_bv |= XSTATE_FPSSE; - if (err) - return err; - } + err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - return 1; + return err; +} + +static inline int save_user_xstate(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = xsave_user(buf); + else if (use_fxsr()) + err = fxsave_user((struct i387_fxsave_struct __user *) buf); + else + err = fsave_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; } /* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. */ -static int restore_user_xstate(void __user *buf) +int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) { - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; + struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); - mask = fx_sw_user.xstate_bv; + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; + if (!HAVE_HWFP) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - if (unlikely(mask)) - xrstor_state(init_xstate_buf, mask); + if (user_has_fpu()) { + /* Save the live register state to the user directly. */ + if (save_user_xstate(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + fpu_fxsave(&tsk->thread.fpu); + } else { + sanitize_i387_state(tsk); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + drop_init_fpu(tsk); /* trigger finit */ return 0; +} -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xstate_bv, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; + + if (use_xsave()) { + /* These bits must be zero. */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + xsave_hdr->xstate_bv = XSTATE_FPSSE; + else + xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); + } + + if (use_fxsr()) { + /* + * mscsr reserved bits must be masked to zero for security + * reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } } /* - * This restores directly out of user space. Exceptions are handled. + * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -int restore_i387_xstate(void __user *buf) +static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) { + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; + xrstor_state(init_xstate_buf, init_bv); + return fxrstor_checking((__force void *) buf); + } else { + u64 init_bv = pcntxt_mask & ~xbv; + if (unlikely(init_bv)) + xrstor_state(init_xstate_buf, init_bv); + return xrestore_user(buf, xbv); + } + } else if (use_fxsr()) { + return fxrstor_checking((__force void *) buf); + } else + return frstor_checking((__force void *) buf); +} + +int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; - int err = 0; + int state_size = xstate_size; + u64 xstate_bv = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - if (used_math()) - goto clear; + drop_init_fpu(tsk); return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; + } - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + if (!used_math() && init_fpu(tsk)) + return -1; + + if (!HAVE_HWFP) { + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; } - user_fpu_begin(); - if (use_xsave()) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xstate_bv = fx_sw_user.xstate_bv; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears used_math(). This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * set_used_math() is again set. + */ + drop_fpu(tsk); + + if (__copy_from_user(xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); + } + + if (use_eager_fpu()) + math_state_restore(); + + return err; + } else { /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). */ -clear: - clear_fpu(tsk); - clear_used_math(); + user_fpu_begin(); + if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { + drop_init_fpu(tsk); + return -1; + } } - return err; + + return 0; } -#endif /* * Prepare the SW reserved portion of the fxsave memory layout, indicating @@ -321,31 +428,22 @@ clear: */ static void prepare_fx_sw_frame(void) { - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif - - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.extended_size = size; fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} /* * Enable the extended processor state save/restore feature @@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) /* * setup the xstate image representing the init state */ -static void __init setup_xstate_init(void) +static void __init setup_init_fpu_buf(void) { - setup_xstate_features(); - /* * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ init_xstate_buf = alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + fx_finit(&init_xstate_buf->i387); + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); - clts(); /* * Init all the features state with header_bv being 0x0 */ @@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) * of any feature which is not represented by all zero's. */ xsave_state(init_xstate_buf, -1); - stts(); } +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + /* * Enable and initialize the xsave feature. */ @@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); + setup_init_fpu_buf(); - setup_xstate_init(); + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); @@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) next_func = xstate_enable; this_func(); } + +static inline void __init eager_fpu_init_bp(void) +{ + current->thread.fpu.state = + alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); + if (!init_xstate_buf) + setup_init_fpu_buf(); +} + +void __cpuinit eager_fpu_init(void) +{ + static __refdata void (*boot_func)(void) = eager_fpu_init_bp; + + clear_used_math(); + current_thread_info()->status = 0; + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + if (!cpu_has_eager_fpu) { + stts(); + return; + } + + if (boot_func) { + boot_func(); + boot_func = NULL; + } + + /* + * This is same as math_state_restore(). But use_xsave() is + * not yet patched to use math_state_restore(). + */ + init_fpu(current); + __thread_fpu_begin(current); + if (cpu_has_xsave) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); +} diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff..bca63f04dcc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1eb202ee76..851aa7c3b89 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (user_has_fpu()) - clts(); + /* + * If the FPU is not active (through the host task or + * the guest vcpu), then restore the cr0.TS bit. + */ + if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + stts(); load_gdt(&__get_cpu_var(host_gdt)); } @@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void) unsigned long tmpl; struct desc_ptr dt; - vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ + vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ @@ -4543,7 +4547,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2966c847d48..1f09552572f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + __kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + __kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d8e0b..7dde46d68a2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ +#include <asm/rcu.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address) * and the problem, and then passes it off to one of the appropriate * routines. */ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) +static void __kprobes +__do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1209,3 +1210,11 @@ good_area: up_read(&mm->mmap_sem); } + +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + exception_enter(regs); + __do_page_fault(regs, error_code); + exception_exit(regs); +} diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990723e..ab1f6a93b52 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr <= 256) + if (pagenr < 256) return 1; if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 575d86f85ce..4f04db15002 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ -void __init native_pagetable_setup_start(pgd_t *base) +void __init native_pagetable_init(void) { unsigned long pfn, va; - pgd_t *pgd; + pgd_t *pgd, *base = swapper_pg_dir; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -475,10 +475,7 @@ void __init native_pagetable_setup_start(pgd_t *base) pte_clear(NULL, va, pte); } paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); -} - -void __init native_pagetable_setup_done(pgd_t *base) -{ + paging_init(); } /* @@ -493,7 +490,7 @@ void __init native_pagetable_setup_done(pgd_t *base) * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * paravirt_pagetable_init() will set up swapper_pg_dir * appropriately for the rest of the initialization to work. * * In general, pagetable_init() assumes that the pagetable may already diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 613cd83e8c0..0777f042e40 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -98,6 +98,8 @@ static void flush_tlb_func(void *info) { struct flush_tlb_info *f = info; + inc_irq_stat(irq_tlb_count); + if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; @@ -320,7 +322,7 @@ static ssize_t tlbflush_write_file(struct file *file, if (kstrtos8(buf, 0, &shift)) return -EINVAL; - if (shift > 64) + if (shift < -1 || shift >= BITS_PER_LONG) return -EINVAL; tlb_flushall_shift = shift; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 505acdd6d60..192397c9860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->flags = flags; res->start = start; res->end = end; - res->child = NULL; if (!pci_use_crs) { dev_printk(KERN_DEBUG, &info->bridge->dev, @@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, size = sizeof(*info->res) * info->res_num; info->res_num = 0; - info->res = kmalloc(size, GFP_KERNEL); + info->res = kzalloc(size, GFP_KERNEL); if (!info->res) return; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 937bcece700..704b9ec043d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 6f2f8eeed17..3e6d2a6db86 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -62,11 +62,6 @@ out: return irq; } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int __init pci_visws_init(void) { pcibios_enable_irq = &pci_visws_enable_irq; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 73b8be0f367..6db1cc4c753 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o +obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c new file mode 100644 index 00000000000..f6a0c1b8e51 --- /dev/null +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Intel Corporation + * Author: Josh Triplett <josh@joshtriplett.org> + * + * Based on the bgrt driver: + * Copyright 2012 Red Hat, Inc <mjg@redhat.com> + * Author: Matthew Garrett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> + +struct acpi_table_bgrt *bgrt_tab; +void *bgrt_image; +size_t bgrt_image_size; + +struct bmp_header { + u16 id; + u32 size; +} __packed; + +void efi_bgrt_init(void) +{ + acpi_status status; + void __iomem *image; + bool ioremapped = false; + struct bmp_header bmp_header; + + if (acpi_disabled) + return; + + status = acpi_get_table("BGRT", 0, + (struct acpi_table_header **)&bgrt_tab); + if (ACPI_FAILURE(status)) + return; + + if (bgrt_tab->version != 1) + return; + if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) + return; + + image = efi_lookup_mapped_addr(bgrt_tab->image_address); + if (!image) { + image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + ioremapped = true; + if (!image) + return; + } + + memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); + if (ioremapped) + iounmap(image); + bgrt_image_size = bmp_header.size; + + bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); + if (!bgrt_image) + return; + + if (ioremapped) { + image = ioremap(bgrt_tab->image_address, bmp_header.size); + if (!image) { + kfree(bgrt_image); + bgrt_image = NULL; + return; + } + } + + memcpy_fromio(bgrt_image, image, bgrt_image_size); + if (ioremapped) + iounmap(image); +} diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e..aded2a91162 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/efi.h> +#include <linux/efi-bgrt.h> #include <linux/export.h> #include <linux/bootmem.h> #include <linux/memblock.h> @@ -419,10 +420,21 @@ void __init efi_reserve_boot_services(void) } } -static void __init efi_free_boot_services(void) +static void __init efi_unmap_memmap(void) +{ + if (memmap.map) { + early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + memmap.map = NULL; + } +} + +void __init efi_free_boot_services(void) { void *p; + if (!efi_native) + return; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -438,6 +450,8 @@ static void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + + efi_unmap_memmap(); } static int __init efi_systab_init(void *phys) @@ -732,6 +746,11 @@ void __init efi_init(void) #endif } +void __init efi_late_init(void) +{ + efi_bgrt_init(); +} + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -764,6 +783,34 @@ static void __init runtime_code_page_mkexec(void) } /* + * We can't ioremap data in EFI boot services RAM, because we've already mapped + * it as RAM. So, look it up in the existing EFI memory map instead. Only + * callable after efi_enter_virtual_mode and before efi_free_boot_services. + */ +void __iomem *efi_lookup_mapped_addr(u64 phys_addr) +{ + void *p; + if (WARN_ON(!memmap.map)) + return NULL; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + u64 end = md->phys_addr + size; + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + if (!md->virt_addr) + continue; + if (phys_addr >= md->phys_addr && phys_addr < end) { + phys_addr += md->virt_addr - md->phys_addr; + return (__force void __iomem *)(unsigned long)phys_addr; + } + } + return NULL; +} + +/* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor and update @@ -787,8 +834,10 @@ void __init efi_enter_virtual_mode(void) * non-native EFI */ - if (!efi_native) - goto out; + if (!efi_native) { + efi_unmap_memmap(); + return; + } /* Merge contiguous regions of the same type and attribute */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -878,18 +927,12 @@ void __init efi_enter_virtual_mode(void) } /* - * Thankfully, it does seem that no runtime services other than - * SetVirtualAddressMap() will touch boot services code, so we can - * get rid of it all at this point - */ - efi_free_boot_services(); - - /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. * * Call EFI services through wrapper functions. */ + efi.runtime_version = efi_systab.fw_revision; efi.get_time = virt_efi_get_time; efi.set_time = virt_efi_set_time; efi.get_wakeup_time = virt_efi_get_wakeup_time; @@ -906,9 +949,6 @@ void __init efi_enter_virtual_mode(void) if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); -out: - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; kfree(new_memmap); } diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772..aeaff8bef2f 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5ee..46a9df99f3c 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e4..ca255a805ed 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea1350..ba7363ecf89 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37..b5408cecac6 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9..db444c7218f 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/syscalls.h> +#include <sysdep/syscalls.h> /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15..adb08eb5c22 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include <linux/sched.h> +#include <asm/prctl.h> /* XXX This should get the constants from libc */ +#include <os.h> long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a3860..1fbe75a95f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void) pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5141d808e75..7a769b7526c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1174,8 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static void __init xen_pagetable_setup_start(pgd_t *base) +static void xen_post_allocator_init(void); + +static void __init xen_pagetable_init(void) { + paging_init(); + xen_setup_shared_info(); + xen_post_allocator_init(); } static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) @@ -1192,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } -static void xen_post_allocator_init(void); - -static void __init xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); - xen_post_allocator_init(); -} - static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -2068,8 +2065,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; + x86_init.paging.pagetable_init = xen_pagetable_init; pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 76ba0e97e53..72213da605f 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. */ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14f..e2d62d697b5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -544,4 +545,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7a6e5..353c50f1870 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8ed64cfae4f..744f5ee4ba4 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -172,24 +172,6 @@ config CMDLINE source "mm/Kconfig" -config HOTPLUG - bool "Support for hot-pluggable devices" - help - Say Y here if you want to plug devices into your computer while - the system is running, and be able to use them quickly. In many - cases, the devices can likewise be unplugged at any time too. - - One well known example of this is PCMCIA- or PC-cards, credit-card - size devices such as network cards, modems or hard drives which are - plugged into slots found on all modern laptop computers. Another - example, used on modern desktops as well as laptops, is USB. - - Enable HOTPLUG and build a modular kernel. Get agent software - (from <http://linux-hotplug.sourceforge.net/>) and install it. - Then your kernel will automatically call out to a user mode "policy - agent" (/sbin/hotplug) to load modules and set up software needed - to use devices as you hotplug them. - source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 69759e9cb3e..54354de38a7 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -210,14 +210,6 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -/* the next one is stolen from the alpha port... */ - -void __init -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 2c8d6a3d250..bc44311aa18 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -31,6 +31,7 @@ #include <linux/mqueue.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -110,8 +111,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) platform_idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index f9726f6afdf..2cd3d3a3400 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -223,6 +223,7 @@ int __init rs_init(void) serial_driver->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(serial_driver, &serial_ops); + tty_port_link_device(&serial_port, serial_driver, 0); if (tty_register_driver(serial_driver)) panic("Couldn't register serial driver\n"); |