diff options
Diffstat (limited to 'arch')
205 files changed, 7561 insertions, 2634 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261..eef3bbb9707 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool +config HAVE_HW_BREAKPOINT + bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS + + source "kernel/gcov/Kconfig" diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c index 8ac9b842400..34648591073 100644 --- a/arch/arm/kernel/isa.c +++ b/arch/arm/kernel/isa.c @@ -22,47 +22,42 @@ static unsigned int isa_membase, isa_portbase, isa_portshift; static ctl_table ctl_isa_vars[4] = { { - .ctl_name = BUS_ISA_MEM_BASE, .procname = "membase", .data = &isa_membase, .maxlen = sizeof(isa_membase), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = BUS_ISA_PORT_BASE, .procname = "portbase", .data = &isa_portbase, .maxlen = sizeof(isa_portbase), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = BUS_ISA_PORT_SHIFT, .procname = "portshift", .data = &isa_portshift, .maxlen = sizeof(isa_portshift), .mode = 0444, - .proc_handler = &proc_dointvec, - }, {0} + .proc_handler = proc_dointvec, + }, {} }; static struct ctl_table_header *isa_sysctl_header; static ctl_table ctl_isa[2] = { { - .ctl_name = CTL_BUS_ISA, .procname = "isa", .mode = 0555, .child = ctl_isa_vars, - }, {0} + }, {} }; static ctl_table ctl_bus[2] = { { - .ctl_name = CTL_BUS, .procname = "bus", .mode = 0555, .child = ctl_isa, - }, {0} + }, {} }; void __init diff --git a/arch/arm/mach-bcmring/arch.c b/arch/arm/mach-bcmring/arch.c index 0da693b0f7e..fbe6fa02c88 100644 --- a/arch/arm/mach-bcmring/arch.c +++ b/arch/arm/mach-bcmring/arch.c @@ -47,10 +47,6 @@ HW_DECLARE_SPINLOCK(gpio) EXPORT_SYMBOL(bcmring_gpio_reg_lock); #endif -/* FIXME: temporary solution */ -#define BCM_SYSCTL_REBOOT_WARM 1 -#define CTL_BCM_REBOOT 112 - /* sysctl */ int bcmring_arch_warm_reboot; /* do a warm reboot on hard reset */ @@ -58,18 +54,16 @@ static struct ctl_table_header *bcmring_sysctl_header; static struct ctl_table bcmring_sysctl_warm_reboot[] = { { - .ctl_name = BCM_SYSCTL_REBOOT_WARM, .procname = "warm", .data = &bcmring_arch_warm_reboot, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec}, + .proc_handler = proc_dointvec}, {} }; static struct ctl_table bcmring_sysctl_reboot[] = { { - .ctl_name = CTL_BCM_REBOOT, .procname = "reboot", .mode = 0555, .child = bcmring_sysctl_warm_reboot}, diff --git a/arch/arm/mach-davinci/include/mach/asp.h b/arch/arm/mach-davinci/include/mach/asp.h index 18e4ce34ece..e07f70ed7c5 100644 --- a/arch/arm/mach-davinci/include/mach/asp.h +++ b/arch/arm/mach-davinci/include/mach/asp.h @@ -51,6 +51,14 @@ struct snd_platform_data { u32 rx_dma_offset; enum dma_event_q eventq_no; /* event queue number */ unsigned int codec_fmt; + /* + * Allowing this is more efficient and eliminates left and right swaps + * caused by underruns, but will swap the left and right channels + * when compared to previous behavior. + */ + unsigned enable_channel_combine:1; + unsigned sram_size_playback; + unsigned sram_size_capture; /* McASP specific fields */ int tdm_slots; diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 0acb5560229..08e535d92c0 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -410,6 +410,15 @@ static struct regulator_init_data sdp3430_vpll2 = { .consumer_supplies = &sdp3430_vdvi_supply, }; +static struct twl4030_codec_audio_data sdp3430_audio = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data sdp3430_codec = { + .audio_mclk = 26000000, + .audio = &sdp3430_audio, +}; + static struct twl4030_platform_data sdp3430_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, @@ -420,6 +429,7 @@ static struct twl4030_platform_data sdp3430_twldata = { .madc = &sdp3430_madc_data, .keypad = &sdp3430_kp_data, .usb = &sdp3430_usb_data, + .codec = &sdp3430_codec, .vaux1 = &sdp3430_vaux1, .vaux2 = &sdp3430_vaux2, diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c index 08b0816afa6..af411e11ddd 100644 --- a/arch/arm/mach-omap2/board-omap3beagle.c +++ b/arch/arm/mach-omap2/board-omap3beagle.c @@ -254,6 +254,15 @@ static struct twl4030_usb_data beagle_usb_data = { .usb_mode = T2_USB_MODE_ULPI, }; +static struct twl4030_codec_audio_data beagle_audio_data = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data beagle_codec_data = { + .audio_mclk = 26000000, + .audio = &beagle_audio_data, +}; + static struct twl4030_platform_data beagle_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, @@ -261,6 +270,7 @@ static struct twl4030_platform_data beagle_twldata = { /* platform_data for children goes here */ .usb = &beagle_usb_data, .gpio = &beagle_gpio_data, + .codec = &beagle_codec_data, .vmmc1 = &beagle_vmmc1, .vsim = &beagle_vsim, .vdac = &beagle_vdac, diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 4c4d7f8dbd7..25ca5f6a0d3 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -194,6 +194,15 @@ static struct twl4030_madc_platform_data omap3evm_madc_data = { .irq_line = 1, }; +static struct twl4030_codec_audio_data omap3evm_audio_data = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data omap3evm_codec_data = { + .audio_mclk = 26000000, + .audio = &omap3evm_audio_data, +}; + static struct twl4030_platform_data omap3evm_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, @@ -203,6 +212,7 @@ static struct twl4030_platform_data omap3evm_twldata = { .madc = &omap3evm_madc_data, .usb = &omap3evm_usb_data, .gpio = &omap3evm_gpio_data, + .codec = &omap3evm_codec_data, }; static struct i2c_board_info __initdata omap3evm_i2c_boardinfo[] = { diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 7519edb6915..c4be626c842 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -281,11 +281,21 @@ static struct twl4030_usb_data omap3pandora_usb_data = { .usb_mode = T2_USB_MODE_ULPI, }; +static struct twl4030_codec_audio_data omap3pandora_audio_data = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data omap3pandora_codec_data = { + .audio_mclk = 26000000, + .audio = &omap3pandora_audio_data, +}; + static struct twl4030_platform_data omap3pandora_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, .gpio = &omap3pandora_gpio_data, .usb = &omap3pandora_usb_data, + .codec = &omap3pandora_codec_data, .vmmc1 = &pandora_vmmc1, .vmmc2 = &pandora_vmmc2, .keypad = &pandora_kp_data, diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c index 9917d2fddc2..e1fb50451e1 100644 --- a/arch/arm/mach-omap2/board-overo.c +++ b/arch/arm/mach-omap2/board-overo.c @@ -329,6 +329,15 @@ static struct regulator_init_data overo_vmmc1 = { .consumer_supplies = &overo_vmmc1_supply, }; +static struct twl4030_codec_audio_data overo_audio_data = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data overo_codec_data = { + .audio_mclk = 26000000, + .audio = &overo_audio_data, +}; + /* mmc2 (WLAN) and Bluetooth don't use twl4030 regulators */ static struct twl4030_platform_data overo_twldata = { @@ -336,6 +345,7 @@ static struct twl4030_platform_data overo_twldata = { .irq_end = TWL4030_IRQ_END, .gpio = &overo_gpio_data, .usb = &overo_usb_data, + .codec = &overo_codec_data, .vmmc1 = &overo_vmmc1, }; diff --git a/arch/arm/mach-omap2/board-zoom2.c b/arch/arm/mach-omap2/board-zoom2.c index 51e0b3ba5f3..51df584728f 100644 --- a/arch/arm/mach-omap2/board-zoom2.c +++ b/arch/arm/mach-omap2/board-zoom2.c @@ -230,6 +230,15 @@ static struct twl4030_madc_platform_data zoom2_madc_data = { .irq_line = 1, }; +static struct twl4030_codec_audio_data zoom2_audio_data = { + .audio_mclk = 26000000, +}; + +static struct twl4030_codec_data zoom2_codec_data = { + .audio_mclk = 26000000, + .audio = &zoom2_audio_data, +}; + static struct twl4030_platform_data zoom2_twldata = { .irq_base = TWL4030_IRQ_BASE, .irq_end = TWL4030_IRQ_END, @@ -240,6 +249,7 @@ static struct twl4030_platform_data zoom2_twldata = { .usb = &zoom2_usb_data, .gpio = &zoom2_gpio_data, .keypad = &zoom2_kp_twl4030_data, + .codec = &zoom2_codec_data, .vmmc1 = &zoom2_vmmc1, .vmmc2 = &zoom2_vmmc2, .vsim = &zoom2_vsim, diff --git a/arch/arm/mach-s3c6400/include/mach/map.h b/arch/arm/mach-s3c6400/include/mach/map.h index fc8b223bad4..866be31872a 100644 --- a/arch/arm/mach-s3c6400/include/mach/map.h +++ b/arch/arm/mach-s3c6400/include/mach/map.h @@ -48,6 +48,8 @@ #define S3C64XX_PA_IIS1 (0x7F003000) #define S3C64XX_PA_TIMER (0x7F006000) #define S3C64XX_PA_IIC0 (0x7F004000) +#define S3C64XX_PA_PCM0 (0x7F009000) +#define S3C64XX_PA_PCM1 (0x7F00A000) #define S3C64XX_PA_IISV4 (0x7F00D000) #define S3C64XX_PA_IIC1 (0x7F00F000) diff --git a/arch/arm/plat-s3c/include/plat/audio.h b/arch/arm/plat-s3c/include/plat/audio.h index de0e8da48bc..f22d23bb627 100644 --- a/arch/arm/plat-s3c/include/plat/audio.h +++ b/arch/arm/plat-s3c/include/plat/audio.h @@ -1,45 +1,17 @@ -/* arch/arm/mach-s3c2410/include/mach/audio.h +/* arch/arm/plat-s3c/include/plat/audio.h * - * Copyright (c) 2004-2005 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * Ben Dooks <ben@simtec.co.uk> - * - * S3C24XX - Audio platfrom_device info + * Copyright (c) 2009 Samsung Electronics Co. Ltd + * Author: Jaswinder Singh <jassi.brar@samsung.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. -*/ - -#ifndef __ASM_ARCH_AUDIO_H -#define __ASM_ARCH_AUDIO_H __FILE__ - -/* struct s3c24xx_iis_ops - * - * called from the s3c24xx audio core to deal with the architecture - * or the codec's setup and control. - * - * the pointer to itself is passed through in case the caller wants to - * embed this in an larger structure for easy reference to it's context. -*/ + */ -struct s3c24xx_iis_ops { - struct module *owner; - - int (*startup)(struct s3c24xx_iis_ops *me); - void (*shutdown)(struct s3c24xx_iis_ops *me); - int (*suspend)(struct s3c24xx_iis_ops *me); - int (*resume)(struct s3c24xx_iis_ops *me); - - int (*open)(struct s3c24xx_iis_ops *me, struct snd_pcm_substream *strm); - int (*close)(struct s3c24xx_iis_ops *me, struct snd_pcm_substream *strm); - int (*prepare)(struct s3c24xx_iis_ops *me, struct snd_pcm_substream *strm, struct snd_pcm_runtime *rt); +/** + * struct s3c_audio_pdata - common platform data for audio device drivers + * @cfg_gpio: Callback function to setup mux'ed pins in I2S/PCM/AC97 mode + */ +struct s3c_audio_pdata { + int (*cfg_gpio)(struct platform_device *); }; - -struct s3c24xx_platdata_iis { - const char *codec_clk; - struct s3c24xx_iis_ops *ops; - int (*match_dev)(struct device *dev); -}; - -#endif /* __ASM_ARCH_AUDIO_H */ diff --git a/arch/arm/plat-s3c/include/plat/devs.h b/arch/arm/plat-s3c/include/plat/devs.h index 0f540ea1e99..932cbbbb427 100644 --- a/arch/arm/plat-s3c/include/plat/devs.h +++ b/arch/arm/plat-s3c/include/plat/devs.h @@ -28,6 +28,9 @@ extern struct platform_device s3c64xx_device_iis0; extern struct platform_device s3c64xx_device_iis1; extern struct platform_device s3c64xx_device_iisv4; +extern struct platform_device s3c64xx_device_pcm0; +extern struct platform_device s3c64xx_device_pcm1; + extern struct platform_device s3c_device_fb; extern struct platform_device s3c_device_usb; extern struct platform_device s3c_device_lcd; diff --git a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h index 07659dad174..abf2fbc2eb2 100644 --- a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h +++ b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h @@ -67,6 +67,8 @@ #define S3C2412_IISMOD_BCLK_MASK (3 << 1) #define S3C2412_IISMOD_8BIT (1 << 0) +#define S3C64XX_IISMOD_CDCLKCON (1 << 12) + #define S3C2412_IISPSR_PSREN (1 << 15) #define S3C2412_IISFIC_TXFLUSH (1 << 15) diff --git a/arch/arm/plat-s3c64xx/dev-audio.c b/arch/arm/plat-s3c64xx/dev-audio.c index 1322beb40dd..a21a88fbb7e 100644 --- a/arch/arm/plat-s3c64xx/dev-audio.c +++ b/arch/arm/plat-s3c64xx/dev-audio.c @@ -15,9 +15,14 @@ #include <mach/irqs.h> #include <mach/map.h> +#include <mach/dma.h> +#include <mach/gpio.h> #include <plat/devs.h> - +#include <plat/audio.h> +#include <plat/gpio-bank-d.h> +#include <plat/gpio-bank-e.h> +#include <plat/gpio-cfg.h> static struct resource s3c64xx_iis0_resource[] = { [0] = { @@ -66,3 +71,97 @@ struct platform_device s3c64xx_device_iisv4 = { .resource = s3c64xx_iisv4_resource, }; EXPORT_SYMBOL(s3c64xx_device_iisv4); + + +/* PCM Controller platform_devices */ + +static int s3c64xx_pcm_cfg_gpio(struct platform_device *pdev) +{ + switch (pdev->id) { + case 0: + s3c_gpio_cfgpin(S3C64XX_GPD(0), S3C64XX_GPD0_PCM0_SCLK); + s3c_gpio_cfgpin(S3C64XX_GPD(1), S3C64XX_GPD1_PCM0_EXTCLK); + s3c_gpio_cfgpin(S3C64XX_GPD(2), S3C64XX_GPD2_PCM0_FSYNC); + s3c_gpio_cfgpin(S3C64XX_GPD(3), S3C64XX_GPD3_PCM0_SIN); + s3c_gpio_cfgpin(S3C64XX_GPD(4), S3C64XX_GPD4_PCM0_SOUT); + break; + case 1: + s3c_gpio_cfgpin(S3C64XX_GPE(0), S3C64XX_GPE0_PCM1_SCLK); + s3c_gpio_cfgpin(S3C64XX_GPE(1), S3C64XX_GPE1_PCM1_EXTCLK); + s3c_gpio_cfgpin(S3C64XX_GPE(2), S3C64XX_GPE2_PCM1_FSYNC); + s3c_gpio_cfgpin(S3C64XX_GPE(3), S3C64XX_GPE3_PCM1_SIN); + s3c_gpio_cfgpin(S3C64XX_GPE(4), S3C64XX_GPE4_PCM1_SOUT); + break; + default: + printk(KERN_DEBUG "Invalid PCM Controller number!"); + return -EINVAL; + } + + return 0; +} + +static struct resource s3c64xx_pcm0_resource[] = { + [0] = { + .start = S3C64XX_PA_PCM0, + .end = S3C64XX_PA_PCM0 + 0x100 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = DMACH_PCM0_TX, + .end = DMACH_PCM0_TX, + .flags = IORESOURCE_DMA, + }, + [2] = { + .start = DMACH_PCM0_RX, + .end = DMACH_PCM0_RX, + .flags = IORESOURCE_DMA, + }, +}; + +static struct s3c_audio_pdata s3c_pcm0_pdata = { + .cfg_gpio = s3c64xx_pcm_cfg_gpio, +}; + +struct platform_device s3c64xx_device_pcm0 = { + .name = "samsung-pcm", + .id = 0, + .num_resources = ARRAY_SIZE(s3c64xx_pcm0_resource), + .resource = s3c64xx_pcm0_resource, + .dev = { + .platform_data = &s3c_pcm0_pdata, + }, +}; +EXPORT_SYMBOL(s3c64xx_device_pcm0); + +static struct resource s3c64xx_pcm1_resource[] = { + [0] = { + .start = S3C64XX_PA_PCM1, + .end = S3C64XX_PA_PCM1 + 0x100 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = DMACH_PCM1_TX, + .end = DMACH_PCM1_TX, + .flags = IORESOURCE_DMA, + }, + [2] = { + .start = DMACH_PCM1_RX, + .end = DMACH_PCM1_RX, + .flags = IORESOURCE_DMA, + }, +}; + +static struct s3c_audio_pdata s3c_pcm1_pdata = { + .cfg_gpio = s3c64xx_pcm_cfg_gpio, +}; + +struct platform_device s3c64xx_device_pcm1 = { + .name = "samsung-pcm", + .id = 1, + .num_resources = ARRAY_SIZE(s3c64xx_pcm1_resource), + .resource = s3c64xx_pcm1_resource, + .dev = { + .platform_data = &s3c_pcm1_pdata, + }, +}; +EXPORT_SYMBOL(s3c64xx_device_pcm1); diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index 0d4d3e3a4cf..5fa3889d858 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -211,37 +211,6 @@ static int cmode_procctl(ctl_table *ctl, int write, return try_set_cmode(new_cmode)?:*lenp; } -static int cmode_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - if (oldval && oldlenp) { - size_t oldlen; - - if (get_user(oldlen, oldlenp)) - return -EFAULT; - - if (oldlen != sizeof(int)) - return -EINVAL; - - if (put_user(clock_cmode_current, (unsigned __user *)oldval) || - put_user(sizeof(int), oldlenp)) - return -EFAULT; - } - if (newval && newlen) { - int new_cmode; - - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(new_cmode, (int __user *)newval)) - return -EFAULT; - - return try_set_cmode(new_cmode)?:1; - } - return 1; -} - static int try_set_p0(int new_p0) { unsigned long flags, clkc; @@ -314,37 +283,6 @@ static int p0_procctl(ctl_table *ctl, int write, return try_set_p0(new_p0)?:*lenp; } -static int p0_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - if (oldval && oldlenp) { - size_t oldlen; - - if (get_user(oldlen, oldlenp)) - return -EFAULT; - - if (oldlen != sizeof(int)) - return -EINVAL; - - if (put_user(clock_p0_current, (unsigned __user *)oldval) || - put_user(sizeof(int), oldlenp)) - return -EFAULT; - } - if (newval && newlen) { - int new_p0; - - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(new_p0, (int __user *)newval)) - return -EFAULT; - - return try_set_p0(new_p0)?:1; - } - return 1; -} - static int cm_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { @@ -358,87 +296,47 @@ static int cm_procctl(ctl_table *ctl, int write, return try_set_cm(new_cm)?:*lenp; } -static int cm_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - if (oldval && oldlenp) { - size_t oldlen; - - if (get_user(oldlen, oldlenp)) - return -EFAULT; - - if (oldlen != sizeof(int)) - return -EINVAL; - - if (put_user(clock_cm_current, (unsigned __user *)oldval) || - put_user(sizeof(int), oldlenp)) - return -EFAULT; - } - if (newval && newlen) { - int new_cm; - - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(new_cm, (int __user *)newval)) - return -EFAULT; - - return try_set_cm(new_cm)?:1; - } - return 1; -} - - static struct ctl_table pm_table[] = { { - .ctl_name = CTL_PM_SUSPEND, .procname = "suspend", .data = NULL, .maxlen = 0, .mode = 0200, - .proc_handler = &sysctl_pm_do_suspend, + .proc_handler = sysctl_pm_do_suspend, }, { - .ctl_name = CTL_PM_CMODE, .procname = "cmode", .data = &clock_cmode_current, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &cmode_procctl, - .strategy = &cmode_sysctl, + .proc_handler = cmode_procctl, }, { - .ctl_name = CTL_PM_P0, .procname = "p0", .data = &clock_p0_current, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &p0_procctl, - .strategy = &p0_sysctl, + .proc_handler = p0_procctl, }, { - .ctl_name = CTL_PM_CM, .procname = "cm", .data = &clock_cm_current, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &cm_procctl, - .strategy = &cm_sysctl, + .proc_handler = cm_procctl, }, - { .ctl_name = 0} + { } }; static struct ctl_table pm_dir_table[] = { { - .ctl_name = CTL_PM, .procname = "pm", .mode = 0555, .child = pm_table, }, - { .ctl_name = 0} + { } }; /* diff --git a/arch/frv/kernel/sysctl.c b/arch/frv/kernel/sysctl.c index 3e9d7e03fb9..035516cb7a9 100644 --- a/arch/frv/kernel/sysctl.c +++ b/arch/frv/kernel/sysctl.c @@ -176,21 +176,19 @@ static int procctl_frv_pin_cxnr(ctl_table *table, int write, struct file *filp, static struct ctl_table frv_table[] = { { - .ctl_name = 1, .procname = "cache-mode", .data = NULL, .maxlen = 0, .mode = 0644, - .proc_handler = &procctl_frv_cachemode, + .proc_handler = procctl_frv_cachemode, }, #ifdef CONFIG_MMU { - .ctl_name = 2, .procname = "pin-cxnr", .data = NULL, .maxlen = 0, .mode = 0644, - .proc_handler = &procctl_frv_pin_cxnr + .proc_handler = procctl_frv_pin_cxnr }, #endif {} @@ -203,7 +201,6 @@ static struct ctl_table frv_table[] = static struct ctl_table frv_dir_table[] = { { - .ctl_name = CTL_FRV, .procname = "frv", .mode = 0555, .child = frv_table diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index af9405cd70e..10c37510f4b 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -327,7 +327,7 @@ ia32_syscall_table: data8 compat_sys_writev data8 sys_getsid data8 sys_fdatasync - data8 sys32_sysctl + data8 compat_sys_sysctl data8 sys_mlock /* 150 */ data8 sys_munlock data8 sys_mlockall diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 625ed8f76fc..429ec968c9e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1628,61 +1628,6 @@ sys32_msync (unsigned int start, unsigned int len, int flags) return sys_msync(addr, len + (start - addr), flags); } -struct sysctl32 { - unsigned int name; - int nlen; - unsigned int oldval; - unsigned int oldlenp; - unsigned int newval; - unsigned int newlen; - unsigned int __unused[4]; -}; - -#ifdef CONFIG_SYSCTL_SYSCALL -asmlinkage long -sys32_sysctl (struct sysctl32 __user *args) -{ - struct sysctl32 a32; - mm_segment_t old_fs = get_fs (); - void __user *oldvalp, *newvalp; - size_t oldlen; - int __user *namep; - long ret; - - if (copy_from_user(&a32, args, sizeof(a32))) - return -EFAULT; - - /* - * We need to pre-validate these because we have to disable address checking - * before calling do_sysctl() because of OLDLEN but we can't run the risk of the - * user specifying bad addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so this is an - * expensive NOP, but so what... - */ - namep = (int __user *) compat_ptr(a32.name); - oldvalp = compat_ptr(a32.oldval); - newvalp = compat_ptr(a32.newval); - - if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp))) - || !access_ok(VERIFY_WRITE, namep, 0) - || !access_ok(VERIFY_WRITE, oldvalp, 0) - || !access_ok(VERIFY_WRITE, newvalp, 0)) - return -EFAULT; - - set_fs(KERNEL_DS); - lock_kernel(); - ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen, - newvalp, (size_t) a32.newlen); - unlock_kernel(); - set_fs(old_fs); - - if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp))) - return -EFAULT; - - return ret; -} -#endif - asmlinkage long sys32_newuname (struct new_utsname __user *name) { diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 6631a9dfafd..b942f4032d7 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -239,32 +239,29 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) #ifdef CONFIG_SYSCTL static ctl_table kdump_ctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "kdump_on_init", .data = &kdump_on_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "kdump_on_fatal_mca", .data = &kdump_on_fatal_mca, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; static ctl_table sys_table[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = kdump_ctl_table, }, - { .ctl_name = 0 } + { } }; #endif diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f1782705b1f..402698b6689 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -522,42 +522,37 @@ EXPORT_SYMBOL(pfm_sysctl); static ctl_table pfm_ctl_table[]={ { - .ctl_name = CTL_UNNUMBERED, .procname = "debug", .data = &pfm_sysctl.debug, .maxlen = sizeof(int), .mode = 0666, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "debug_ovfl", .data = &pfm_sysctl.debug_ovfl, .maxlen = sizeof(int), .mode = 0666, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "fastctxsw", .data = &pfm_sysctl.fastctxsw, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "expert_mode", .data = &pfm_sysctl.expert_mode, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; static ctl_table pfm_sysctl_dir[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "perfmon", .mode = 0555, .child = pfm_ctl_table, @@ -566,7 +561,6 @@ static ctl_table pfm_sysctl_dir[] = { }; static ctl_table pfm_sysctl_root[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = pfm_sysctl_dir, diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 37e6f305a68..ef3ec1d6ceb 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -12,23 +12,15 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ + #ifndef _ASM_MICROBLAZE_PROM_H #define _ASM_MICROBLAZE_PROM_H #ifdef __KERNEL__ - -/* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ -#define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, content */ -#define OF_DT_NOP 0x4 /* nop */ -#define OF_DT_END 0x9 - -#define OF_DT_VERSION 0x10 - #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/of_fdt.h> #include <linux/proc_fs.h> #include <linux/platform_device.h> #include <asm/irq.h> @@ -41,122 +33,19 @@ #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) -/* - * This is what gets passed to the kernel by prom_init or kexec - * - * The dt struct contains the device tree structure, full pathes and - * property contents. The dt strings contain a separate block with just - * the strings for the property names, and is fully page aligned and - * self contained in a page, so that it can be kept around by the kernel, - * each property name appears only once in this page (cheap compression) - * - * the mem_rsvmap contains a map of reserved ranges of physical memory, - * passing it here instead of in the device-tree itself greatly simplifies - * the job of everybody. It's just a list of u64 pairs (base/size) that - * ends when size is 0 - */ -struct boot_param_header { - u32 magic; /* magic word OF_DT_HEADER */ - u32 totalsize; /* total size of DT block */ - u32 off_dt_struct; /* offset to structure */ - u32 off_dt_strings; /* offset to strings */ - u32 off_mem_rsvmap; /* offset to memory reserve map */ - u32 version; /* format version */ - u32 last_comp_version; /* last compatible version */ - /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ - /* version 17 fields below */ - u32 dt_struct_size; /* size of the DT structure block */ -}; - -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; -}; - -struct device_node { - const char *name; - const char *type; - phandle node; - phandle linux_phandle; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; -}; - extern struct device_node *of_chosen; -static inline int of_node_check_flag(struct device_node *n, unsigned long flag) -{ - return test_bit(flag, &n->_flags); -} - -static inline void of_node_set_flag(struct device_node *n, unsigned long flag) -{ - set_bit(flag, &n->_flags); -} - #define HAVE_ARCH_DEVTREE_FIXUPS -static inline void set_node_proc_entry(struct device_node *dn, - struct proc_dir_entry *de) -{ - dn->pde = de; -} - extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ -extern struct device_node *of_find_all_nodes(struct device_node *prev); -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); - -/* For scanning the flat device-tree at boot time */ -extern int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -extern void *__init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); -extern int __init - of_flat_dt_is_compatible(unsigned long node, const char *name); -extern unsigned long __init of_get_flat_dt_root(void); - /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); /* Other Prototypes */ -extern void finish_device_tree(void); -extern void unflatten_device_tree(void); extern int early_uartlite_console(void); -extern void early_init_devtree(void *); -extern int machine_is_compatible(const char *compat); -extern void print_properties(struct device_node *node); -extern int prom_n_intr_cells(struct device_node *np); -extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern int prom_add_property(struct device_node *np, struct property *prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop); extern struct resource *request_OF_resource(struct device_node *node, int index, const char *name_postfix); @@ -166,18 +55,6 @@ extern int release_OF_resource(struct device_node *node, int index); * OF address retreival & translation */ -/* Helper to read a big number; size is in cells (not bytes) */ -static inline u64 of_read_number(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -/* Like of_read_number, but we want an unsigned long result */ -#define of_read_ulong(cell, size) of_read_number(cell, size) - /* Translate an OF address block into a CPU physical address */ extern u64 of_translate_address(struct device_node *np, const u32 *addr); @@ -305,12 +182,6 @@ extern int of_irq_to_resource(struct device_node *dev, int index, */ extern void __iomem *of_iomap(struct device_node *device, int index); -/* - * NB: This is here while we transition from using asm/prom.h - * to linux/of.h - */ -#include <linux/of.h> - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_MICROBLAZE_PROM_H */ diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index 697ce3007f3..30916193fcc 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -31,7 +31,7 @@ #include <linux/linkage.h> #include <asm/thread_info.h> #include <asm/page.h> -#include <asm/prom.h> /* for OF_DT_HEADER */ +#include <linux/of_fdt.h> /* for OF_DT_HEADER */ #ifdef CONFIG_MMU #include <asm/setup.h> /* COMMAND_LINE_SIZE */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index c005cc6f1aa..b817df172aa 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -860,29 +860,6 @@ struct device_node *of_find_node_by_phandle(phandle handle) EXPORT_SYMBOL(of_find_node_by_phandle); /** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != NULL; np = np->allnext) - if (of_node_get(np)) - break; - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - -/** * of_node_get - Increment refcount of a node * @node: Node to inc refcount, NULL is supported to * simplify writing of callers diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index b77fefaff9d..1a2793efdc4 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -265,67 +265,6 @@ SYSCALL_DEFINE5(n32_msgrcv, int, msqid, u32, msgp, size_t, msgsz, } #endif -struct sysctl_args32 -{ - compat_caddr_t name; - int nlen; - compat_caddr_t oldval; - compat_caddr_t oldlenp; - compat_caddr_t newval; - compat_size_t newlen; - unsigned int __unused[4]; -}; - -#ifdef CONFIG_SYSCTL_SYSCALL - -SYSCALL_DEFINE1(32_sysctl, struct sysctl_args32 __user *, args) -{ - struct sysctl_args32 tmp; - int error; - size_t oldlen; - size_t __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - if (get_user(oldlen, (u32 __user *)A(tmp.oldlenp)) || - put_user(oldlen, (size_t __user *)addr)) - return -EFAULT; - oldlenp = (size_t __user *)addr; - } - - lock_kernel(); - error = do_sysctl((int __user *)A(tmp.name), tmp.nlen, (void __user *)A(tmp.oldval), - oldlenp, (void __user *)A(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t __user *)addr) || - put_user(oldlen, (u32 __user *)A(tmp.oldlenp))) - error = -EFAULT; - } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); - } - return error; -} - -#else - -SYSCALL_DEFINE1(32_sysctl, struct sysctl_args32 __user *, args) -{ - return -ENOSYS; -} - -#endif /* CONFIG_SYSCTL_SYSCALL */ - SYSCALL_DEFINE1(32_newuname, struct new_utsname __user *, name) { int ret = 0; diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 5154e64f7cf..66b5a48676d 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -272,7 +272,7 @@ EXPORT(sysn32_call_table) PTR sys_munlockall PTR sys_vhangup /* 6150 */ PTR sys_pivot_root - PTR sys_32_sysctl + PTR compat_sys_sysctl PTR sys_prctl PTR compat_sys_adjtimex PTR compat_sys_setrlimit /* 6155 */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 23b842be83e..515f9eab2b2 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -356,7 +356,7 @@ sys_call_table: PTR sys_ni_syscall /* 4150 */ PTR sys_getsid PTR sys_fdatasync - PTR sys_32_sysctl + PTR compat_sys_sysctl PTR sys_mlock PTR sys_munlock /* 4155 */ PTR sys_mlockall diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index b3deed8db61..14b9a28a4ae 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -37,23 +37,6 @@ #include "ds1603.h" #endif -/* Strategy function to write EEPROM after changing string entry */ -int sysctl_lasatstring(ctl_table *table, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - int r; - - r = sysctl_string(table, oldval, oldlenp, newval, newlen); - if (r < 0) - return r; - - if (newval && newlen) - lasat_write_eeprom_info(); - - return 0; -} - /* And the same for proc */ int proc_dolasatstring(ctl_table *table, int write, @@ -113,46 +96,6 @@ int proc_dolasatrtc(ctl_table *table, int write, } #endif -/* Sysctl for setting the IP addresses */ -int sysctl_lasat_intvec(ctl_table *table, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - int r; - - r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); - if (r < 0) - return r; - - if (newval && newlen) - lasat_write_eeprom_info(); - - return 0; -} - -#ifdef CONFIG_DS1603 -/* Same for RTC */ -int sysctl_lasat_rtc(ctl_table *table, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - struct timespec ts; - int r; - - read_persistent_clock(&ts); - rtctmp = ts.tv_sec; - if (rtctmp < 0) - rtctmp = 0; - r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); - if (r < 0) - return r; - if (newval && newlen) - rtc_mips_set_mmss(rtctmp); - - return r; -} -#endif - #ifdef CONFIG_INET int proc_lasat_ip(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -214,23 +157,6 @@ int proc_lasat_ip(ctl_table *table, int write, } #endif -static int sysctl_lasat_prid(ctl_table *table, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - int r; - - r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); - if (r < 0) - return r; - if (newval && newlen) { - lasat_board_info.li_eeprom_info.prid = *(int *)newval; - lasat_write_eeprom_info(); - lasat_init_board_info(); - } - return 0; -} - int proc_lasat_prid(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -252,115 +178,92 @@ extern int lasat_boot_to_service; static ctl_table lasat_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "cpu-hz", .data = &lasat_board_info.li_cpu_hz, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "bus-hz", .data = &lasat_board_info.li_bus_hz, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "bmid", .data = &lasat_board_info.li_bmid, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "prid", .data = &lasat_board_info.li_prid, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_lasat_prid, - .strategy = &sysctl_lasat_prid - }, + .proc_handler = proc_lasat_prid, +. }, #ifdef CONFIG_INET { - .ctl_name = CTL_UNNUMBERED, .procname = "ipaddr", .data = &lasat_board_info.li_eeprom_info.ipaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_lasat_ip, - .strategy = &sysctl_lasat_intvec + .proc_handler = proc_lasat_ip, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "netmask", .data = &lasat_board_info.li_eeprom_info.netmask, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_lasat_ip, - .strategy = &sysctl_lasat_intvec + .proc_handler = proc_lasat_ip, }, #endif { - .ctl_name = CTL_UNNUMBERED, .procname = "passwd_hash", .data = &lasat_board_info.li_eeprom_info.passwd_hash, .maxlen = sizeof(lasat_board_info.li_eeprom_info.passwd_hash), .mode = 0600, - .proc_handler = &proc_dolasatstring, - .strategy = &sysctl_lasatstring + .proc_handler = proc_dolasatstring, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "boot-service", .data = &lasat_boot_to_service, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, }, #ifdef CONFIG_DS1603 { - .ctl_name = CTL_UNNUMBERED, .procname = "rtc", .data = &rtctmp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dolasatrtc, - .strategy = &sysctl_lasat_rtc + .proc_handler = proc_dolasatrtc, }, #endif { - .ctl_name = CTL_UNNUMBERED, .procname = "namestr", .data = &lasat_board_info.li_namestr, .maxlen = sizeof(lasat_board_info.li_namestr), .mode = 0444, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string + .proc_handler = proc_dostring, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "typestr", .data = &lasat_board_info.li_typestr, .maxlen = sizeof(lasat_board_info.li_typestr), .mode = 0444, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string + .proc_handler = proc_dostring, }, {} }; static ctl_table lasat_root_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "lasat", .mode = 0555, .child = lasat_table diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 561388b17c9..76d23ec8dfa 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -90,77 +90,6 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23, return -ENOSYS; } -#ifdef CONFIG_SYSCTL - -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) -{ -#ifndef CONFIG_SYSCTL_SYSCALL - return -ENOSYS; -#else - struct __sysctl_args32 tmp; - int error; - unsigned int oldlen32; - size_t oldlen, __user *oldlenp = NULL; - unsigned long addr = (((long __force)&args->__unused[0]) + 7) & ~7; - - DBG(("sysctl32(%p)\n", args)); - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - /* a possibly better hack than this, which will avoid the - * problem if the struct is read only, is to push the - * 'oldlen' value out to the user's stack instead. -PB - */ - if (get_user(oldlen32, (u32 *)(u64)tmp.oldlenp)) - return -EFAULT; - oldlen = oldlen32; - if (put_user(oldlen, (size_t *)addr)) - return -EFAULT; - oldlenp = (size_t *)addr; - } - - lock_kernel(); - error = do_sysctl((int __user *)(u64)tmp.name, tmp.nlen, - (void __user *)(u64)tmp.oldval, oldlenp, - (void __user *)(u64)tmp.newval, tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t *)addr)) { - error = -EFAULT; - } else { - oldlen32 = oldlen; - if (put_user(oldlen32, (u32 *)(u64)tmp.oldlenp)) - error = -EFAULT; - } - } - if (copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused))) - error = -EFAULT; - } - return error; -#endif -} - -#endif /* CONFIG_SYSCTL */ - asmlinkage long sys32_sched_rr_get_interval(pid_t pid, struct compat_timespec __user *interval) { diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 843f423dec6..01c4fcf8f48 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -234,7 +234,7 @@ ENTRY_SAME(getsid) ENTRY_SAME(fdatasync) /* struct __sysctl_args is a mess */ - ENTRY_DIFF(sysctl) + ENTRY_COMP(sysctl) ENTRY_SAME(mlock) /* 150 */ ENTRY_SAME(munlock) ENTRY_SAME(mlockall) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b100518539..bf3382f1904 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE config HCALL_STATS bool "Hypervisor call instrumentation" - depends on PPC_PSERIES && DEBUG_FS + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS help Adds code to keep track of the number of hypervisor calls made and the amount of time spent in hypervisor calls. Wall time spent in diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index f1889abb89b..c568329723b 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_HCALL_STATS=y +# CONFIG_HCALL_STATS is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 9154e852673..f0fb4fc1f6e 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -19,6 +19,7 @@ #define _ASM_POWERPC_EMULATED_OPS_H #include <asm/atomic.h> +#include <linux/perf_event.h> #ifdef CONFIG_PPC_EMULATED_STATS @@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated; extern void ppc_warn_emulated_print(const char *type); -#define PPC_WARN_EMULATED(type) \ +#define __PPC_WARN_EMULATED(type) \ do { \ atomic_inc(&ppc_emulated.type.val); \ if (ppc_warn_emulated) \ @@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type); #else /* !CONFIG_PPC_EMULATED_STATS */ -#define PPC_WARN_EMULATED(type) do { } while (0) +#define __PPC_WARN_EMULATED(type) do { } while (0) #endif /* !CONFIG_PPC_EMULATED_STATS */ +#define PPC_WARN_EMULATED(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ + 1, 0, regs, 0); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + +#define PPC_WARN_ALIGNMENT(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ + 1, 0, regs, regs->dar); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + #endif /* _ASM_POWERPC_EMULATED_OPS_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b10be..c27caac47ad 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,8 @@ struct hcall_stats { unsigned long num_calls; /* number of calls (on this CPU) */ unsigned long tb_total; /* total wall time (mftb) of calls. */ unsigned long purr_total; /* total cpu time (PURR) of calls. */ + unsigned long tb_start; + unsigned long purr_start; }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 6ff04185d2a..2ab9cbd9882 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -1,3 +1,4 @@ +#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ #ifndef _POWERPC_PROM_H #define _POWERPC_PROM_H #ifdef __KERNEL__ @@ -16,6 +17,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> +#include <linux/of_fdt.h> #include <linux/proc_fs.h> #include <linux/platform_device.h> #include <asm/irq.h> @@ -28,133 +30,14 @@ #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) -/* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ -#define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, - * content */ -#define OF_DT_NOP 0x4 /* nop */ -#define OF_DT_END 0x9 - -#define OF_DT_VERSION 0x10 - -/* - * This is what gets passed to the kernel by prom_init or kexec - * - * The dt struct contains the device tree structure, full pathes and - * property contents. The dt strings contain a separate block with just - * the strings for the property names, and is fully page aligned and - * self contained in a page, so that it can be kept around by the kernel, - * each property name appears only once in this page (cheap compression) - * - * the mem_rsvmap contains a map of reserved ranges of physical memory, - * passing it here instead of in the device-tree itself greatly simplifies - * the job of everybody. It's just a list of u64 pairs (base/size) that - * ends when size is 0 - */ -struct boot_param_header -{ - u32 magic; /* magic word OF_DT_HEADER */ - u32 totalsize; /* total size of DT block */ - u32 off_dt_struct; /* offset to structure */ - u32 off_dt_strings; /* offset to strings */ - u32 off_mem_rsvmap; /* offset to memory reserve map */ - u32 version; /* format version */ - u32 last_comp_version; /* last compatible version */ - /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ - /* version 17 fields below */ - u32 dt_struct_size; /* size of the DT structure block */ -}; - - - -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; -}; - -struct device_node { - const char *name; - const char *type; - phandle node; - phandle linux_phandle; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; -}; - extern struct device_node *of_chosen; -static inline int of_node_check_flag(struct device_node *n, unsigned long flag) -{ - return test_bit(flag, &n->_flags); -} - -static inline void of_node_set_flag(struct device_node *n, unsigned long flag) -{ - set_bit(flag, &n->_flags); -} - - #define HAVE_ARCH_DEVTREE_FIXUPS -static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) -{ - dn->pde = de; -} - - -extern struct device_node *of_find_all_nodes(struct device_node *prev); -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); - -/* For scanning the flat device-tree at boot time */ -extern int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -extern void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); -extern int __init of_flat_dt_is_compatible(unsigned long node, const char *name); -extern unsigned long __init of_get_flat_dt_root(void); - /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); -/* Other Prototypes */ -extern void finish_device_tree(void); -extern void unflatten_device_tree(void); -extern void early_init_devtree(void *); -extern int machine_is_compatible(const char *compat); -extern void print_properties(struct device_node *node); -extern int prom_n_intr_cells(struct device_node* np); -extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern int prom_add_property(struct device_node* np, struct property* prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop); - #ifdef CONFIG_PPC32 /* * PCI <-> OF matching functions @@ -178,26 +61,6 @@ extern int release_OF_resource(struct device_node* node, int index); * OF address retreival & translation */ - -/* Helper to read a big number; size is in cells (not bytes) */ -static inline u64 of_read_number(const u32 *cell, int size) -{ - u64 r = 0; - while (size--) - r = (r << 32) | *(cell++); - return r; -} - -/* Like of_read_number, but we want an unsigned long result */ -#ifdef CONFIG_PPC32 -static inline unsigned long of_read_ulong(const u32 *cell, int size) -{ - return cell[size-1]; -} -#else -#define of_read_ulong(cell, size) of_read_number(cell, size) -#endif - /* Translate an OF address block into a CPU physical address */ extern u64 of_translate_address(struct device_node *np, const u32 *addr); @@ -349,11 +212,5 @@ extern int of_irq_to_resource(struct device_node *dev, int index, */ extern void __iomem *of_iomap(struct device_node *device, int index); -/* - * NB: This is here while we transition from using asm/prom.h - * to linux/of.h - */ -#include <linux/of.h> - #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6315edc205d..bc8dd53f718 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -489,6 +489,8 @@ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h new file mode 100644 index 00000000000..cbe2297d68b --- /dev/null +++ b/arch/powerpc/include/asm/trace.h @@ -0,0 +1,133 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM powerpc + +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWERPC_H + +#include <linux/tracepoint.h> + +struct pt_regs; + +TRACE_EVENT(irq_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(irq_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +#ifdef CONFIG_PPC_PSERIES +extern void hcall_tracepoint_regfunc(void); +extern void hcall_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(hcall_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); + +TRACE_EVENT_FN(hcall_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval, + unsigned long *retbuf), + + TP_ARGS(opcode, retval, retbuf), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); +#endif + +#endif /* _TRACE_POWERPC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH asm +#define TRACE_INCLUDE_FILE trace + +#include <trace/define_trace.h> diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a5b632e52fa..3839839f83c 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { - PPC_WARN_EMULATED(spe); + PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif @@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs) flags |= SPLT; nb = 8; } - PPC_WARN_EMULATED(vsx); + PPC_WARN_ALIGNMENT(vsx, regs); return emulate_vsx(addr, reg, areg, regs, flags, nb); } #endif @@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs) * the exception of DCBZ which is handled as a special case here */ if (instr == DCBZ) { - PPC_WARN_EMULATED(dcbz); + PPC_WARN_ALIGNMENT(dcbz, regs); return emulate_dcbz(regs, addr); } if (unlikely(nb == 0)) @@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs) * function */ if (flags & M) { - PPC_WARN_EMULATED(multiple); + PPC_WARN_ALIGNMENT(multiple, regs); return emulate_multiple(regs, addr, reg, nb, flags, instr, swiz); } @@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) { - PPC_WARN_EMULATED(fp_pair); + PPC_WARN_ALIGNMENT(fp_pair, regs); return emulate_fp_pair(addr, reg, flags); } - PPC_WARN_EMULATED(unaligned); + PPC_WARN_ALIGNMENT(unaligned, regs); /* If we are loading, get the data from user space, else * get it from register values diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9763267e38b..bdcb557d470 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -551,7 +551,7 @@ restore: BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) FW_FTR_SECTION_ELSE - b iseries_check_pending_irqs + b .Liseries_check_pending_irqs ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); @@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_BOOK3E */ -iseries_check_pending_irqs: +.Liseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) cmpdi 0,r5,0 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876edcc..c7eb4e0eb86 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ +performance_monitor_pSeries_1: . = 0xf00 b performance_monitor_pSeries +altivec_unavailable_pSeries_1: . = 0xf20 b altivec_unavailable_pSeries +vsx_unavailable_pSeries_1: . = 0xf40 b vsx_unavailable_pSeries diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 88d9c1d5e5f..049dda60e47 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -110,18 +110,16 @@ int powersave_nap; */ static ctl_table powersave_nap_ctl_table[]={ { - .ctl_name = KERN_PPC_POWERSAVE_NAP, .procname = "powersave-nap", .data = &powersave_nap, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; static ctl_table powersave_nap_sysctl_root[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = powersave_nap_ctl_table, diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e5d12117798..02a334662cc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,8 @@ #include <asm/firmware.h> #include <asm/lv1call.h> #endif +#define CREATE_TRACE_POINTS +#include <asm/trace.h> int __irq_offset_value; static int ppc_spurious_interrupts; @@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; + trace_irq_entry(regs); + irq_enter(); check_stack_overflow(); @@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif + + trace_irq_exit(regs); } void __init init_IRQ(void) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 87f1663584b..1eb85fbf53a 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, */ if (record) { struct perf_sample_data data = { - .addr = 0, + .addr = ~0ULL, .period = event->hw.last_period, }; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 0f4c1c73a6a..199de527d41 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index c351b3a57fb..98b6a729a9d 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ca399ba5034..84a607bda8f 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 28a4daacdc0..852f7b7f6b4 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -51,10 +51,6 @@ #define MMCR1_PMCSEL_MSK 0xff /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 479574413a9..8eff48e20db 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = { }; /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d4405b95bfa..4ec30086246 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1317,29 +1317,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np) } /** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (of_node_get(np)) - break; - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - -/** * of_node_get - Increment refcount of a node * @node: Node to inc refcount, NULL is supported to * simplify writing of callers diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4271f7a655a..845c72ab735 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -660,6 +660,7 @@ late_initcall(check_cache_coherency); #ifdef CONFIG_DEBUG_FS struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b97c2d67f4a..c5a4732bcc4 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -520,58 +520,6 @@ asmlinkage long compat_sys_umask(u32 mask) return sys_umask((int)mask); } -#ifdef CONFIG_SYSCTL_SYSCALL -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long compat_sys_sysctl(struct __sysctl_args32 __user *args) -{ - struct __sysctl_args32 tmp; - int error; - size_t oldlen; - size_t __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - oldlenp = (size_t __user *)addr; - if (get_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) || - put_user(oldlen, oldlenp)) - return -EFAULT; - } - - lock_kernel(); - error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, - compat_ptr(tmp.oldval), oldlenp, - compat_ptr(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, oldlenp) || - put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp))) - error = -EFAULT; - } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); - } - return error; -} -#endif - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a136a11c490..36707dec94d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -54,6 +54,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/perf_event.h> +#include <asm/trace.h> #include <asm/io.h> #include <asm/processor.h> @@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + trace_timer_interrupt_entry(regs); + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs) now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + trace_timer_interrupt_exit(regs); return; } old_regs = set_irq_regs(regs); @@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); } void wakeup_decrementer(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a9bfa..9d1f9354d6c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate the mfspr rD, PVR. */ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { - PPC_WARN_EMULATED(mfpvr); + PPC_WARN_EMULATED(mfpvr, regs); rd = (instword >> 21) & 0x1f; regs->gpr[rd] = mfspr(SPRN_PVR); return 0; @@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulating the dcba insn is just a no-op. */ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { - PPC_WARN_EMULATED(dcba); + PPC_WARN_EMULATED(dcba, regs); return 0; } @@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs) int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; - PPC_WARN_EMULATED(mcrxr); + PPC_WARN_EMULATED(mcrxr, regs); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; @@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { - PPC_WARN_EMULATED(string); + PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } /* Emulate the popcntb (Population Count Bytes) instruction. */ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { - PPC_WARN_EMULATED(popcntb); + PPC_WARN_EMULATED(popcntb, regs); return emulate_popcntb_inst(regs, instword); } /* Emulate isel (Integer Select) instruction */ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { - PPC_WARN_EMULATED(isel); + PPC_WARN_EMULATED(isel, regs); return emulate_isel(regs, instword); } @@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs) #ifdef CONFIG_MATH_EMULATION errcode = do_mathemu(regs); if (errcode >= 0) - PPC_WARN_EMULATED(math); + PPC_WARN_EMULATED(math, regs); switch (errcode) { case 0: @@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs) #elif defined(CONFIG_8XX_MINIMAL_FPEMU) errcode = Soft_emulate_8xx(regs); if (errcode >= 0) - PPC_WARN_EMULATED(8xx); + PPC_WARN_EMULATED(8xx, regs); switch (errcode) { case 0: @@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs) flush_altivec_to_thread(current); - PPC_WARN_EMULATED(altivec); + PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 75f3267fdc3..e68beac0a17 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -26,11 +26,11 @@ BEGIN_FTR_SECTION srd r8,r5,r11 mtctr r8 -setup: +.Lsetup: dcbt r9,r4 dcbz r9,r3 add r9,r9,r12 - bdnz setup + bdnz .Lsetup END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) addi r3,r3,-8 srdi r8,r5,7 /* page is copied in 128 byte strides */ diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634e..383a5d0e981 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -14,68 +14,94 @@ #define STK_PARM(i) (48 + ((i)-3)*8) -#ifdef CONFIG_HCALL_STATS +#ifdef CONFIG_TRACEPOINTS + + .section ".toc","aw" + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .llong 0 + + .section ".text" + /* * precall must preserve all registers. use unused STK_PARM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. We branch around this + * in early init (eg when populating the MMU hashtable) by using an + * unconditional cpu feature. */ -#define HCALL_INST_PRECALL \ - std r3,STK_PARM(r3)(r1); /* save opcode */ \ - mftb r0; /* get timebase and */ \ - std r0,STK_PARM(r5)(r1); /* save for later */ \ +#define HCALL_INST_PRECALL(FIRST_REG) \ BEGIN_FTR_SECTION; \ - mfspr r0,SPRN_PURR; /* get PURR and */ \ - std r0,STK_PARM(r6)(r1); /* save for later */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); - + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + std r3,STK_PARM(r3)(r1); \ + std r4,STK_PARM(r4)(r1); \ + std r5,STK_PARM(r5)(r1); \ + std r6,STK_PARM(r6)(r1); \ + std r7,STK_PARM(r7)(r1); \ + std r8,STK_PARM(r8)(r1); \ + std r9,STK_PARM(r9)(r1); \ + std r10,STK_PARM(r10)(r1); \ + std r0,16(r1); \ + addi r4,r1,STK_PARM(FIRST_REG); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_entry; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + ld r4,STK_PARM(r4)(r1); \ + ld r5,STK_PARM(r5)(r1); \ + ld r6,STK_PARM(r6)(r1); \ + ld r7,STK_PARM(r7)(r1); \ + ld r8,STK_PARM(r8)(r1); \ + ld r9,STK_PARM(r9)(r1); \ + ld r10,STK_PARM(r10)(r1); \ + mtlr r0; \ +1: + /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. We branch around this * in early init (eg when populating the MMU hashtable) by using an * unconditional cpu feature. */ -#define HCALL_INST_POSTCALL \ +#define __HCALL_INST_POSTCALL \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ - cmpldi cr7,r4,MAX_HCALL_OPCODE; \ - bgt- cr7,1f; \ - \ - /* get time and PURR snapshots after hcall */ \ - mftb r7; /* timebase after */ \ -BEGIN_FTR_SECTION; \ - mfspr r8,SPRN_PURR; /* PURR after */ \ - ld r6,STK_PARM(r6)(r1); /* PURR before */ \ - subf r6,r6,r8; /* delta */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ - ld r5,STK_PARM(r5)(r1); /* timebase before */ \ - subf r5,r5,r7; /* time delta */ \ - \ - /* calculate address of stat structure r4 = opcode */ \ - srdi r4,r4,2; /* index into array */ \ - mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ - add r4,r4,r7; \ - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ - add r4,r4,r7; \ - \ - /* update stats */ \ - ld r7,HCALL_STAT_CALLS(r4); /* count */ \ - addi r7,r7,1; \ - std r7,HCALL_STAT_CALLS(r4); \ - ld r7,HCALL_STAT_TB(r4); /* timebase */ \ - add r7,r7,r5; \ - std r7,HCALL_STAT_TB(r4); \ -BEGIN_FTR_SECTION; \ - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ - add r7,r7,r6; \ - std r7,HCALL_STAT_PURR(r4); \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + ld r6,STK_PARM(r3)(r1); \ + std r3,STK_PARM(r3)(r1); \ + mr r4,r3; \ + mr r3,r6; \ + std r0,16(r1); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_exit; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + mtlr r0; \ 1: + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + #else -#define HCALL_INST_PRECALL -#define HCALL_INST_POSTCALL +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) #endif .text @@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r4) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL_NORETS lwz r0,8(r1) mtcrf 0xff,r0 @@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall) std r6, 16(r12) std r7, 24(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 @@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9) std r11,56(r12) std r0, 64(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277e..2f58c71b725 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,6 +26,7 @@ #include <asm/hvcall.h> #include <asm/firmware.h> #include <asm/cputable.h> +#include <asm/trace.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = { #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 + +static void probe_hcall_entry(unsigned long opcode, unsigned long *args) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &get_cpu_var(hcall_stats)[opcode / 4]; + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(unsigned long opcode, unsigned long retval, + unsigned long *retbuf) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h->num_calls++; + h->tb_total = mftb() - h->tb_start; + h->purr_total = mfspr(SPRN_PURR) - h->purr_start; + + put_cpu_var(hcall_stats); +} + static int __init hcall_inst_init(void) { struct dentry *hcall_root; @@ -110,6 +140,14 @@ static int __init hcall_inst_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; + if (register_trace_hcall_entry(probe_hcall_entry)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit)) { + unregister_trace_hcall_entry(probe_hcall_entry); + return -EINVAL; + } + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); if (!hcall_root) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec68..0707653612b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -39,6 +39,7 @@ #include <asm/cputable.h> #include <asm/udbg.h> #include <asm/smp.h> +#include <asm/trace.h> #include "plpar_wrappers.h" #include "pseries.h" @@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif + +#ifdef CONFIG_TRACEPOINTS +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +void hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} + +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +{ + trace_hcall_entry(opcode, args); +} + +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf) +{ + trace_hcall_exit(opcode, retval, retbuf); +} +#endif diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index b55fd7ed1c3..495589950dc 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -61,12 +61,12 @@ static struct ctl_table appldata_table[] = { { .procname = "timer", .mode = S_IRUGO | S_IWUSR, - .proc_handler = &appldata_timer_handler, + .proc_handler = appldata_timer_handler, }, { .procname = "interval", .mode = S_IRUGO | S_IWUSR, - .proc_handler = &appldata_interval_handler, + .proc_handler = appldata_interval_handler, }, { }, }; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 0debcec23a3..fda1a8123f9 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -527,59 +527,6 @@ asmlinkage long sys32_sendfile64(int out_fd, int in_fd, return ret; } -#ifdef CONFIG_SYSCTL_SYSCALL -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) -{ - struct __sysctl_args32 tmp; - int error; - size_t oldlen; - size_t __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - if (get_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)) || - put_user(oldlen, (size_t __user *)addr)) - return -EFAULT; - oldlenp = (size_t __user *)addr; - } - - lock_kernel(); - error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, compat_ptr(tmp.oldval), - oldlenp, compat_ptr(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t __user *)addr) || - put_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp))) - error = -EFAULT; - } - if (copy_to_user(args->__unused, tmp.__unused, - sizeof(tmp.__unused))) - error = -EFAULT; - } - return error; -} -#endif - struct stat64_emu31 { unsigned long long st_dev; unsigned int __pad1; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index c07f9ca05ad..45e9092b3aa 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -162,7 +162,6 @@ struct ucontext32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; -struct __sysctl_args32; struct stat64_emu31; struct mmap_arg_struct_emu31; struct fadvise64_64_args; @@ -212,7 +211,6 @@ long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count); long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count); -long sys32_sysctl(struct __sysctl_args32 __user *args); long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf); long sys32_lstat64(char __user * filename, struct stat64_emu31 __user * statbuf); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index cbd9901dc0f..30de2d0e52b 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -689,8 +689,6 @@ sys32_fdatasync_wrapper: llgfr %r2,%r2 # unsigned int jg sys_fdatasync # branch to system call -#sys32_sysctl_wrapper # tbd - .globl sys32_mlock_wrapper sys32_mlock_wrapper: llgfr %r2,%r2 # unsigned long @@ -1087,8 +1085,8 @@ sys32_stime_wrapper: .globl sys32_sysctl_wrapper sys32_sysctl_wrapper: - llgtr %r2,%r2 # struct __sysctl_args32 * - jg sys32_sysctl + llgtr %r2,%r2 # struct compat_sysctl_args * + jg compat_sys_sysctl .globl sys32_fstat64_wrapper sys32_fstat64_wrapper: diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 20f282c911c..071c81f179e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -893,35 +893,30 @@ s390dbf_procactive(ctl_table *table, int write, static struct ctl_table s390dbf_table[] = { { - .ctl_name = CTL_S390DBF_STOPPABLE, .procname = "debug_stoppable", .data = &debug_stoppable, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_S390DBF_ACTIVE, .procname = "debug_active", .data = &debug_active, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &s390dbf_procactive, - .strategy = &sysctl_intvec, + .proc_handler = s390dbf_procactive, }, - { .ctl_name = 0 } + { } }; static struct ctl_table s390dbf_dir_table[] = { { - .ctl_name = CTL_S390DBF, .procname = "s390dbf", .maxlen = 0, .mode = S_IRUGO | S_IXUGO, .child = s390dbf_table, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header *s390dbf_sysctl_header; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index b201135cc18..ff58779bf7e 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -343,30 +343,29 @@ static struct ctl_table cmm_table[] = { { .procname = "cmm_pages", .mode = 0644, - .proc_handler = &cmm_pages_handler, + .proc_handler = cmm_pages_handler, }, { .procname = "cmm_timed_pages", .mode = 0644, - .proc_handler = &cmm_pages_handler, + .proc_handler = cmm_pages_handler, }, { .procname = "cmm_timeout", .mode = 0644, - .proc_handler = &cmm_timeout_handler, + .proc_handler = cmm_timeout_handler, }, - { .ctl_name = 0 } + { } }; static struct ctl_table cmm_dir_table[] = { { - .ctl_name = CTL_VM, .procname = "vm", .maxlen = 0, .mode = 0555, .child = cmm_table, }, - { .ctl_name = 0 } + { } }; #endif diff --git a/arch/sh/boards/mach-hp6xx/setup.c b/arch/sh/boards/mach-hp6xx/setup.c index 8f305b36358..e6dd5e96321 100644 --- a/arch/sh/boards/mach-hp6xx/setup.c +++ b/arch/sh/boards/mach-hp6xx/setup.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/irq.h> +#include <sound/sh_dac_audio.h> #include <asm/hd64461.h> #include <asm/io.h> #include <mach/hp6xx.h> @@ -51,9 +52,63 @@ static struct platform_device jornadakbd_device = { .id = -1, }; +static void dac_audio_start(struct dac_audio_pdata *pdata) +{ + u16 v; + u8 v8; + + /* HP Jornada 680/690 speaker on */ + v = inw(HD64461_GPADR); + v &= ~HD64461_GPADR_SPEAKER; + outw(v, HD64461_GPADR); + + /* HP Palmtop 620lx/660lx speaker on */ + v8 = inb(PKDR); + v8 &= ~PKDR_SPEAKER; + outb(v8, PKDR); + + sh_dac_enable(pdata->channel); +} + +static void dac_audio_stop(struct dac_audio_pdata *pdata) +{ + u16 v; + u8 v8; + + /* HP Jornada 680/690 speaker off */ + v = inw(HD64461_GPADR); + v |= HD64461_GPADR_SPEAKER; + outw(v, HD64461_GPADR); + + /* HP Palmtop 620lx/660lx speaker off */ + v8 = inb(PKDR); + v8 |= PKDR_SPEAKER; + outb(v8, PKDR); + + sh_dac_output(0, pdata->channel); + sh_dac_disable(pdata->channel); +} + +static struct dac_audio_pdata dac_audio_platform_data = { + .buffer_size = 64000, + .channel = 1, + .start = dac_audio_start, + .stop = dac_audio_stop, +}; + +static struct platform_device dac_audio_device = { + .name = "dac_audio", + .id = -1, + .dev = { + .platform_data = &dac_audio_platform_data, + } + +}; + static struct platform_device *hp6xx_devices[] __initdata = { &cf_ide_device, &jornadakbd_device, + &dac_audio_device, }; static void __init hp6xx_init_irq(void) diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index e78c3be8ad2..0894bba9fad 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -313,6 +313,9 @@ static struct platform_device fsi_device = { .dev = { .platform_data = &fsi_info, }, + .archdata = { + .hwblk_id = HWBLK_SPU, /* FSI needs SPU hwblk */ + }, }; /* KEYSC in SoC (Needs SW33-2 set to ON) */ diff --git a/arch/sh/include/mach-common/mach/hp6xx.h b/arch/sh/include/mach-common/mach/hp6xx.h index 0d4165a32dc..bcc301ac12f 100644 --- a/arch/sh/include/mach-common/mach/hp6xx.h +++ b/arch/sh/include/mach-common/mach/hp6xx.h @@ -29,6 +29,9 @@ #define PKDR_LED_GREEN 0x10 +/* HP Palmtop 620lx/660lx speaker on/off */ +#define PKDR_SPEAKER 0x20 + #define SCPDR_TS_SCAN_ENABLE 0x20 #define SCPDR_TS_SCAN_Y 0x02 #define SCPDR_TS_SCAN_X 0x01 @@ -42,6 +45,7 @@ #define ADC_CHANNEL_BACKUP 4 #define ADC_CHANNEL_CHARGE 5 +/* HP Jornada 680/690 speaker on/off */ #define HD64461_GPADR_SPEAKER 0x01 #define HD64461_GPADR_PCMCIA0 (0x02|0x08) diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 267e5ebbb47..75c0cbe2eda 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -877,44 +877,39 @@ static int misaligned_fixup(struct pt_regs *regs) static ctl_table unaligned_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "kernel_reports", .data = &kernel_mode_unaligned_fixup_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "user_reports", .data = &user_mode_unaligned_fixup_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "user_enable", .data = &user_mode_unaligned_fixup_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec}, + .proc_handler = proc_dointvec}, {} }; static ctl_table unaligned_root[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "unaligned_fixup", .mode = 0555, - unaligned_table + .child = unaligned_table }, {} }; static ctl_table sh64_root[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "sh64", .mode = 0555, .child = unaligned_root diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 05ef5380a68..33ac1a9ac88 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -221,6 +221,13 @@ config SPARC64_SMP default y depends on SPARC64 && SMP +config EARLYFB + bool "Support for early boot text console" + default y + depends on SPARC64 + help + Say Y here to enable a faster early framebuffer boot console. + choice prompt "Kernel page size" if SPARC64 default SPARC64_PAGE_SIZE_8KB diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index dfe272d1446..113225b241e 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -27,6 +27,7 @@ AS := $(AS) -32 LDFLAGS := -m elf32_sparc CHECKFLAGS += -D__sparc__ export BITS := 32 +UTS_MACHINE := sparc #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 @@ -46,6 +47,7 @@ CHECKFLAGS += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64 LDFLAGS := -m elf64_sparc export BITS := 64 +UTS_MACHINE := sparc64 KBUILD_CFLAGS += -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow \ -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare \ diff --git a/arch/sparc/include/asm/btext.h b/arch/sparc/include/asm/btext.h new file mode 100644 index 00000000000..9b2bc6b6ed0 --- /dev/null +++ b/arch/sparc/include/asm/btext.h @@ -0,0 +1,6 @@ +#ifndef _SPARC_BTEXT_H +#define _SPARC_BTEXT_H + +extern int btext_find_display(void); + +#endif /* _SPARC_BTEXT_H */ diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index 28a42b73f64..3ea5964c43b 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -148,7 +148,7 @@ static inline unsigned long leon_load_reg(unsigned long paddr) return retval; } -extern inline void leon_srmmu_disabletlb(void) +static inline void leon_srmmu_disabletlb(void) { unsigned int retval; __asm__ __volatile__("lda [%%g0] %2, %0\n\t" : "=r"(retval) : "r"(0), @@ -158,7 +158,7 @@ extern inline void leon_srmmu_disabletlb(void) "i"(ASI_LEON_MMUREGS) : "memory"); } -extern inline void leon_srmmu_enabletlb(void) +static inline void leon_srmmu_enabletlb(void) { unsigned int retval; __asm__ __volatile__("lda [%%g0] %2, %0\n\t" : "=r"(retval) : "r"(0), @@ -190,7 +190,7 @@ extern void leon_init_IRQ(void); extern unsigned long last_valid_pfn; -extern inline unsigned long sparc_leon3_get_dcachecfg(void) +static inline unsigned long sparc_leon3_get_dcachecfg(void) { unsigned int retval; __asm__ __volatile__("lda [%1] %2, %0\n\t" : @@ -201,7 +201,7 @@ extern inline unsigned long sparc_leon3_get_dcachecfg(void) } /* enable snooping */ -extern inline void sparc_leon3_enable_snooping(void) +static inline void sparc_leon3_enable_snooping(void) { __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t" "set 0x800000, %%l2\n\t" @@ -209,7 +209,14 @@ extern inline void sparc_leon3_enable_snooping(void) "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2"); }; -extern inline void sparc_leon3_disable_cache(void) +static inline int sparc_leon3_snooping_enabled(void) +{ + u32 cctrl; + __asm__ __volatile__("lda [%%g0] 2, %0\n\t" : "=r"(cctrl)); + return (cctrl >> 23) & 1; +}; + +static inline void sparc_leon3_disable_cache(void) { __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t" "set 0x00000f, %%l2\n\t" @@ -340,6 +347,30 @@ extern int leon_flush_needed(void); extern void leon_switch_mm(void); extern int srmmu_swprobe_trace; +#ifdef CONFIG_SMP +extern int leon_smp_nrcpus(void); +extern void leon_clear_profile_irq(int cpu); +extern void leon_smp_done(void); +extern void leon_boot_cpus(void); +extern int leon_boot_one_cpu(int i); +void leon_init_smp(void); +extern void cpu_probe(void); +extern void cpu_idle(void); +extern void init_IRQ(void); +extern void cpu_panic(void); +extern int __leon_processor_id(void); +void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu); + +extern unsigned int real_irq_entry[], smpleon_ticker[]; +extern unsigned int patchme_maybe_smp_msg[]; +extern unsigned long trapbase_cpu1[]; +extern unsigned long trapbase_cpu2[]; +extern unsigned long trapbase_cpu3[]; +extern unsigned int t_nmi[], linux_trap_ipi15_leon[]; +extern unsigned int linux_trap_ipi15_sun4m[]; + +#endif /* CONFIG_SMP */ + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ @@ -356,6 +387,10 @@ extern int srmmu_swprobe_trace; #define leon_switch_mm() do {} while (0) #define leon_init_IRQ() do {} while (0) #define init_leon() do {} while (0) +#define leon_smp_done() do {} while (0) +#define leon_boot_cpus() do {} while (0) +#define leon_boot_one_cpu(i) 1 +#define leon_init_smp() do {} while (0) #endif /* !defined(CONFIG_SPARC_LEON) */ diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index 82a190d7efc..f845828ca4c 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -1,3 +1,4 @@ +#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ #ifndef _SPARC_PROM_H #define _SPARC_PROM_H #ifdef __KERNEL__ @@ -28,50 +29,11 @@ #define of_prop_cmp(s1, s2) strcasecmp((s1), (s2)) #define of_node_cmp(s1, s2) strcmp((s1), (s2)) -typedef u32 phandle; -typedef u32 ihandle; - -struct property { - char *name; - int length; - void *value; - struct property *next; - unsigned long _flags; - unsigned int unique_id; -}; - -struct of_irq_controller; -struct device_node { - const char *name; - const char *type; - phandle node; - char *path_component_name; - char *full_name; - - struct property *properties; - struct property *deadprops; /* removed properties */ - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; - unsigned int unique_id; - - struct of_irq_controller *irq_trans; -}; - struct of_irq_controller { unsigned int (*irq_build)(struct device_node *, unsigned int, void *); void *data; }; -#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) -#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) - extern struct device_node *of_find_node_by_cpuid(int cpuid); extern int of_set_property(struct device_node *node, const char *name, void *val, int len); extern struct mutex of_set_property_mutex; @@ -89,15 +51,6 @@ extern void prom_build_devicetree(void); extern void of_populate_present_mask(void); extern void of_fill_in_cpu_data(void); -/* Dummy ref counting routines - to be implemented later */ -static inline struct device_node *of_node_get(struct device_node *node) -{ - return node; -} -static inline void of_node_put(struct device_node *node) -{ -} - /* These routines are here to provide compatibility with how powerpc * handles IRQ mapping for OF device nodes. We precompute and permanently * register them in the of_device objects, whereas powerpc computes them @@ -108,12 +61,6 @@ static inline void irq_dispose_mapping(unsigned int virq) { } -/* - * NB: This is here while we transition from using asm/prom.h - * to linux/of.h - */ -#include <linux/of.h> - extern struct device_node *of_console_device; extern char *of_console_path; extern char *of_console_options; diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index 1dc129ac2fe..6e5621006f8 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -35,8 +35,8 @@ struct rw_semaphore { #endif #define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } +{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 58101dc7049..841905c1021 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -106,6 +106,15 @@ static inline int hard_smp4d_processor_id(void) return cpuid; } +extern inline int hard_smpleon_processor_id(void) +{ + int cpuid; + __asm__ __volatile__("rd %%asr17,%0\n\t" + "srl %0,28,%0" : + "=&r" (cpuid) : ); + return cpuid; +} + #ifndef MODULE static inline int hard_smp_processor_id(void) { diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 5b47fab9966..c6316142db4 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -72,7 +72,7 @@ obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o obj-$(CONFIG_SMP) += trampoline_$(BITS).o smp_$(BITS).o -obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o +obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o leon_smp.o obj-$(CONFIG_SPARC64_SMP) += hvtramp.o obj-y += auxio_$(BITS).o @@ -87,6 +87,7 @@ obj-$(CONFIG_KGDB) += kgdb_$(BITS).o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o CFLAGS_REMOVE_ftrace.o := -pg +obj-$(CONFIG_EARLYFB) += btext.o obj-$(CONFIG_STACKTRACE) += stacktrace.o # sparc64 PCI obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index 9c115823c4b..71ec90b9e31 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/miscdevice.h> -#include <linux/smp_lock.h> #include <linux/pm.h> #include <linux/of.h> #include <linux/of_device.h> @@ -76,7 +75,6 @@ static inline void apc_free(struct of_device *op) static int apc_open(struct inode *inode, struct file *f) { - cycle_kernel_lock(); return 0; } @@ -87,61 +85,46 @@ static int apc_release(struct inode *inode, struct file *f) static long apc_ioctl(struct file *f, unsigned int cmd, unsigned long __arg) { - __u8 inarg, __user *arg; - - arg = (__u8 __user *) __arg; - - lock_kernel(); + __u8 inarg, __user *arg = (__u8 __user *) __arg; switch (cmd) { case APCIOCGFANCTL: - if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg)) { - unlock_kernel(); + if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg)) return -EFAULT; - } break; case APCIOCGCPWR: - if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg)) { - unlock_kernel(); + if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg)) return -EFAULT; - } break; case APCIOCGBPORT: - if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg)) { - unlock_kernel(); + if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg)) return -EFAULT; - } break; case APCIOCSFANCTL: - if (get_user(inarg, arg)) { - unlock_kernel(); + if (get_user(inarg, arg)) return -EFAULT; - } apc_writeb(inarg & APC_REGMASK, APC_FANCTL_REG); break; + case APCIOCSCPWR: - if (get_user(inarg, arg)) { - unlock_kernel(); + if (get_user(inarg, arg)) return -EFAULT; - } apc_writeb(inarg & APC_REGMASK, APC_CPOWER_REG); break; + case APCIOCSBPORT: - if (get_user(inarg, arg)) { - unlock_kernel(); + if (get_user(inarg, arg)) return -EFAULT; - } apc_writeb(inarg & APC_BPMASK, APC_BPORT_REG); break; + default: - unlock_kernel(); return -EINVAL; }; - unlock_kernel(); return 0; } diff --git a/arch/sparc/kernel/auxio_32.c b/arch/sparc/kernel/auxio_32.c index 45c41232fc4..ee8d214cae1 100644 --- a/arch/sparc/kernel/auxio_32.c +++ b/arch/sparc/kernel/auxio_32.c @@ -28,6 +28,7 @@ void __init auxio_probe(void) struct resource r; switch (sparc_cpu_model) { + case sparc_leon: case sun4d: case sun4: return; diff --git a/arch/sparc/kernel/btext.c b/arch/sparc/kernel/btext.c new file mode 100644 index 00000000000..8cc2d56ffe9 --- /dev/null +++ b/arch/sparc/kernel/btext.c @@ -0,0 +1,673 @@ +/* + * Procedures for drawing on the screen early on in the boot process. + * + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + */ +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/console.h> + +#include <asm/btext.h> +#include <asm/oplib.h> +#include <asm/io.h> + +#define NO_SCROLL + +#ifndef NO_SCROLL +static void scrollscreen(void); +#endif + +static void draw_byte(unsigned char c, long locX, long locY); +static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); +static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); + +#define __force_data __attribute__((__section__(".data"))) + +static int g_loc_X __force_data; +static int g_loc_Y __force_data; +static int g_max_loc_X __force_data; +static int g_max_loc_Y __force_data; + +static int dispDeviceRowBytes __force_data; +static int dispDeviceDepth __force_data; +static int dispDeviceRect[4] __force_data; +static unsigned char *dispDeviceBase __force_data; + +#define cmapsz (16*256) + +static unsigned char vga_font[cmapsz]; + +static int __init btext_initialize(unsigned int node) +{ + unsigned int width, height, depth, pitch; + unsigned long address = 0; + u32 prop; + + if (prom_getproperty(node, "width", (char *)&width, 4) < 0) + return -EINVAL; + if (prom_getproperty(node, "height", (char *)&height, 4) < 0) + return -EINVAL; + if (prom_getproperty(node, "depth", (char *)&depth, 4) < 0) + return -EINVAL; + pitch = width * ((depth + 7) / 8); + + if (prom_getproperty(node, "linebytes", (char *)&prop, 4) >= 0 && + prop != 0xffffffffu) + pitch = prop; + + if (pitch == 1) + pitch = 0x1000; + + if (prom_getproperty(node, "address", (char *)&prop, 4) >= 0) + address = prop; + + /* FIXME: Add support for PCI reg properties. Right now, only + * reliable on macs + */ + if (address == 0) + return -EINVAL; + + g_loc_X = 0; + g_loc_Y = 0; + g_max_loc_X = width / 8; + g_max_loc_Y = height / 16; + dispDeviceBase = (unsigned char *)address; + dispDeviceRowBytes = pitch; + dispDeviceDepth = depth == 15 ? 16 : depth; + dispDeviceRect[0] = dispDeviceRect[1] = 0; + dispDeviceRect[2] = width; + dispDeviceRect[3] = height; + + return 0; +} + +/* Calc the base address of a given point (x,y) */ +static unsigned char * calc_base(int x, int y) +{ + unsigned char *base = dispDeviceBase; + + base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); + base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; + return base; +} + +static void btext_clearscreen(void) +{ + unsigned int *base = (unsigned int *)calc_base(0, 0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) + { + unsigned int *ptr = base; + for(j=width; j; --j) + *(ptr++) = 0; + base += (dispDeviceRowBytes >> 2); + } +} + +#ifndef NO_SCROLL +static void scrollscreen(void) +{ + unsigned int *src = (unsigned int *)calc_base(0,16); + unsigned int *dst = (unsigned int *)calc_base(0,0); + unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * + (dispDeviceDepth >> 3)) >> 2; + int i,j; + + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) + { + unsigned int *src_ptr = src; + unsigned int *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = *(src_ptr++); + src += (dispDeviceRowBytes >> 2); + dst += (dispDeviceRowBytes >> 2); + } + for (i=0; i<16; i++) + { + unsigned int *dst_ptr = dst; + for(j=width; j; --j) + *(dst_ptr++) = 0; + dst += (dispDeviceRowBytes >> 2); + } +} +#endif /* ndef NO_SCROLL */ + +void btext_drawchar(char c) +{ + int cline = 0; +#ifdef NO_SCROLL + int x; +#endif + switch (c) { + case '\b': + if (g_loc_X > 0) + --g_loc_X; + break; + case '\t': + g_loc_X = (g_loc_X & -8) + 8; + break; + case '\r': + g_loc_X = 0; + break; + case '\n': + g_loc_X = 0; + g_loc_Y++; + cline = 1; + break; + default: + draw_byte(c, g_loc_X++, g_loc_Y); + } + if (g_loc_X >= g_max_loc_X) { + g_loc_X = 0; + g_loc_Y++; + cline = 1; + } +#ifndef NO_SCROLL + while (g_loc_Y >= g_max_loc_Y) { + scrollscreen(); + g_loc_Y--; + } +#else + /* wrap around from bottom to top of screen so we don't + waste time scrolling each line. -- paulus. */ + if (g_loc_Y >= g_max_loc_Y) + g_loc_Y = 0; + if (cline) { + for (x = 0; x < g_max_loc_X; ++x) + draw_byte(' ', x, g_loc_Y); + } +#endif +} + +static void btext_drawtext(const char *c, unsigned int len) +{ + while (len--) + btext_drawchar(*c++); +} + +static void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned char *base = calc_base(locX << 3, locY << 4); + unsigned char *font = &vga_font[((unsigned int)c) * 16]; + int rb = dispDeviceRowBytes; + + switch(dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } +} + +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_16; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_8; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static void btext_console_write(struct console *con, const char *s, + unsigned int n) +{ + btext_drawtext(s, n); +} + +static struct console btext_console = { + .name = "btext", + .write = btext_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME, + .index = 0, +}; + +int __init btext_find_display(void) +{ + unsigned int node; + char type[32]; + int ret; + + node = prom_inst2pkg(prom_stdout); + if (prom_getproperty(node, "device_type", type, 32) < 0) + return -ENODEV; + if (strcmp(type, "display")) + return -ENODEV; + + ret = btext_initialize(node); + if (!ret) { + btext_clearscreen(); + register_console(&btext_console); + } + return ret; +} + +static unsigned char vga_font[cmapsz] = { +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, +0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, +0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, +0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, +0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, +0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, +0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, +0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, +0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, +0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, +0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, +0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, +0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, +0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, +0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, +0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, +0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, +0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, +0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, +0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, +0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, +0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, +0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, +0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, +0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, +0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, +0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, +0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, +0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, +0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, +0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, +0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, +0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, +0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, +0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, +0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, +0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, +0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, +0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, +0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, +0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, +0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, +0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, +0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, +0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, +0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, +0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, +0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, +0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, +0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, +0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, +0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, +0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, +0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, +0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, +0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, +0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, +0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, +0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, +0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, +0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, +0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, +0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, +0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, +0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, +0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, +0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, +0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, +0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, +0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, +0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, +0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, +0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, +0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, +0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, +0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, +0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, +0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, +0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, +0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, +0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, +0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, +0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, +0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, +0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, +0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, +0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, +0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, +0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, +0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, +0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, +0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, +0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, +0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, +0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, +0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, +0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, +0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, +0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, +0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, +0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, +}; diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 1446df90ef8..e447938d39c 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -185,6 +185,17 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { FPU(-1, NULL) } },{ + 0xF, /* Aeroflex Gaisler */ + .cpu_info = { + CPU(3, "LEON"), + CPU(-1, NULL) + }, + .fpu_info = { + FPU(2, "GRFPU"), + FPU(3, "GRFPU-Lite"), + FPU(-1, NULL) + } +},{ 0x17, .cpu_info = { CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index f41ecc5ac0b..ec9c7bc67d2 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -400,6 +400,39 @@ linux_trap_ipi15_sun4d: /* FIXME */ 1: b,a 1b +#ifdef CONFIG_SPARC_LEON + + .globl smpleon_ticker + /* SMP per-cpu ticker interrupts are handled specially. */ +smpleon_ticker: + SAVE_ALL + or %l0, PSR_PIL, %g2 + wr %g2, 0x0, %psr + WRITE_PAUSE + wr %g2, PSR_ET, %psr + WRITE_PAUSE + call leon_percpu_timer_interrupt + add %sp, STACKFRAME_SZ, %o0 + wr %l0, PSR_ET, %psr + WRITE_PAUSE + RESTORE_ALL + + .align 4 + .globl linux_trap_ipi15_leon +linux_trap_ipi15_leon: + SAVE_ALL + or %l0, PSR_PIL, %l4 + wr %l4, 0x0, %psr + WRITE_PAUSE + wr %l4, PSR_ET, %psr + WRITE_PAUSE + call leon_cross_call_irq + nop + b ret_trap_lockless_ipi + clr %l6 + +#endif /* CONFIG_SPARC_LEON */ + #endif /* CONFIG_SMP */ /* This routine handles illegal instructions and privileged diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 439d82a95ac..21bb2590d4a 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -811,9 +811,31 @@ found_version: got_prop: #ifdef CONFIG_SPARC_LEON /* no cpu-type check is needed, it is a SPARC-LEON */ +#ifdef CONFIG_SMP + ba leon_smp_init + nop + + .global leon_smp_init +leon_smp_init: + sethi %hi(boot_cpu_id), %g1 ! master always 0 + stb %g0, [%g1 + %lo(boot_cpu_id)] + sethi %hi(boot_cpu_id4), %g1 ! master always 0 + stb %g0, [%g1 + %lo(boot_cpu_id4)] + + rd %asr17,%g1 + srl %g1,28,%g1 + + cmp %g0,%g1 + beq sun4c_continue_boot !continue with master + nop + + ba leon_smp_cpu_startup + nop +#else ba sun4c_continue_boot nop #endif +#endif set cputypval, %o2 ldub [%o2 + 0x4], %l1 diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 9f61fd8cbb7..3c8c44f6a41 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -48,8 +48,13 @@ #include <asm/dma.h> #include <asm/iommu.h> #include <asm/io-unit.h> +#include <asm/leon.h> +#ifdef CONFIG_SPARC_LEON +#define mmu_inval_dma_area(p, l) leon_flush_dcache_all() +#else #define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ +#endif static struct resource *_sparc_find_resource(struct resource *r, unsigned long); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 8ab1d4728a4..ce996f97855 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -187,7 +187,7 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %9s", irq_desc[i].chip->typename); + seq_printf(p, " %9s", irq_desc[i].chip->name); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) @@ -484,7 +484,7 @@ static void sun4v_virq_eoi(unsigned int virt_irq) } static struct irq_chip sun4u_irq = { - .typename = "sun4u", + .name = "sun4u", .enable = sun4u_irq_enable, .disable = sun4u_irq_disable, .eoi = sun4u_irq_eoi, @@ -492,7 +492,7 @@ static struct irq_chip sun4u_irq = { }; static struct irq_chip sun4v_irq = { - .typename = "sun4v", + .name = "sun4v", .enable = sun4v_irq_enable, .disable = sun4v_irq_disable, .eoi = sun4v_irq_eoi, @@ -500,7 +500,7 @@ static struct irq_chip sun4v_irq = { }; static struct irq_chip sun4v_virq = { - .typename = "vsun4v", + .name = "vsun4v", .enable = sun4v_virq_enable, .disable = sun4v_virq_disable, .eoi = sun4v_virq_eoi, diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 54d8a5bd482..87f1760c0aa 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -12,11 +12,14 @@ #include <linux/of_platform.h> #include <linux/interrupt.h> #include <linux/of_device.h> + #include <asm/oplib.h> #include <asm/timer.h> #include <asm/prom.h> #include <asm/leon.h> #include <asm/leon_amba.h> +#include <asm/traps.h> +#include <asm/cacheflush.h> #include "prom.h" #include "irq.h" @@ -115,6 +118,21 @@ void __init leon_init_timers(irq_handler_t counter_fn) (((1000000 / 100) - 1))); LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].ctrl, 0); +#ifdef CONFIG_SMP + leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs; + leon_percpu_timer_dev[0].irq = leon3_gptimer_irq+1; + + if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & + (1<<LEON3_GPTIMER_SEPIRQ))) { + prom_printf("irq timer not configured with seperate irqs \n"); + BUG(); + } + + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].val, 0); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].rld, (((1000000/100) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].ctrl, 0); +# endif + } else { printk(KERN_ERR "No Timer/irqctrl found\n"); BUG(); @@ -130,11 +148,41 @@ void __init leon_init_timers(irq_handler_t counter_fn) prom_halt(); } +# ifdef CONFIG_SMP + { + unsigned long flags; + struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)]; + + /* For SMP we use the level 14 ticker, however the bootup code + * has copied the firmwares level 14 vector into boot cpu's + * trap table, we must fix this now or we get squashed. + */ + local_irq_save(flags); + + patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */ + + /* Adjust so that we jump directly to smpleon_ticker */ + trap_table->inst_three += smpleon_ticker - real_irq_entry; + + local_flush_cache_all(); + local_irq_restore(flags); + } +# endif + if (leon3_gptimer_regs) { LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].ctrl, LEON3_GPTIMER_EN | LEON3_GPTIMER_RL | LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN); + +#ifdef CONFIG_SMP + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].ctrl, + LEON3_GPTIMER_EN | + LEON3_GPTIMER_RL | + LEON3_GPTIMER_LD | + LEON3_GPTIMER_IRQEN); +#endif + } } @@ -175,6 +223,42 @@ void __init leon_node_init(struct device_node *dp, struct device_node ***nextp) } } +#ifdef CONFIG_SMP + +void leon_set_cpu_int(int cpu, int level) +{ + unsigned long mask; + mask = get_irqmask(level); + LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->force[cpu], mask); +} + +static void leon_clear_ipi(int cpu, int level) +{ + unsigned long mask; + mask = get_irqmask(level); + LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->force[cpu], mask<<16); +} + +static void leon_set_udt(int cpu) +{ +} + +void leon_clear_profile_irq(int cpu) +{ +} + +void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu) +{ + unsigned long mask, flags, *addr; + mask = get_irqmask(irq_nr); + local_irq_save(flags); + addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]); + LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask))); + local_irq_restore(flags); +} + +#endif + void __init leon_init_IRQ(void) { sparc_init_timers = leon_init_timers; diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c new file mode 100644 index 00000000000..05c0dadd637 --- /dev/null +++ b/arch/sparc/kernel/leon_smp.c @@ -0,0 +1,468 @@ +/* leon_smp.c: Sparc-Leon SMP support. + * + * based on sun4m_smp.c + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB + * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB + */ + +#include <asm/head.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/threads.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/profile.h> +#include <linux/pm.h> +#include <linux/delay.h> + +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq_regs.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/oplib.h> +#include <asm/cpudata.h> +#include <asm/asi.h> +#include <asm/leon.h> +#include <asm/leon_amba.h> + +#ifdef CONFIG_SPARC_LEON + +#include "irq.h" + +extern ctxd_t *srmmu_ctx_table_phys; +static int smp_processors_ready; +extern volatile unsigned long cpu_callin_map[NR_CPUS]; +extern unsigned char boot_cpu_id; +extern cpumask_t smp_commenced_mask; +void __init leon_configure_cache_smp(void); + +static inline unsigned long do_swap(volatile unsigned long *ptr, + unsigned long val) +{ + __asm__ __volatile__("swapa [%1] %2, %0\n\t" : "=&r"(val) + : "r"(ptr), "i"(ASI_LEON_DCACHE_MISS) + : "memory"); + return val; +} + +static void smp_setup_percpu_timer(void); + +void __cpuinit leon_callin(void) +{ + int cpuid = hard_smpleon_processor_id(); + + local_flush_cache_all(); + local_flush_tlb_all(); + leon_configure_cache_smp(); + + /* Get our local ticker going. */ + smp_setup_percpu_timer(); + + calibrate_delay(); + smp_store_cpu_info(cpuid); + + local_flush_cache_all(); + local_flush_tlb_all(); + + /* + * Unblock the master CPU _only_ when the scheduler state + * of all secondary CPUs will be up-to-date, so after + * the SMP initialization the master will be just allowed + * to call the scheduler code. + * Allow master to continue. + */ + do_swap(&cpu_callin_map[cpuid], 1); + + local_flush_cache_all(); + local_flush_tlb_all(); + + cpu_probe(); + + /* Fix idle thread fields. */ + __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(¤t_set[cpuid]) + : "memory" /* paranoid */); + + /* Attach to the address space of init_task. */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + while (!cpu_isset(cpuid, smp_commenced_mask)) + mb(); + + local_irq_enable(); + cpu_set(cpuid, cpu_online_map); +} + +/* + * Cycle through the processors asking the PROM to start each one. + */ + +extern struct linux_prom_registers smp_penguin_ctable; + +void __init leon_configure_cache_smp(void) +{ + unsigned long cfg = sparc_leon3_get_dcachecfg(); + int me = smp_processor_id(); + + if (ASI_LEON3_SYSCTRL_CFG_SSIZE(cfg) > 4) { + printk(KERN_INFO "Note: SMP with snooping only works on 4k cache, found %dk(0x%x) on cpu %d, disabling caches\n", + (unsigned int)ASI_LEON3_SYSCTRL_CFG_SSIZE(cfg), + (unsigned int)cfg, (unsigned int)me); + sparc_leon3_disable_cache(); + } else { + if (cfg & ASI_LEON3_SYSCTRL_CFG_SNOOPING) { + sparc_leon3_enable_snooping(); + } else { + printk(KERN_INFO "Note: You have to enable snooping in the vhdl model cpu %d, disabling caches\n", + me); + sparc_leon3_disable_cache(); + } + } + + local_flush_cache_all(); + local_flush_tlb_all(); +} + +void leon_smp_setbroadcast(unsigned int mask) +{ + int broadcast = + ((LEON3_BYPASS_LOAD_PA(&(leon3_irqctrl_regs->mpstatus)) >> + LEON3_IRQMPSTATUS_BROADCAST) & 1); + if (!broadcast) { + prom_printf("######## !!!! The irqmp-ctrl must have broadcast enabled, smp wont work !!!!! ####### nr cpus: %d\n", + leon_smp_nrcpus()); + if (leon_smp_nrcpus() > 1) { + BUG(); + } else { + prom_printf("continue anyway\n"); + return; + } + } + LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpbroadcast), mask); +} + +unsigned int leon_smp_getbroadcast(void) +{ + unsigned int mask; + mask = LEON_BYPASS_LOAD_PA(&(leon3_irqctrl_regs->mpbroadcast)); + return mask; +} + +int leon_smp_nrcpus(void) +{ + int nrcpu = + ((LEON3_BYPASS_LOAD_PA(&(leon3_irqctrl_regs->mpstatus)) >> + LEON3_IRQMPSTATUS_CPUNR) & 0xf) + 1; + return nrcpu; +} + +void __init leon_boot_cpus(void) +{ + int nrcpu = leon_smp_nrcpus(); + int me = smp_processor_id(); + + printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x \n", (unsigned int)me, + (unsigned int)nrcpu, (unsigned int)NR_CPUS, + (unsigned int)&(leon3_irqctrl_regs->mpstatus)); + + leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me); + leon_enable_irq_cpu(LEON3_IRQ_TICKER, me); + leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me); + + leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER); + + leon_configure_cache_smp(); + smp_setup_percpu_timer(); + local_flush_cache_all(); + +} + +int __cpuinit leon_boot_one_cpu(int i) +{ + + struct task_struct *p; + int timeout; + + /* Cook up an idler for this guy. */ + p = fork_idle(i); + + current_set[i] = task_thread_info(p); + + /* See trampoline.S:leon_smp_cpu_startup for details... + * Initialize the contexts table + * Since the call to prom_startcpu() trashes the structure, + * we need to re-initialize it for each cpu + */ + smp_penguin_ctable.which_io = 0; + smp_penguin_ctable.phys_addr = (unsigned int)srmmu_ctx_table_phys; + smp_penguin_ctable.reg_size = 0; + + /* whirrr, whirrr, whirrrrrrrrr... */ + printk(KERN_INFO "Starting CPU %d : (irqmp: 0x%x)\n", (unsigned int)i, + (unsigned int)&leon3_irqctrl_regs->mpstatus); + local_flush_cache_all(); + + LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i); + + /* wheee... it's going... */ + for (timeout = 0; timeout < 10000; timeout++) { + if (cpu_callin_map[i]) + break; + udelay(200); + } + printk(KERN_INFO "Started CPU %d \n", (unsigned int)i); + + if (!(cpu_callin_map[i])) { + printk(KERN_ERR "Processor %d is stuck.\n", i); + return -ENODEV; + } else { + leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i); + leon_enable_irq_cpu(LEON3_IRQ_TICKER, i); + leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i); + } + + local_flush_cache_all(); + return 0; +} + +void __init leon_smp_done(void) +{ + + int i, first; + int *prev; + + /* setup cpu list for irq rotation */ + first = 0; + prev = &first; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_online(i)) { + *prev = i; + prev = &cpu_data(i).next; + } + } + *prev = first; + local_flush_cache_all(); + + /* Free unneeded trap tables */ + if (!cpu_isset(1, cpu_present_map)) { + ClearPageReserved(virt_to_page(trapbase_cpu1)); + init_page_count(virt_to_page(trapbase_cpu1)); + free_page((unsigned long)trapbase_cpu1); + totalram_pages++; + num_physpages++; + } + if (!cpu_isset(2, cpu_present_map)) { + ClearPageReserved(virt_to_page(trapbase_cpu2)); + init_page_count(virt_to_page(trapbase_cpu2)); + free_page((unsigned long)trapbase_cpu2); + totalram_pages++; + num_physpages++; + } + if (!cpu_isset(3, cpu_present_map)) { + ClearPageReserved(virt_to_page(trapbase_cpu3)); + init_page_count(virt_to_page(trapbase_cpu3)); + free_page((unsigned long)trapbase_cpu3); + totalram_pages++; + num_physpages++; + } + /* Ok, they are spinning and ready to go. */ + smp_processors_ready = 1; + +} + +void leon_irq_rotate(int cpu) +{ +} + +static struct smp_funcall { + smpfunc_t func; + unsigned long arg1; + unsigned long arg2; + unsigned long arg3; + unsigned long arg4; + unsigned long arg5; + unsigned long processors_in[NR_CPUS]; /* Set when ipi entered. */ + unsigned long processors_out[NR_CPUS]; /* Set when ipi exited. */ +} ccall_info; + +static DEFINE_SPINLOCK(cross_call_lock); + +/* Cross calls must be serialized, at least currently. */ +static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + if (smp_processors_ready) { + register int high = NR_CPUS - 1; + unsigned long flags; + + spin_lock_irqsave(&cross_call_lock, flags); + + { + /* If you make changes here, make sure gcc generates proper code... */ + register smpfunc_t f asm("i0") = func; + register unsigned long a1 asm("i1") = arg1; + register unsigned long a2 asm("i2") = arg2; + register unsigned long a3 asm("i3") = arg3; + register unsigned long a4 asm("i4") = arg4; + register unsigned long a5 asm("i5") = 0; + + __asm__ __volatile__("std %0, [%6]\n\t" + "std %2, [%6 + 8]\n\t" + "std %4, [%6 + 16]\n\t" : : + "r"(f), "r"(a1), "r"(a2), "r"(a3), + "r"(a4), "r"(a5), + "r"(&ccall_info.func)); + } + + /* Init receive/complete mapping, plus fire the IPI's off. */ + { + register int i; + + cpu_clear(smp_processor_id(), mask); + cpus_and(mask, cpu_online_map, mask); + for (i = 0; i <= high; i++) { + if (cpu_isset(i, mask)) { + ccall_info.processors_in[i] = 0; + ccall_info.processors_out[i] = 0; + set_cpu_int(i, LEON3_IRQ_CROSS_CALL); + + } + } + } + + { + register int i; + + i = 0; + do { + if (!cpu_isset(i, mask)) + continue; + + while (!ccall_info.processors_in[i]) + barrier(); + } while (++i <= high); + + i = 0; + do { + if (!cpu_isset(i, mask)) + continue; + + while (!ccall_info.processors_out[i]) + barrier(); + } while (++i <= high); + } + + spin_unlock_irqrestore(&cross_call_lock, flags); + } +} + +/* Running cross calls. */ +void leon_cross_call_irq(void) +{ + int i = smp_processor_id(); + + ccall_info.processors_in[i] = 1; + ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, + ccall_info.arg4, ccall_info.arg5); + ccall_info.processors_out[i] = 1; +} + +void leon_percpu_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + int cpu = smp_processor_id(); + + old_regs = set_irq_regs(regs); + + leon_clear_profile_irq(cpu); + + profile_tick(CPU_PROFILING); + + if (!--prof_counter(cpu)) { + int user = user_mode(regs); + + irq_enter(); + update_process_times(user); + irq_exit(); + + prof_counter(cpu) = prof_multiplier(cpu); + } + set_irq_regs(old_regs); +} + +static void __init smp_setup_percpu_timer(void) +{ + int cpu = smp_processor_id(); + + prof_counter(cpu) = prof_multiplier(cpu) = 1; +} + +void __init leon_blackbox_id(unsigned *addr) +{ + int rd = *addr & 0x3e000000; + int rs1 = rd >> 11; + + /* patch places where ___b_hard_smp_processor_id appears */ + addr[0] = 0x81444000 | rd; /* rd %asr17, reg */ + addr[1] = 0x8130201c | rd | rs1; /* srl reg, 0x1c, reg */ + addr[2] = 0x01000000; /* nop */ +} + +void __init leon_blackbox_current(unsigned *addr) +{ + int rd = *addr & 0x3e000000; + int rs1 = rd >> 11; + + /* patch LOAD_CURRENT macro where ___b_load_current appears */ + addr[0] = 0x81444000 | rd; /* rd %asr17, reg */ + addr[2] = 0x8130201c | rd | rs1; /* srl reg, 0x1c, reg */ + addr[4] = 0x81282002 | rd | rs1; /* sll reg, 0x2, reg */ + +} + +/* + * CPU idle callback function + * See .../arch/sparc/kernel/process.c + */ +void pmc_leon_idle(void) +{ + __asm__ volatile ("mov %g0, %asr19"); +} + +void __init leon_init_smp(void) +{ + /* Patch ipi15 trap table */ + t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_leon - linux_trap_ipi15_sun4m); + + BTFIXUPSET_BLACKBOX(hard_smp_processor_id, leon_blackbox_id); + BTFIXUPSET_BLACKBOX(load_current, leon_blackbox_current); + BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, + BTFIXUPCALL_NORM); + +#ifndef PMC_NO_IDLE + /* Assign power management IDLE handler */ + pm_idle = pmc_leon_idle; + printk(KERN_INFO "leon: power management initialized\n"); +#endif + +} + +#endif /* CONFIG_SPARC_LEON */ diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index f1be37a7b12..e1b0541feb1 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -112,7 +112,7 @@ static void free_msi(struct pci_pbm_info *pbm, int msi_num) } static struct irq_chip msi_irq = { - .typename = "PCI-MSI", + .name = "PCI-MSI", .mask = mask_msi_irq, .unmask = unmask_msi_irq, .enable = unmask_msi_irq, diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 21180339cb0..a2a79e76344 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -46,6 +46,7 @@ #include <asm/setup.h> #include <asm/mmu.h> #include <asm/ns87303.h> +#include <asm/btext.h> #ifdef CONFIG_IP_PNP #include <net/ipconfig.h> @@ -286,7 +287,10 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); boot_flags_init(*cmdline_p); - register_console(&prom_early_console); +#ifdef CONFIG_EARLYFB + if (btext_find_display()) +#endif + register_console(&prom_early_console); if (tlb_type == hypervisor) printk("ARCH: SUN4V\n"); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 132d81fb261..91c10fb7085 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -32,6 +32,7 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/cpudata.h> +#include <asm/leon.h> #include "irq.h" @@ -96,6 +97,9 @@ void __init smp_cpus_done(unsigned int max_cpus) case sun4d: smp4d_smp_done(); break; + case sparc_leon: + leon_smp_done(); + break; case sun4e: printk("SUN4E\n"); BUG(); @@ -306,6 +310,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) case sun4d: smp4d_boot_cpus(); break; + case sparc_leon: + leon_boot_cpus(); + break; case sun4e: printk("SUN4E\n"); BUG(); @@ -376,6 +383,9 @@ int __cpuinit __cpu_up(unsigned int cpu) case sun4d: ret = smp4d_boot_one_cpu(cpu); break; + case sparc_leon: + ret = leon_boot_one_cpu(cpu); + break; case sun4e: printk("SUN4E\n"); BUG(); diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 04e28b2671c..00abe87e5b5 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -26,11 +26,6 @@ #include <linux/nfs_fs.h> #include <linux/quota.h> #include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> @@ -591,63 +586,6 @@ out: return ret; } -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) -{ -#ifndef CONFIG_SYSCTL_SYSCALL - return -ENOSYS; -#else - struct __sysctl_args32 tmp; - int error; - size_t oldlen, __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7UL) & ~7UL; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - if (get_user(oldlen, (u32 __user *)(unsigned long)tmp.oldlenp) || - put_user(oldlen, (size_t __user *)addr)) - return -EFAULT; - oldlenp = (size_t __user *)addr; - } - - lock_kernel(); - error = do_sysctl((int __user *)(unsigned long) tmp.name, - tmp.nlen, - (void __user *)(unsigned long) tmp.oldval, - oldlenp, - (void __user *)(unsigned long) tmp.newval, - tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t __user *)addr) || - put_user(oldlen, (u32 __user *)(unsigned long) tmp.oldlenp)) - error = -EFAULT; - } - if (copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused))) - error = -EFAULT; - } - return error; -#endif -} - long sys32_lookup_dcookie(unsigned long cookie_high, unsigned long cookie_low, char __user *buf, size_t len) diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index f37bef747e6..cc8e7862e95 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -68,7 +68,7 @@ sys_call_table32: .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall /*240*/ .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler .word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep -/*250*/ .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl +/*250*/ .word sys32_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 614ac7b4a9d..5b2f595fe65 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -210,9 +210,6 @@ static void __init sbus_time_init(void) btfixup(); sparc_init_timers(timer_interrupt); - - /* Now that OBP ticker has been silenced, it is safe to enable IRQ. */ - local_irq_enable(); } void __init time_init(void) diff --git a/arch/sparc/kernel/trampoline_32.S b/arch/sparc/kernel/trampoline_32.S index 5e235c52d66..691f484e03b 100644 --- a/arch/sparc/kernel/trampoline_32.S +++ b/arch/sparc/kernel/trampoline_32.S @@ -15,7 +15,7 @@ #include <asm/contregs.h> #include <asm/thread_info.h> - .globl sun4m_cpu_startup, __smp4m_processor_id + .globl sun4m_cpu_startup, __smp4m_processor_id, __leon_processor_id .globl sun4d_cpu_startup, __smp4d_processor_id __CPUINIT @@ -106,6 +106,12 @@ __smp4d_processor_id: retl mov %g1, %o7 +__leon_processor_id: + rd %asr17,%g2 + srl %g2,28,%g2 + retl + mov %g1, %o7 + /* CPUID in bootbus can be found at PA 0xff0140000 */ #define SUN4D_BOOTBUS_CPUID 0xf0140000 @@ -160,3 +166,64 @@ sun4d_cpu_startup: nop b,a smp_do_cpu_idle + +#ifdef CONFIG_SPARC_LEON + + __CPUINIT + .align 4 + .global leon_smp_cpu_startup, smp_penguin_ctable + +leon_smp_cpu_startup: + + set smp_penguin_ctable,%g1 + ld [%g1+4],%g1 + srl %g1,4,%g1 + set 0x00000100,%g5 /* SRMMU_CTXTBL_PTR */ + sta %g1, [%g5] ASI_M_MMUREGS + + /* Set up a sane %psr -- PIL<0xf> S<0x1> PS<0x1> CWP<0x0> */ + set (PSR_PIL | PSR_S | PSR_PS), %g1 + wr %g1, 0x0, %psr ! traps off though + WRITE_PAUSE + + /* Our %wim is one behind CWP */ + mov 2, %g1 + wr %g1, 0x0, %wim + WRITE_PAUSE + + /* Set tbr - we use just one trap table. */ + set trapbase, %g1 + wr %g1, 0x0, %tbr + WRITE_PAUSE + + /* Get our CPU id */ + rd %asr17,%g3 + + /* Give ourselves a stack and curptr. */ + set current_set, %g5 + srl %g3, 28, %g4 + sll %g4, 2, %g4 + ld [%g5 + %g4], %g6 + + sethi %hi(THREAD_SIZE - STACKFRAME_SZ), %sp + or %sp, %lo(THREAD_SIZE - STACKFRAME_SZ), %sp + add %g6, %sp, %sp + + /* Turn on traps (PSR_ET). */ + rd %psr, %g1 + wr %g1, PSR_ET, %psr ! traps on + WRITE_PAUSE + + /* Init our caches, etc. */ + set poke_srmmu, %g5 + ld [%g5], %g5 + call %g5 + nop + + /* Start this processor. */ + call leon_callin + nop + + b,a smp_do_cpu_idle + +#endif diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 509b1ffeba6..367321a030d 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -1990,7 +1990,7 @@ void __init poke_leonsparc(void) void __init init_leon(void) { - srmmu_name = "Leon"; + srmmu_name = "LEON"; BTFIXUPSET_CALL(flush_cache_all, leon_flush_cache_all, BTFIXUPCALL_NORM); @@ -2037,8 +2037,6 @@ static void __init get_srmmu_type(void) /* First, check for sparc-leon. */ if (sparc_cpu_model == sparc_leon) { - psr_typ = 0xf; /* hardcoded ids for older models/simulators */ - psr_vers = 2; init_leon(); return; } @@ -2301,7 +2299,8 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(flush_cache_mm, smp_flush_cache_mm, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_cache_range, smp_flush_cache_range, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_cache_page, smp_flush_cache_page, BTFIXUPCALL_NORM); - if (sparc_cpu_model != sun4d) { + if (sparc_cpu_model != sun4d && + sparc_cpu_model != sparc_leon) { BTFIXUPSET_CALL(flush_tlb_all, smp_flush_tlb_all, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_tlb_mm, smp_flush_tlb_mm, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(flush_tlb_range, smp_flush_tlb_range, BTFIXUPCALL_NORM); @@ -2330,6 +2329,8 @@ void __init ld_mmu_srmmu(void) #ifdef CONFIG_SMP if (sparc_cpu_model == sun4d) sun4d_init_smp(); + else if (sparc_cpu_model == sparc_leon) + leon_init_smp(); else sun4m_init_smp(); #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a0..178084b4377 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,6 +49,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT select HAVE_ARCH_KMEMCHECK config OUTPUT_FORMAT diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2649840d888..5e99762eb5c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6b..731318e5ac1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # @@ -287,4 +296,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9ee64..78b32be55e9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -155,6 +155,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69..cbf0776dbec 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx - +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # add at the end to overwrite eventual tuning options from earlier # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index d23b9878265..4eefdca9832 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -653,7 +653,7 @@ ia32_sys_call_table: .quad compat_sys_writev .quad sys_getsid .quad sys_fdatasync - .quad sys32_sysctl /* sysctl */ + .quad compat_sys_sysctl /* sysctl */ .quad sys_mlock /* 150 */ .quad sys_munlock .quad sys_mlockall diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 9f552719882..df82c0e48de 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -434,62 +434,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, return ret; } -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32 { - unsigned int name; - int nlen; - unsigned int oldval; - unsigned int oldlenp; - unsigned int newval; - unsigned int newlen; - unsigned int __unused[4]; -}; - - -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) -{ - struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs(); - void __user *oldvalp, *newvalp; - size_t oldlen; - int __user *namep; - long ret; - - if (copy_from_user(&a32, args32, sizeof(a32))) - return -EFAULT; - - /* - * We need to pre-validate these because we have to disable - * address checking before calling do_sysctl() because of - * OLDLEN but we can't run the risk of the user specifying bad - * addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so - * this is an expensive NOP, but so what... - */ - namep = compat_ptr(a32.name); - oldvalp = compat_ptr(a32.oldval); - newvalp = compat_ptr(a32.newval); - - if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) - || !access_ok(VERIFY_WRITE, namep, 0) - || !access_ok(VERIFY_WRITE, oldvalp, 0) - || !access_ok(VERIFY_WRITE, newvalp, 0)) - return -EFAULT; - - set_fs(KERNEL_DS); - lock_kernel(); - ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen, - newvalp, (size_t) a32.newlen); - unlock_kernel(); - set_fs(old_fs); - - if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) - return -EFAULT; - - return ret; -} -#endif - /* warning: next two assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa..9f828f87ca3 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa8..7a15588e45d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include <linux/user.h> #include <linux/elfcore.h> +#include <asm/debugreg.h> /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c3..b97f786a48d 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include <asm/asm.h> #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e0..69b74a7b877 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6c..b4ac2cdcb64 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; @@ -532,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long default_check_apicid_present(int bit) @@ -542,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -583,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de..7fe3b3060f0 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607c..00000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ee1931be659..ffb9bb6b6c3 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8..485ae415fae 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include <asm/alternative.h> /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e..8240f76b531 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,34 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +DECLARE_PER_CPU(unsigned long, cpu_dr7); + +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; +} + +extern void aout_dump_debugregs(struct user *dump); + +extern void hw_breakpoint_restore(void); + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f0104..108eb6fd1ae 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 00000000000..0675a7c4c20 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,73 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include <linux/kdebug.h> +#include <linux/percpu.h> +#include <linux/list.h> + +/* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define X86_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define X86_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define X86_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define X86_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +struct perf_event; +struct pmu; + +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08..6e124269fd4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,32 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 00000000000..205b063e3e3 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy last prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ +/* Other Legacy prefixes */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ + +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); + +/* Attribute checking functions */ +static inline int inat_is_legacy_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 00000000000..cb3c20ce39c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 00000000000..96c2e0ad04c --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,184 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_vex_prefix(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f..ffd700ff5dc 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364..858baa061cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include <linux/percpu.h> @@ -118,9 +120,11 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +int mcheck_init(void); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline int mcheck_init(void) { return 0; } +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE @@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0b..61d90b1331c 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962f..5bef931f8b1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd506..8d9f8548a87 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c9786480f0f..6f8ec1c37e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -30,6 +30,7 @@ struct mm_struct; #include <linux/math64.h> #include <linux/init.h> +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -422,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -443,13 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; - unsigned long debugreg6; - unsigned long debugreg7; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349a..3d11fd0f44c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/segment.h> +#include <asm/page_types.h> #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e61718..3d3e8353ee5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 72a6dcd1299..9af9decb38c 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -51,11 +51,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32; -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); -#endif - asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f9737489..022a84386de 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ @@ -157,19 +155,22 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b49..abd3e0ea762 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb..0c9825e97f3 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,34 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); + else + copy_from_user_overflow(); + + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc5..46324c6a4f6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,12 +19,37 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + might_fault(); + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { @@ -176,8 +201,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b64..d6b17c76062 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,14 @@ struct uv_IO_APIC_route_entry { dest : 32; }; -extern struct irq_chip uv_irq_chip; - -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); -extern void arch_disable_uv_irq(int, unsigned long); +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d8e5d0cdd67..4f2e66e29ec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd..565c1bfc507 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f071..ad8c75b9e45 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -241,28 +241,13 @@ static int modern_apic(void) } /* - * bare function to substitute write operation - * and it's _that_ fast :) - */ -static void native_apic_write_dummy(u32 reg, u32 v) -{ - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); -} - -static u32 native_apic_read_dummy(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -/* - * right after this call apic->write/read doesn't do anything - * note that there is no restore operation it works one way + * right after this call apic become NOOP driven + * so apic->write/read doesn't do anything */ void apic_disable(void) { - apic->read = native_apic_read_dummy; - apic->write = native_apic_write_dummy; + pr_info("APIC: switched to apic NOOP\n"); + apic = &apic_noop; } void native_apic_wait_icr_idle(void) @@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); + apic_write(APIC_TMICT, 0); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ @@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret = 0; + int dmar_table_init_ret; -#ifdef CONFIG_INTR_REMAP dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret) - pr_debug("dmar_table_init() failed with %d:\n", - dmar_table_init_ret); -#endif + if (dmar_table_init_ret && !x2apic_supported()) + return; ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 00000000000..d9acc3bee0f --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c @@ -0,0 +1,200 @@ +/* + * NOOP APIC driver. + * + * Does almost nothing and should be substituted by a real apic driver via + * probe routine. + * + * Though in case if apic is disabled (for some reason) we try + * to not uglify the caller's code and allow to call (some) apic routines + * like self-ipi, etc... + */ + +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <asm/fixmap.h> +#include <asm/mpspec.h> +#include <asm/apicdef.h> +#include <asm/apic.h> +#include <asm/setup.h> + +#include <linux/smp.h> +#include <asm/ipi.h> + +#include <linux/interrupt.h> +#include <asm/acpi.h> +#include <asm/e820.h> + +static void noop_init_apic_ldr(void) { } +static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_allbutself(int vector) { } +static void noop_send_IPI_all(int vector) { } +static void noop_send_IPI_self(int vector) { } +static void noop_apic_wait_icr_idle(void) { } +static void noop_apic_icr_write(u32 low, u32 id) { } + +static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) +{ + return -1; +} + +static u32 noop_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static u64 noop_apic_icr_read(void) +{ + return 0; +} + +static int noop_cpu_to_logical_apicid(int cpu) +{ + return 0; +} + +static int noop_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return 0; +} + +static unsigned int noop_get_apic_id(unsigned long x) +{ + return 0; +} + +static int noop_probe(void) +{ + /* + * NOOP apic should not ever be + * enabled via probe routine + */ + return 0; +} + +static int noop_apic_id_registered(void) +{ + /* + * if we would be really "pedantic" + * we should pass read_apic_id() here + * but since NOOP suppose APIC ID = 0 + * lets save a few cycles + */ + return physid_isset(0, phys_cpu_present_map); +} + +static const struct cpumask *noop_target_cpus(void) +{ + /* only BSP here */ + return cpumask_of(0); +} + +static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +static unsigned long noop_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + +int noop_apicid_to_node(int logical_apicid) +{ + /* we're always on node 0 */ + return 0; +} + +static u32 noop_apic_read(u32 reg) +{ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +static void noop_apic_write(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +struct apic apic_noop = { + .name = "noop", + .probe = noop_probe, + .acpi_madt_oem_check = NULL, + + .apic_id_registered = noop_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = noop_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = noop_check_apicid_used, + .check_apicid_present = noop_check_apicid_present, + + .vector_allocation_domain = noop_vector_allocation_domain, + .init_apic_ldr = noop_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = noop_apicid_to_node, + + .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, + + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + + .phys_pkg_id = noop_phys_pkg_id, + + .mps_oem_check = NULL, + + .get_apic_id = noop_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = noop_send_IPI_mask, + .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, + .send_IPI_allbutself = noop_send_IPI_allbutself, + .send_IPI_all = noop_send_IPI_all, + .send_IPI_self = noop_send_IPI_self, + + .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, + + /* should be safe */ + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, + + .read = noop_apic_read, + .write = noop_apic_write, + .icr_read = noop_apic_icr_read, + .icr_write = noop_apic_icr_write, + .wait_icr_idle = noop_apic_wait_icr_idle, + .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, +}; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b..38dcecfa581 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) #endif } -static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); + physids_promote(0xFFL, retmap); } static int bigsmp_check_phys_apicid_present(int phys_apicid) @@ -230,7 +225,7 @@ struct apic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b4..e85f8fb7f8e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) return cpumask_of(smp_processor_id()); } -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } + static unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); @@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) static int cpu_id; -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) { - physid_mask_t mask; - - mask = physid_mask_of_physid(cpu_id); + physid_set_mask_of_physid(cpu_id, retmap); ++cpu_id; - - return mask; } /* Mapping from cpu number to logical apicid */ @@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) #endif } -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); + physids_promote(0xFFL, retmap); } static int es7000_check_phys_apicid_present(int cpu_physical_apicid) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f..c0b4468683f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include <asm/irq_remapping.h> #include <asm/hpet.h> #include <asm/hw_irq.h> -#include <asm/uv/uv_hub.h> -#include <asm/uv/uv_irq.h> #include <asm/apic.h> @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -1237,8 +1227,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) { + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) +{ + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ @@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -2433,8 +2432,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2452,7 +2449,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } @@ -2460,21 +2456,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif @@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + eoi_ioapic_irq(desc); + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); @@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; @@ -3708,75 +3764,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3944,7 +3931,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3960,10 +3947,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3976,7 +3963,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { @@ -4106,7 +4093,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } @@ -4140,18 +4127,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188..6389432a9db 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,8 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask __read_mostly; +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, &backtrace_mask)) { + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); @@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, &backtrace_mask); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; } @@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(&backtrace_mask)) + if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b850..07cdbdcd7a9 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) return cpu_all_mask; } -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long numaq_check_apicid_present(int bit) @@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); + return physids_promote(0xFUL, retmap); } static inline int numaq_cpu_to_logical_apicid(int cpu) @@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); - return physid_mask_of_physid(cpu + 4*node); + physid_set_mask_of_physid(cpu + 4*node, retmap); } /* Where the IO area was mapped on multiquad, always 0 otherwise */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947..1a6559f6768 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,7 @@ struct apic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0b..9b419263d90 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) return cpumask_of(0); } -static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); + physids_promote(0x0FL, retmap); } -static physid_mask_t summit_apicid_to_cpu_present(int apicid) +static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) { - return physid_mask_of_physid(0); + physid_set_mask_of_physid(0, retmap); } static int summit_check_phys_apicid_present(int physical_apicid) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 326c25477d3..130c4b93487 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + static __init void uv_rtc_init(void) { long status; @@ -550,6 +556,8 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + map_low_mmrs(); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5a..b5b6b23bce5 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -204,7 +204,6 @@ #include <linux/module.h> #include <linux/poll.h> -#include <linux/smp_lock.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/timer.h> @@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); +static DEFINE_MUTEX(apm_mutex); /* * Set up a segment that references the real mode segment 0x40 @@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); - unlock_kernel(); + mutex_unlock(&apm_mutex); break; case APM_IOC_SUSPEND: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { ret = suspend(1); + mutex_unlock(&apm_mutex); } else { as->suspend_wait = 1; + mutex_unlock(&apm_mutex); wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); ret = as->suspend_result; } - unlock_kernel(); return ret; default: return -ENOTTY; @@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) { struct apm_user *as; - lock_kernel(); as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); - unlock_kernel(); return -ENOMEM; } as->magic = APM_BIOS_MAGIC; @@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) user_list = as; spin_unlock(&user_list_lock); filp->private_data = as; - unlock_kernel(); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9..1d2cb383410 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71..7128b3799ce 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb09..e58d978e075 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a56..a4ec8b64754 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -659,24 +654,31 @@ void __init early_cpu_init(void) const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } +#endif } - early_identify_cpu(&boot_cpu_data); } @@ -837,10 +839,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif + mcheck_cpu_init(c); select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e40..3624e8a0f71 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f05..4fbd384fb64 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3..0df4c2b7107 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca811..0bcaa387586 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/mce.h> + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -static void default_decode_mce(struct mce *m) +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; } -/* - * CPU/chipset specific EDAC code can register a callback here to print - * MCE errors in a human-readable form: - */ -void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; -EXPORT_SYMBOL(x86_mce_decode_callback); +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -141,6 +152,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { @@ -204,9 +218,9 @@ static void print_mce(struct mce *m) /* * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that): + * (if the CPU has an implementation for that) */ - x86_mce_decode_callback(m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) @@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1187,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1206,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; @@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1273,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); @@ -1380,7 +1394,7 @@ static void mce_init_timer(void) *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); + setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + } /* @@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + mcheck_intel_therm_init(); + + return 0; +} + /* * Sysfs support */ @@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1663,12 +1687,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; @@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba7533..4fef985fc22 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void __init mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c31184..c1bbed1021d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraints; + /* * Not sure about some of these */ @@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) } /* - * Find a PMC slot for the freshly enabled / scheduled in event: + * generic counter allocator: get next free counter */ -static int x86_pmu_enable(struct perf_event *event) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraints) + goto skip; + + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; + + for_each_event_constraint(event_constraint, event_constraints) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(cpuc, hwc); +} + +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(cpuc, hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -1932,10 +2047,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -1954,7 +2071,7 @@ static int p6_pmu_init(void) return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2007,12 +2124,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: @@ -2025,7 +2144,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) @@ -2105,11 +2224,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu && event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed..898df9719af 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caa..28000743bbb 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a3..7ef24a79699 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu; struct cpuinfo_x86 *c; - int ret = 0; - - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (c->cpuid_level < 0) - ret = -EIO; /* CPUID not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* CPUID not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d433..b8ce165dde5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; + } else { + sp = kernel_stack_pointer(regs); + savesegment(ss, ss); } printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf..e0ed4c7abb6 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -10,9 +10,9 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> @@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; @@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177..8e740934bd1 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -10,26 +10,28 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/kexec.h> +#include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> -#include <linux/sysfs.h> #include <asm/stacktrace.h> #include "dumpstack.h" +#define N_EXCEPTION_STACKS_END \ + (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", + [ DEBUG_STACK-1 ] = "#DB", + [ NMI_STACK-1 ] = "NMI", + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ STACKFAULT_STACK-1 ] = "#SS", + [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [ N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS_END ] = "#DB[?]" #endif - }; +}; int x86_is_stack_id(int id, char *name) { @@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) + unsigned *usedp, char **idp) { unsigned k; @@ -202,21 +204,24 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { + unsigned long *irq_stack_end; + unsigned long *irq_stack; unsigned long *stack; + int cpu; int i; - const int cpu = smp_processor_id(); - unsigned long *irq_stack_end = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - unsigned long *irq_stack = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + preempt_disable(); + cpu = smp_processor_id(); + + irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. + * Debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu: */ - if (sp == NULL) { if (task) sp = (unsigned long *)task->thread.sp; @@ -240,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + preempt_enable(); + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -303,4 +310,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7d52e9da5e0..50b9c220e12 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -334,6 +334,10 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" +/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bd5bbddddf9..4deb8fc849d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -803,6 +803,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -941,6 +945,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. @@ -1491,12 +1499,17 @@ error_kernelspace: leaq irq_return(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs + movl %ecx,%eax /* zero extend */ + cmpq %rax,RIP+8(%rsp) + je bstep_iret cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti + +bstep_iret: + /* Fix truncated RIP */ + movq %rcx,RIP+8(%rsp) + jmp error_swapgs END(error_entry) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd..22db86a3764 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) */ lgdt early_gdt_descr(%rip) - /* set up data segments. actually 0 would do too */ - movl $__KERNEL_DS,%eax + /* set up data segments */ + xorl %eax,%eax movl %eax,%ds movl %eax,%ss movl %eax,%es diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..d42f65ac492 --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,555 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> + * + * Authors: Alan Stern <stern@rowland.harvard.edu> + * K.Prasad <prasad@linux.vnet.ibm.com> + * Frederic Weisbecker <fweisbec@gmail.com> + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/irqflags.h> +#include <linux/notifier.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kdebug.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/debugreg.h> + +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, cpu_dr7); +EXPORT_PER_CPU_SYMBOL(cpu_dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); + + +static inline unsigned long +__encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + + return bp_info; +} + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; +} + +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) +{ + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + + dr7 = &__get_cpu_var(cpu_dr7); + *dr7 &= ~__encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case X86_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case X86_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case X86_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) + return 0; + + return -EINVAL; +} + +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) +{ + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; + break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->len = X86_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->len = X86_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->callback) + ret = arch_store_info(bp); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (info->address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(info->address, info->len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(info->address, info->len)) + return -EFAULT; + } + + return 0; +} + +/* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} +EXPORT_SYMBOL_GPL(aout_dump_debugregs); + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } +} + +void hw_breakpoint_restore(void) +{ + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(cpu_dr7), 7); +} +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap<n> set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +static int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct perf_event *bp; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + + /* + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. + */ + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* + * bp can be NULL due to lazy debug register switching + * or due to concurrent perf counter removing. + */ + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); + + rcu_read_unlock(); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu(); + + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd527856..fee6cc2b207 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); -# endif #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); @@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_call_count; sum += irq_stats(cpu)->irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; -# endif #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); @@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); + +#ifdef CONFIG_HOTPLUG_CPU +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) +{ + unsigned int irq, vector; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + int break_affinity = 0; + int set_affinity = 1; + const struct cpumask *affinity; + + if (!desc) + continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + + affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + + /* + * Complete the irq move. This cpu is going down and for + * non intr-remapping case, we can't wait till this interrupt + * arrives at this cpu before completing the irq move. + */ + irq_force_complete_move(irq); + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; + affinity = cpu_all_mask; + } + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) + desc->chip->unmask(irq); + + spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + printk("Broke affinity for irq %i\n", irq); + else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* + * We can remove mdelay() and then send spuriuous interrupts to + * new cpu targets for all the irqs that were handled previously by + * this cpu. While it works, I have seen spurious interrupt messages + * (nothing wrong but still...). + * + * So for now, retain mdelay(1) and check the IRR and then send those + * interrupts to new targets as this cpu is already offlined... + */ + mdelay(1); + + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irr; + + if (__get_cpu_var(vector_irq)[vector] < 0) + continue; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) { + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + spin_lock(&desc->lock); + if (desc->chip->retrigger) + desc->chip->retrigger(irq); + spin_unlock(&desc->lock); + } + } +} +#endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe232..10709f29d16 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - -#ifdef CONFIG_HOTPLUG_CPU - -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - affinity = desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; - } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (desc->action) - printk_once("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0d..acf8fbf8fbd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - static int warned; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); - - affinity = desc->affinity; - if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); - continue; - } - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - if (desc->chip->mask) - desc->chip->mask(irq); - - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (!(warned++)) - set_affinity = 0; - - if (desc->chip->unmask) - desc->chip->unmask(irq); - - spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif extern void call_softirq(void); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3..20a5b368946 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include <linux/smp.h> #include <linux/nmi.h> +#include <asm/debugreg.h> #include <asm/apicdef.h> #include <asm/system.h> @@ -88,7 +89,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; - gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; #endif + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); } /** @@ -434,6 +434,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b00..1f3186ce213 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,31 +48,22 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/kallsyms.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/alternative.h> +#include <asm/insn.h> +#include <asm/debugreg.h> void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef CONFIG_X86_64 -#define stack_addr(regs) ((unsigned long *)regs->sp) -#else -/* - * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs - * don't save the ss and esp registers if the CPU is already in kernel - * mode when it traps. So for kprobes, regs->sp and regs->ss are not - * the [nonexistent] saved stack pointer and ss register, but rather - * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to - * point to the top of the pre-int3 stack. - */ -#define stack_addr(regs) ((unsigned long *)®s->sp) -#endif +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -244,6 +191,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } @@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); @@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. - */ - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); @@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d0013..c843f8406da 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include <asm/desc.h> #include <asm/system.h> #include <asm/cacheflush.h> +#include <asm/debugreg.h> static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e9..4a8bb82248a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/debugreg.h> static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf..2bcad3926ed 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -73,7 +73,6 @@ #include <linux/platform_device.h> #include <linux/miscdevice.h> #include <linux/capability.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> @@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *unused1, struct file *unused2) { - cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda..553449951b8 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); - int ret = 0; - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) - ret = -EIO; /* MSR not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* MSR not supported */ + + return 0; } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b577..744508e7cfd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include <linux/clockchips.h> #include <linux/random.h> #include <trace/events/power.h> +#include <linux/hw_breakpoint.h> #include <asm/system.h> #include <asm/apic.h> #include <asm/syscalls.h> @@ -17,6 +18,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/ds.h> +#include <asm/debugreg.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -103,14 +105,7 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cda..075580b3568 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -58,6 +58,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -134,7 +135,7 @@ void __show_regs(struct pt_regs *regs, int all) ss = regs->ss & 0xffff; gs = get_user_gs(regs); } else { - sp = (unsigned long) (®s->sp); + sp = kernel_stack_pointer(regs); savesegment(ss, ss); savesegment(gs, gs); } @@ -187,7 +188,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, ®s->sp, regs->bp); } @@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa49..a98fe88fab6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,6 +52,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/ds.h> +#include <asm/debugreg.h> asmlinkage extern void ret_from_fork(void); @@ -226,8 +227,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } @@ -297,12 +297,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -341,6 +345,7 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + return err; } @@ -495,6 +500,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); + return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66..04d182a7cfd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include <linux/seccomp.h> #include <linux/signal.h> #include <linux/workqueue.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -34,6 +36,7 @@ #include <asm/prctl.h> #include <asm/proto.h> #include <asm/ds.h> +#include <asm/hw_breakpoint.h> #include "tls.h" @@ -49,6 +52,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *(unsigned long *)((char *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target, return ret; } +static void ptrace_triggered(struct perf_event *bp, void *data) +{ + int i; + struct thread_struct *thread = &(current->thread); + + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) + break; + } + + thread->debugreg6 |= (DR_TRAP0 << i); +} + /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { - switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } } - return 0; + + return dr7; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk, int disabled) { - int i; + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); - switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = disabled; - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct perf_event *bp; + + data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->ptrace_bps[i]; + + if (!enabled) { + if (bp) { + /* + * Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + + thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + continue; + } + + bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + + /* Incorrect bp, or we have a bug in bp API */ + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} + +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; - case 7: + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { + val = thread->debugreg6; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } + return val; +} + +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); + + if (!t->ptrace_bps[nr]) { /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) + * Put stub len and type to register (reserve) an inactive but + * correct bp */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; return 0; } /* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + thread->debugreg6 = val; + goto ret_path; + } + if (n < HBP_NUM) { + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); + +ret_path: + return rc; +} + +/* * These access the current or another (stopped) task's io permission * bitmap for debugging or core dump. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be2..82e88cdda9b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include <asm/numa_64.h> #endif +#include <asm/mce.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -247,7 +248,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +257,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif @@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055a..fbf3b07c856 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920..324f2a44c22 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) void cpu_disable_common(void) { int cpu = smp_processor_id(); - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc..33399176512 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); /* Catch kmemcheck conditions first of all! */ - if (condition & DR_STEP && kmemcheck_trap(regs)) + if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d1..eed156851f5 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); + if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) + pr_info( + "Skipped synchronization checks as TSC is reliable.\n"); return; } - pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); - /* * Reset it - in case this is a second bootup: */ @@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) cpu_relax(); if (nr_warps) { - printk("\n"); + pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { - printk(" passed.\n"); + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e..61d805df4c9 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -9,10 +9,25 @@ */ #include <linux/module.h> +#include <linux/rbtree.h> #include <linux/irq.h> #include <asm/apic.h> #include <asm/uv/uv_irq.h> +#include <asm/uv/uv_hub.h> + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(unsigned int, const struct cpumask *); static void uv_noop(unsigned int irq) { @@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = { .unmask = uv_noop, .eoi = uv_ack_apic, .end = uv_noop, + .set_affinity = uv_set_irq_affinity, }; /* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int restrict) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. */ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { - int irq; - int ret; + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq(); if (irq <= 0) return -EBUSY; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); - if (ret != irq) + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + restrict); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else destroy_irq(irq); return ret; @@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); * * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). */ -void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +void uv_teardown_irq(unsigned int irq) { - arch_disable_uv_irq(mmr_blade, mmr_offset); + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); destroy_irq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b1..abda6f53e71 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apic->apicid_to_cpu_present(m->apicid); + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version @@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) } static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", + .name = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, .enable = enable_cobalt_irq, @@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) } static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", + .name = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, .end = end_piix4_master_irq, @@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", + .name = "PIIX4-virtual", .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff59..e02d92d12bc 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -237,7 +237,7 @@ static ctl_table kernel_table2[] = { }; static ctl_table kernel_root_table2[] = { - { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, + { .procname = "kernel", .mode = 0555, .child = kernel_table2 }, {} }; diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce..a1029769b6f 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(__copy_from_user_inatomic); +EXPORT_SYMBOL(_copy_from_user); +EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527..4fc80174191 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include <asm/debugreg.h> #include <asm/uaccess.h> #include <asm/msr.h> #include <asm/desc.h> @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (hw_breakpoint_active()) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 00000000000..8df89f0a3fe --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db95c60..a2d6472895f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85e..cf889d4e076 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,10 +75,10 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 00000000000..46fc4ee09fc --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,90 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/insn.h> + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_group_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 00000000000..9f33b984d0e --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,516 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include <linux/string.h> +#include <asm/inat.h> +#include <asm/insn.h> + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_legacy_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + /* Decode REX prefix */ + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d..41628b104b9 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462ac..e218d5df85f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 00000000000..a793da5e560 --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,893 @@ +# x86 Opcode Maps +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# AVXcode: avx-code +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# + +Table: one byte opcode +Referrer: +AVXcode: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode (0x0f) +Referrer: 2-byte escape +AVXcode: 1 +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) +c3: movnti Md/q,Gd/q +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0xf0-0xff +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +AVXcode: 2 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +26: +27: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) +36: +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +# 0x0f 0x38 0x40-0x8f +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq (66),(VEX),(o128) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +AVXcode: 3 +# 0x0f 0x3a 0x00-0xff +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr (VEX) +3: stmxcsr (VEX) +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a..d0474ad2a6e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_X86_64 -/* - * Need to defined our own search_extable on X86_64 to work around - * a B stepping K8 bug. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} -#endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf..f62777940df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -240,7 +241,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -357,7 +358,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); - if (*stackend != STACK_END_MAGIC) + if (tsk != &init_task && *stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; @@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917..11a4ad4d625 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3..9d7ce96e5a5 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8aa85f17667..0a979f3e5b8 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,6 +18,7 @@ #include <asm/mce.h> #include <asm/xcr.h> #include <asm/suspend.h> +#include <asm/debugreg.h> #ifdef CONFIG_X86_32 static struct saved_context saved_context; @@ -142,31 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7) { -#ifdef CONFIG_X86_32 - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); -#else - /* CONFIG_X86_64 */ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); -#endif - } - } /** diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 00000000000..f8208267733 --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,31 @@ +PHONY += posttest + +ifeq ($(KBUILD_VERBOSE),1) + posttest_verbose = -v +else + posttest_verbose = +endif + +ifeq ($(CONFIG_64BIT),y) + posttest_64bit = -y +else + posttest_64bit = -n +endif + +distill_awk = $(srctree)/arch/x86/tools/distill.awk +chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk + +quiet_cmd_posttest = TEST $@ + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + +# Dependencies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 00000000000..0d13cd9fdcf --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk @@ -0,0 +1,23 @@ +# GNU objdump version checker +# +# Usage: +# objdump -v | awk -f chkobjdump.awk +BEGIN { + # objdump version 2.19 or later is OK for the test. + od_ver = 2; + od_sver = 19; +} + +/^GNU/ { + split($4, ver, "."); + if (ver[1] > od_ver || + (ver[1] == od_ver && ver[2] >= od_sver)) { + exit 1; + } else { + printf("Warning: objdump version %s is older than %d.%d\n", + $4, od_ver, od_sver); + print("Warning: Skipping posttest."); + # Logic is inverted, because we just skip test without error. + exit 0; + } +} diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 00000000000..c13c0ee48ab --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,47 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 00000000000..e34e92a28eb --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,380 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */\n" + ggid = 1 + geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable + + opnd_expr = "^[[:alpha:]/]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\(F2\\)" + max_lprefix = 4 + + vexok_expr = "\\(VEX\\)" + vexonly_expr = "\\(oVEX\\)" + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" + prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" + + clear_vars() +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); +} + +/^Referrer:/ { + if (NF != 1) { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" + } + } + print "" + clear_vars() +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check VEX only code + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + + # check VEX only code + if (match(ext, vexok_expr)) + flags = add_flags(flags, "INAT_VEXOK") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," + print "};" +} + diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 00000000000..d8214dc03fa --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,173 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#define unlikely(cond) (cond) + +#include <asm/insn.h> +#include <inat.c> +#include <insn.c> + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; +static int verbose; +static int x86_64; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " %s [-y|-n] [-v] \n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = { \n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + } +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0, c; + int warnings = 0; + + parse_args(argc, argv); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + if (insn.length != nb) { + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", + prog, sym); + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); + } + } + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stderr, "Succeed: decoded and checked %d" + " instructions\n", insns); + return 0; +} diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 58bc00f68b1..02b442e9200 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -393,7 +393,6 @@ static ctl_table abi_table2[] = { static ctl_table abi_root_table2[] = { { - .ctl_name = CTL_ABI, .procname = "abi", .mode = 0555, .child = abi_table2 |