diff options
Diffstat (limited to 'arch')
255 files changed, 5736 insertions, 5458 deletions
diff --git a/arch/alpha/include/asm/bug.h b/arch/alpha/include/asm/bug.h index 7b85b7c9370..1720c8ad86f 100644 --- a/arch/alpha/include/asm/bug.h +++ b/arch/alpha/include/asm/bug.h @@ -8,12 +8,12 @@ /* ??? Would be nice to use .gprel32 here, but we can't be sure that the function loaded the GP, so this could fail in modules. */ -#define BUG() { \ +#define BUG() do { \ __asm__ __volatile__( \ "call_pal %0 # bugchk\n\t" \ ".long %1\n\t.8byte %2" \ : : "i"(PAL_bugchk), "i"(__LINE__), "i"(__FILE__)); \ - for ( ; ; ); } + for ( ; ; ); } while (0) #define HAVE_ARCH_BUG #endif diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 703731accda..7bc7489223f 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 77b04747553..85040cfeb5e 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -650,6 +650,7 @@ ENTRY(fp_enter) no_fp: mov pc, lr __und_usr_unknown: + enable_irq mov r0, sp adr lr, ret_from_exception b do_undefinstr diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 06269ea375c..49a6ba926c2 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -136,7 +136,7 @@ ENTRY(mcount) ldmia sp!, {r0-r3, pc} trace: - ldr r1, [fp, #-4] + ldr r1, [fp, #-4] @ lr of instrumented routine mov r0, lr sub r0, r0, #MCOUNT_INSN_SIZE mov lr, pc diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 7141cee1fab..45eacb5a2ec 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -101,9 +101,14 @@ unlock: /* Handle bad interrupts */ static struct irq_desc bad_irq_desc = { .handle_irq = handle_bad_irq, - .lock = SPIN_LOCK_UNLOCKED + .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock), }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating bad_irq_desc.affinity or .pending_mask */ +#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + /* * do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their @@ -161,7 +166,7 @@ void __init init_IRQ(void) irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; #ifdef CONFIG_SMP - bad_irq_desc.affinity = CPU_MASK_ALL; + cpumask_setall(bad_irq_desc.affinity); bad_irq_desc.cpu = smp_processor_id(); #endif init_arch_irq(); @@ -191,15 +196,16 @@ void migrate_irqs(void) struct irq_desc *desc = irq_desc + i; if (desc->cpu == cpu) { - unsigned int newcpu = any_online_cpu(desc->affinity); - - if (newcpu == NR_CPUS) { + unsigned int newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + if (newcpu >= nr_cpu_ids) { if (printk_ratelimit()) printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", i, cpu); - cpus_setall(desc->affinity); - newcpu = any_online_cpu(desc->affinity); + cpumask_setall(desc->affinity); + newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); } route_irq(desc, i, newcpu); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 00216071eaf..85598f7da40 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -65,6 +65,7 @@ SECTIONS #endif . = ALIGN(4096); __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c index c2a96e3965a..e61967dde9a 100644 --- a/arch/arm/mach-msm/board-halibut.c +++ b/arch/arm/mach-msm/board-halibut.c @@ -27,6 +27,7 @@ #include <asm/mach/map.h> #include <asm/mach/flash.h> +#include <mach/irqs.h> #include <mach/board.h> #include <mach/msm_iomap.h> diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c index 77382d8b6b2..ba5d7c08dc1 100644 --- a/arch/arm/mach-omap1/devices.c +++ b/arch/arm/mach-omap1/devices.c @@ -181,7 +181,7 @@ void __init omap1_init_mmc(struct omap_mmc_platform_data **mmc_data, } size = OMAP1_MMC_SIZE; - omap_mmc_add(i, base, size, irq, mmc_data[i]); + omap_mmc_add("mmci-omap", i, base, size, irq, mmc_data[i]); }; } diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c index ca7a0cc1707..575ba31295c 100644 --- a/arch/arm/mach-omap1/mcbsp.c +++ b/arch/arm/mach-omap1/mcbsp.c @@ -28,81 +28,8 @@ #define DPS_RSTCT2_PER_EN (1 << 0) #define DSP_RSTCT2_WD_PER_EN (1 << 1) -struct mcbsp_internal_clk { - struct clk clk; - struct clk **childs; - int n_childs; -}; - #if defined(CONFIG_ARCH_OMAP15XX) || defined(CONFIG_ARCH_OMAP16XX) -static void omap_mcbsp_clk_init(struct mcbsp_internal_clk *mclk) -{ - const char *clk_names[] = { "dsp_ck", "api_ck", "dspxor_ck" }; - int i; - - mclk->n_childs = ARRAY_SIZE(clk_names); - mclk->childs = kzalloc(mclk->n_childs * sizeof(struct clk *), - GFP_KERNEL); - - for (i = 0; i < mclk->n_childs; i++) { - /* We fake a platform device to get correct device id */ - struct platform_device pdev; - - pdev.dev.bus = &platform_bus_type; - pdev.id = mclk->clk.id; - mclk->childs[i] = clk_get(&pdev.dev, clk_names[i]); - if (IS_ERR(mclk->childs[i])) - printk(KERN_ERR "Could not get clock %s (%d).\n", - clk_names[i], mclk->clk.id); - } -} - -static int omap_mcbsp_clk_enable(struct clk *clk) -{ - struct mcbsp_internal_clk *mclk = container_of(clk, - struct mcbsp_internal_clk, clk); - int i; - - for (i = 0; i < mclk->n_childs; i++) - clk_enable(mclk->childs[i]); - return 0; -} - -static void omap_mcbsp_clk_disable(struct clk *clk) -{ - struct mcbsp_internal_clk *mclk = container_of(clk, - struct mcbsp_internal_clk, clk); - int i; - - for (i = 0; i < mclk->n_childs; i++) - clk_disable(mclk->childs[i]); -} - -static struct mcbsp_internal_clk omap_mcbsp_clks[] = { - { - .clk = { - .name = "mcbsp_clk", - .id = 1, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, - { - .clk = { - .name = "mcbsp_clk", - .id = 3, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, -}; - -#define omap_mcbsp_clks_size ARRAY_SIZE(omap_mcbsp_clks) -#else -#define omap_mcbsp_clks_size 0 -static struct mcbsp_internal_clk __initdata *omap_mcbsp_clks; -static inline void omap_mcbsp_clk_init(struct mcbsp_internal_clk *mclk) -{ } +const char *clk_names[] = { "dsp_ck", "api_ck", "dspxor_ck" }; #endif static void omap1_mcbsp_request(unsigned int id) @@ -167,8 +94,9 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { .rx_irq = INT_McBSP1RX, .tx_irq = INT_McBSP1TX, .ops = &omap1_mcbsp_ops, - .clk_name = "mcbsp_clk", - }, + .clk_names = clk_names, + .num_clks = 3, + }, { .phys_base = OMAP1510_MCBSP2_BASE, .dma_rx_sync = OMAP_DMA_MCBSP2_RX, @@ -184,7 +112,8 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { .rx_irq = INT_McBSP3RX, .tx_irq = INT_McBSP3TX, .ops = &omap1_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 3, }, }; #define OMAP15XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap15xx_mcbsp_pdata) @@ -202,7 +131,8 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { .rx_irq = INT_McBSP1RX, .tx_irq = INT_McBSP1TX, .ops = &omap1_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 3, }, { .phys_base = OMAP1610_MCBSP2_BASE, @@ -219,7 +149,8 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { .rx_irq = INT_McBSP3RX, .tx_irq = INT_McBSP3TX, .ops = &omap1_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 3, }, }; #define OMAP16XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap16xx_mcbsp_pdata) @@ -230,15 +161,6 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { int __init omap1_mcbsp_init(void) { - int i; - - for (i = 0; i < omap_mcbsp_clks_size; i++) { - if (cpu_is_omap15xx() || cpu_is_omap16xx()) { - omap_mcbsp_clk_init(&omap_mcbsp_clks[i]); - clk_register(&omap_mcbsp_clks[i].clk); - } - } - if (cpu_is_omap730()) omap_mcbsp_count = OMAP730_MCBSP_PDATA_SZ; if (cpu_is_omap15xx()) diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 9d7216ff6c9..ce03fa75077 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -421,6 +421,7 @@ void __init omap2_init_mmc(struct omap_mmc_platform_data **mmc_data, int nr_controllers) { int i; + char *name; for (i = 0; i < nr_controllers; i++) { unsigned long base, size; @@ -450,12 +451,14 @@ void __init omap2_init_mmc(struct omap_mmc_platform_data **mmc_data, continue; } - if (cpu_is_omap2420()) + if (cpu_is_omap2420()) { size = OMAP2420_MMC_SIZE; - else + name = "mmci-omap"; + } else { size = HSMMC_SIZE; - - omap_mmc_add(i, base, size, irq, mmc_data[i]); + name = "mmci-omap-hs"; + } + omap_mmc_add(name, i, base, size, irq, mmc_data[i]); }; } diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index b0f8e7d6279..b52a02fc7cd 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -172,9 +172,13 @@ void __init omap34xx_check_revision(void) omap_revision = OMAP3430_REV_ES3_0; rev_name = "ES3.0"; break; + case 4: + omap_revision = OMAP3430_REV_ES3_1; + rev_name = "ES3.1"; + break; default: /* Use the latest known revision as default */ - omap_revision = OMAP3430_REV_ES3_0; + omap_revision = OMAP3430_REV_ES3_1; rev_name = "Unknown revision\n"; } } diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c index 636e2821af7..9ba20d985dd 100644 --- a/arch/arm/mach-omap2/irq.c +++ b/arch/arm/mach-omap2/irq.c @@ -134,6 +134,7 @@ static struct irq_chip omap_irq_chip = { .ack = omap_mask_ack_irq, .mask = omap_mask_irq, .unmask = omap_unmask_irq, + .disable = omap_mask_irq, }; static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank) diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index e20023c9d15..a9e631fc113 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -24,106 +24,7 @@ #include <mach/cpu.h> #include <mach/mcbsp.h> -struct mcbsp_internal_clk { - struct clk clk; - struct clk **childs; - int n_childs; -}; - -#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) -static void omap_mcbsp_clk_init(struct mcbsp_internal_clk *mclk) -{ - const char *clk_names[] = { "mcbsp_ick", "mcbsp_fck" }; - int i; - - mclk->n_childs = ARRAY_SIZE(clk_names); - mclk->childs = kzalloc(mclk->n_childs * sizeof(struct clk *), - GFP_KERNEL); - - for (i = 0; i < mclk->n_childs; i++) { - /* We fake a platform device to get correct device id */ - struct platform_device pdev; - - pdev.dev.bus = &platform_bus_type; - pdev.id = mclk->clk.id; - mclk->childs[i] = clk_get(&pdev.dev, clk_names[i]); - if (IS_ERR(mclk->childs[i])) - printk(KERN_ERR "Could not get clock %s (%d).\n", - clk_names[i], mclk->clk.id); - } -} - -static int omap_mcbsp_clk_enable(struct clk *clk) -{ - struct mcbsp_internal_clk *mclk = container_of(clk, - struct mcbsp_internal_clk, clk); - int i; - - for (i = 0; i < mclk->n_childs; i++) - clk_enable(mclk->childs[i]); - return 0; -} - -static void omap_mcbsp_clk_disable(struct clk *clk) -{ - struct mcbsp_internal_clk *mclk = container_of(clk, - struct mcbsp_internal_clk, clk); - int i; - - for (i = 0; i < mclk->n_childs; i++) - clk_disable(mclk->childs[i]); -} - -static struct mcbsp_internal_clk omap_mcbsp_clks[] = { - { - .clk = { - .name = "mcbsp_clk", - .id = 1, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, - { - .clk = { - .name = "mcbsp_clk", - .id = 2, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, - { - .clk = { - .name = "mcbsp_clk", - .id = 3, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, - { - .clk = { - .name = "mcbsp_clk", - .id = 4, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, - { - .clk = { - .name = "mcbsp_clk", - .id = 5, - .enable = omap_mcbsp_clk_enable, - .disable = omap_mcbsp_clk_disable, - }, - }, -}; - -#define omap_mcbsp_clks_size ARRAY_SIZE(omap_mcbsp_clks) -#else -#define omap_mcbsp_clks_size 0 -static struct mcbsp_internal_clk __initdata *omap_mcbsp_clks; -static inline void omap_mcbsp_clk_init(struct clk *clk) -{ } -#endif +const char *clk_names[] = { "mcbsp_ick", "mcbsp_fck" }; static void omap2_mcbsp2_mux_setup(void) { @@ -156,7 +57,8 @@ static struct omap_mcbsp_platform_data omap2420_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP24XX_MCBSP2_BASE, @@ -165,7 +67,8 @@ static struct omap_mcbsp_platform_data omap2420_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP2_IRQ_RX, .tx_irq = INT_24XX_MCBSP2_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, }; #define OMAP2420_MCBSP_PDATA_SZ ARRAY_SIZE(omap2420_mcbsp_pdata) @@ -183,7 +86,8 @@ static struct omap_mcbsp_platform_data omap2430_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP24XX_MCBSP2_BASE, @@ -192,7 +96,8 @@ static struct omap_mcbsp_platform_data omap2430_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP2_IRQ_RX, .tx_irq = INT_24XX_MCBSP2_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP2430_MCBSP3_BASE, @@ -201,7 +106,8 @@ static struct omap_mcbsp_platform_data omap2430_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP3_IRQ_RX, .tx_irq = INT_24XX_MCBSP3_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP2430_MCBSP4_BASE, @@ -210,7 +116,8 @@ static struct omap_mcbsp_platform_data omap2430_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP4_IRQ_RX, .tx_irq = INT_24XX_MCBSP4_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP2430_MCBSP5_BASE, @@ -219,7 +126,8 @@ static struct omap_mcbsp_platform_data omap2430_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP5_IRQ_RX, .tx_irq = INT_24XX_MCBSP5_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, }; #define OMAP2430_MCBSP_PDATA_SZ ARRAY_SIZE(omap2430_mcbsp_pdata) @@ -237,7 +145,8 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP34XX_MCBSP2_BASE, @@ -246,7 +155,8 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP2_IRQ_RX, .tx_irq = INT_24XX_MCBSP2_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP34XX_MCBSP3_BASE, @@ -255,7 +165,8 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP3_IRQ_RX, .tx_irq = INT_24XX_MCBSP3_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP34XX_MCBSP4_BASE, @@ -264,7 +175,8 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP4_IRQ_RX, .tx_irq = INT_24XX_MCBSP4_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, { .phys_base = OMAP34XX_MCBSP5_BASE, @@ -273,7 +185,8 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP5_IRQ_RX, .tx_irq = INT_24XX_MCBSP5_IRQ_TX, .ops = &omap2_mcbsp_ops, - .clk_name = "mcbsp_clk", + .clk_names = clk_names, + .num_clks = 2, }, }; #define OMAP34XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap34xx_mcbsp_pdata) @@ -284,14 +197,6 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { static int __init omap2_mcbsp_init(void) { - int i; - - for (i = 0; i < omap_mcbsp_clks_size; i++) { - /* Once we call clk_get inside init, we do not register it */ - omap_mcbsp_clk_init(&omap_mcbsp_clks[i]); - clk_register(&omap_mcbsp_clks[i].clk); - } - if (cpu_is_omap2420()) omap_mcbsp_count = OMAP2420_MCBSP_PDATA_SZ; if (cpu_is_omap2430()) diff --git a/arch/arm/mach-omap2/sleep24xx.S b/arch/arm/mach-omap2/sleep24xx.S index 43336b93b21..bf9e96105e1 100644 --- a/arch/arm/mach-omap2/sleep24xx.S +++ b/arch/arm/mach-omap2/sleep24xx.S @@ -93,9 +93,8 @@ ENTRY(omap24xx_cpu_suspend) orr r4, r4, #0x40 @ enable self refresh on idle req mov r5, #0x2000 @ set delay (DPLL relock + DLL relock) str r4, [r2] @ make it so - mov r2, #0 nop - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt + mcr p15, 0, r3, c7, c0, 4 @ wait for interrupt nop loop: subs r5, r5, #0x1 @ awake, wait just a bit diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index ae6036300f6..9fc13a2cc3f 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -118,7 +118,8 @@ static void __init omap2_gp_clockevent_init(void) clockevent_gpt.max_delta_ns = clockevent_delta2ns(0xffffffff, &clockevent_gpt); clockevent_gpt.min_delta_ns = - clockevent_delta2ns(1, &clockevent_gpt); + clockevent_delta2ns(3, &clockevent_gpt); + /* Timer internal resynch latency. */ clockevent_gpt.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_gpt); diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index c1fbd5b5f9c..23cfdd59395 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -289,7 +289,7 @@ static struct platform_device sa11x0pcmcia_device = { }; static struct platform_device sa11x0mtd_device = { - .name = "flash", + .name = "sa1100-mtd", .id = -1, }; diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 81d0b8772de..bc0099d5ae8 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -66,7 +66,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) * fault (ie, is old), we can safely ignore any issues. */ if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { - flush_cache_page(vma, address, pte_pfn(entry)); + unsigned long pfn = pte_pfn(entry); + flush_cache_page(vma, address, pfn); + outer_flush_range((pfn << PAGE_SHIFT), + (pfn << PAGE_SHIFT) + PAGE_SIZE); pte_val(entry) &= ~L_PTE_MT_MASK; pte_val(entry) |= shared_pte_mask; set_pte_at(vma->vm_mm, address, pte, entry); diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 6d6bd589924..853d42bb868 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,7 +263,7 @@ static void em_route_irq(int irq, unsigned int cpu) const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = *mask; + cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/arm/plat-omap/devices.c b/arch/arm/plat-omap/devices.c index ac15c23fd5d..208dbb121f4 100644 --- a/arch/arm/plat-omap/devices.c +++ b/arch/arm/plat-omap/devices.c @@ -200,14 +200,15 @@ void omap_mcbsp_register_board_cfg(struct omap_mcbsp_platform_data *config, /* * Register MMC devices. Called from mach-omap1 and mach-omap2 device init. */ -int __init omap_mmc_add(int id, unsigned long base, unsigned long size, - unsigned int irq, struct omap_mmc_platform_data *data) +int __init omap_mmc_add(const char *name, int id, unsigned long base, + unsigned long size, unsigned int irq, + struct omap_mmc_platform_data *data) { struct platform_device *pdev; struct resource res[OMAP_MMC_NR_RES]; int ret; - pdev = platform_device_alloc("mmci-omap", id); + pdev = platform_device_alloc(name, id); if (!pdev) return -ENOMEM; diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index e77373c39f8..47ec77af4cc 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -709,6 +709,7 @@ int omap_request_dma(int dev_id, const char *dev_name, chan->dev_name = dev_name; chan->callback = callback; chan->data = data; + chan->flags = 0; #ifndef CONFIG_ARCH_OMAP1 if (cpu_class_is_omap2()) { @@ -1888,11 +1889,11 @@ static int omap2_dma_handle_ch(int ch) status = dma_read(CSR(ch)); } + dma_write(status, CSR(ch)); + if (likely(dma_chan[ch].callback != NULL)) dma_chan[ch].callback(ch, status, dma_chan[ch].data); - dma_write(status, CSR(ch)); - return 0; } diff --git a/arch/arm/plat-omap/include/mach/cpu.h b/arch/arm/plat-omap/include/mach/cpu.h index b2062f1175d..a8e1178a946 100644 --- a/arch/arm/plat-omap/include/mach/cpu.h +++ b/arch/arm/plat-omap/include/mach/cpu.h @@ -339,6 +339,7 @@ IS_OMAP_TYPE(3430, 0x3430) #define OMAP3430_REV_ES2_0 0x34301034 #define OMAP3430_REV_ES2_1 0x34302034 #define OMAP3430_REV_ES3_0 0x34303034 +#define OMAP3430_REV_ES3_1 0x34304034 /* * omap_chip bits diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index eef873db3d4..113c2466c86 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -344,7 +344,8 @@ struct omap_mcbsp_platform_data { u8 dma_rx_sync, dma_tx_sync; u16 rx_irq, tx_irq; struct omap_mcbsp_ops *ops; - char const *clk_name; + char const **clk_names; + int num_clks; }; struct omap_mcbsp { @@ -376,7 +377,8 @@ struct omap_mcbsp { /* Protect the field .free, while checking if the mcbsp is in use */ spinlock_t lock; struct omap_mcbsp_platform_data *pdata; - struct clk *clk; + struct clk **clks; + int num_clks; }; extern struct omap_mcbsp **mcbsp_ptr; extern int omap_mcbsp_count; diff --git a/arch/arm/plat-omap/include/mach/mmc.h b/arch/arm/plat-omap/include/mach/mmc.h index 031250f0280..73a9e15031b 100644 --- a/arch/arm/plat-omap/include/mach/mmc.h +++ b/arch/arm/plat-omap/include/mach/mmc.h @@ -115,8 +115,9 @@ void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data, int nr_controllers); void omap2_init_mmc(struct omap_mmc_platform_data **mmc_data, int nr_controllers); -int omap_mmc_add(int id, unsigned long base, unsigned long size, - unsigned int irq, struct omap_mmc_platform_data *data); +int omap_mmc_add(const char *name, int id, unsigned long base, + unsigned long size, unsigned int irq, + struct omap_mmc_platform_data *data); #else static inline void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data, int nr_controllers) @@ -126,8 +127,9 @@ static inline void omap2_init_mmc(struct omap_mmc_platform_data **mmc_data, int nr_controllers) { } -static inline int omap_mmc_add(int id, unsigned long base, unsigned long size, - unsigned int irq, struct omap_mmc_platform_data *data) +static inline int omap_mmc_add(const char *name, int id, unsigned long base, + unsigned long size, unsigned int irq, + struct omap_mmc_platform_data *data) { return 0; } diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index f2401a831f9..e5842e30e53 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -214,6 +214,7 @@ EXPORT_SYMBOL(omap_mcbsp_set_io_type); int omap_mcbsp_request(unsigned int id) { struct omap_mcbsp *mcbsp; + int i; int err; if (!omap_mcbsp_check_valid_id(id)) { @@ -225,7 +226,8 @@ int omap_mcbsp_request(unsigned int id) if (mcbsp->pdata && mcbsp->pdata->ops && mcbsp->pdata->ops->request) mcbsp->pdata->ops->request(id); - clk_enable(mcbsp->clk); + for (i = 0; i < mcbsp->num_clks; i++) + clk_enable(mcbsp->clks[i]); spin_lock(&mcbsp->lock); if (!mcbsp->free) { @@ -276,6 +278,7 @@ EXPORT_SYMBOL(omap_mcbsp_request); void omap_mcbsp_free(unsigned int id) { struct omap_mcbsp *mcbsp; + int i; if (!omap_mcbsp_check_valid_id(id)) { printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); @@ -286,7 +289,8 @@ void omap_mcbsp_free(unsigned int id) if (mcbsp->pdata && mcbsp->pdata->ops && mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(id); - clk_disable(mcbsp->clk); + for (i = mcbsp->num_clks - 1; i >= 0; i--) + clk_disable(mcbsp->clks[i]); spin_lock(&mcbsp->lock); if (mcbsp->free) { @@ -872,6 +876,7 @@ static int __devinit omap_mcbsp_probe(struct platform_device *pdev) struct omap_mcbsp_platform_data *pdata = pdev->dev.platform_data; struct omap_mcbsp *mcbsp; int id = pdev->id - 1; + int i; int ret = 0; if (!pdata) { @@ -916,14 +921,25 @@ static int __devinit omap_mcbsp_probe(struct platform_device *pdev) mcbsp->dma_rx_sync = pdata->dma_rx_sync; mcbsp->dma_tx_sync = pdata->dma_tx_sync; - if (pdata->clk_name) - mcbsp->clk = clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(mcbsp->clk)) { - dev_err(&pdev->dev, - "Invalid clock configuration for McBSP%d.\n", - mcbsp->id); - ret = PTR_ERR(mcbsp->clk); - goto err_clk; + if (pdata->num_clks) { + mcbsp->num_clks = pdata->num_clks; + mcbsp->clks = kzalloc(mcbsp->num_clks * sizeof(struct clk *), + GFP_KERNEL); + if (!mcbsp->clks) { + ret = -ENOMEM; + goto exit; + } + for (i = 0; i < mcbsp->num_clks; i++) { + mcbsp->clks[i] = clk_get(&pdev->dev, pdata->clk_names[i]); + if (IS_ERR(mcbsp->clks[i])) { + dev_err(&pdev->dev, + "Invalid %s configuration for McBSP%d.\n", + pdata->clk_names[i], mcbsp->id); + ret = PTR_ERR(mcbsp->clks[i]); + goto err_clk; + } + } + } mcbsp->pdata = pdata; @@ -932,6 +948,9 @@ static int __devinit omap_mcbsp_probe(struct platform_device *pdev) return 0; err_clk: + while (i--) + clk_put(mcbsp->clks[i]); + kfree(mcbsp->clks); iounmap(mcbsp->io_base); err_ioremap: mcbsp->free = 0; @@ -942,6 +961,7 @@ exit: static int __devexit omap_mcbsp_remove(struct platform_device *pdev) { struct omap_mcbsp *mcbsp = platform_get_drvdata(pdev); + int i; platform_set_drvdata(pdev, NULL); if (mcbsp) { @@ -950,12 +970,18 @@ static int __devexit omap_mcbsp_remove(struct platform_device *pdev) mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(mcbsp->id); - clk_disable(mcbsp->clk); - clk_put(mcbsp->clk); + for (i = mcbsp->num_clks - 1; i >= 0; i--) { + clk_disable(mcbsp->clks[i]); + clk_put(mcbsp->clks[i]); + } iounmap(mcbsp->io_base); - mcbsp->clk = NULL; + if (mcbsp->num_clks) { + kfree(mcbsp->clks); + mcbsp->clks = NULL; + mcbsp->num_clks = 0; + } mcbsp->free = 0; mcbsp->dev = NULL; } diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index a949c4fbbdd..8f1f97d56e1 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -169,26 +169,51 @@ config BF542 help BF542 Processor Support. +config BF542M + bool "BF542m" + help + BF542 Processor Support. + config BF544 bool "BF544" help BF544 Processor Support. +config BF544M + bool "BF544m" + help + BF544 Processor Support. + config BF547 bool "BF547" help BF547 Processor Support. +config BF547M + bool "BF547m" + help + BF547 Processor Support. + config BF548 bool "BF548" help BF548 Processor Support. +config BF548M + bool "BF548m" + help + BF548 Processor Support. + config BF549 bool "BF549" help BF549 Processor Support. +config BF549M + bool "BF549m" + help + BF549 Processor Support. + config BF561 bool "BF561" help @@ -224,39 +249,39 @@ config TICK_SOURCE_SYSTMR0 config BF_REV_MIN int - default 0 if (BF51x || BF52x || BF54x) + default 0 if (BF51x || BF52x || (BF54x && !BF54xM)) default 2 if (BF537 || BF536 || BF534) - default 3 if (BF561 ||BF533 || BF532 || BF531) + default 3 if (BF561 || BF533 || BF532 || BF531 || BF54xM) default 4 if (BF538 || BF539) config BF_REV_MAX int - default 2 if (BF51x || BF52x || BF54x) - default 3 if (BF537 || BF536 || BF534) + default 2 if (BF51x || BF52x || (BF54x && !BF54xM)) + default 3 if (BF537 || BF536 || BF534 || BF54xM) default 5 if (BF561 || BF538 || BF539) default 6 if (BF533 || BF532 || BF531) choice prompt "Silicon Rev" - default BF_REV_0_1 if (BF51x || BF52x || BF54x) + default BF_REV_0_1 if (BF51x || BF52x || (BF54x && !BF54xM)) default BF_REV_0_2 if (BF534 || BF536 || BF537) - default BF_REV_0_3 if (BF531 || BF532 || BF533 || BF561) + default BF_REV_0_3 if (BF531 || BF532 || BF533 || BF54xM || BF561) config BF_REV_0_0 bool "0.0" - depends on (BF51x || BF52x || BF54x) + depends on (BF51x || BF52x || (BF54x && !BF54xM)) config BF_REV_0_1 bool "0.1" - depends on (BF52x || BF54x) + depends on (BF52x || (BF54x && !BF54xM)) config BF_REV_0_2 bool "0.2" - depends on (BF52x || BF537 || BF536 || BF534 || BF54x) + depends on (BF52x || BF537 || BF536 || BF534 || (BF54x && !BF54xM)) config BF_REV_0_3 bool "0.3" - depends on (BF561 || BF537 || BF536 || BF534 || BF533 || BF532 || BF531) + depends on (BF54xM || BF561 || BF537 || BF536 || BF534 || BF533 || BF532 || BF531) config BF_REV_0_4 bool "0.4" @@ -293,9 +318,14 @@ config BF53x depends on (BF531 || BF532 || BF533 || BF534 || BF536 || BF537) default y +config BF54xM + bool + depends on (BF542M || BF544M || BF547M || BF548M || BF549M) + default y + config BF54x bool - depends on (BF542 || BF544 || BF547 || BF548 || BF549) + depends on (BF542 || BF544 || BF547 || BF548 || BF549 || BF54xM) default y config MEM_GENERIC_BOARD diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile index e550c8d4606..d54c8283825 100644 --- a/arch/blackfin/Makefile +++ b/arch/blackfin/Makefile @@ -21,57 +21,67 @@ KALLSYMS += --symbol-prefix=_ KBUILD_DEFCONFIG := BF537-STAMP_defconfig # setup the machine name and the machine dependent settings -machine-$(CONFIG_BF512) := bf518 -machine-$(CONFIG_BF514) := bf518 -machine-$(CONFIG_BF516) := bf518 -machine-$(CONFIG_BF518) := bf518 -machine-$(CONFIG_BF522) := bf527 -machine-$(CONFIG_BF523) := bf527 -machine-$(CONFIG_BF524) := bf527 -machine-$(CONFIG_BF525) := bf527 -machine-$(CONFIG_BF526) := bf527 -machine-$(CONFIG_BF527) := bf527 -machine-$(CONFIG_BF531) := bf533 -machine-$(CONFIG_BF532) := bf533 -machine-$(CONFIG_BF533) := bf533 -machine-$(CONFIG_BF534) := bf537 -machine-$(CONFIG_BF536) := bf537 -machine-$(CONFIG_BF537) := bf537 -machine-$(CONFIG_BF538) := bf538 -machine-$(CONFIG_BF539) := bf538 -machine-$(CONFIG_BF542) := bf548 -machine-$(CONFIG_BF544) := bf548 -machine-$(CONFIG_BF547) := bf548 -machine-$(CONFIG_BF548) := bf548 -machine-$(CONFIG_BF549) := bf548 -machine-$(CONFIG_BF561) := bf561 +machine-$(CONFIG_BF512) := bf518 +machine-$(CONFIG_BF514) := bf518 +machine-$(CONFIG_BF516) := bf518 +machine-$(CONFIG_BF518) := bf518 +machine-$(CONFIG_BF522) := bf527 +machine-$(CONFIG_BF523) := bf527 +machine-$(CONFIG_BF524) := bf527 +machine-$(CONFIG_BF525) := bf527 +machine-$(CONFIG_BF526) := bf527 +machine-$(CONFIG_BF527) := bf527 +machine-$(CONFIG_BF531) := bf533 +machine-$(CONFIG_BF532) := bf533 +machine-$(CONFIG_BF533) := bf533 +machine-$(CONFIG_BF534) := bf537 +machine-$(CONFIG_BF536) := bf537 +machine-$(CONFIG_BF537) := bf537 +machine-$(CONFIG_BF538) := bf538 +machine-$(CONFIG_BF539) := bf538 +machine-$(CONFIG_BF542) := bf548 +machine-$(CONFIG_BF542M) := bf548 +machine-$(CONFIG_BF544) := bf548 +machine-$(CONFIG_BF544M) := bf548 +machine-$(CONFIG_BF547) := bf548 +machine-$(CONFIG_BF547M) := bf548 +machine-$(CONFIG_BF548) := bf548 +machine-$(CONFIG_BF548M) := bf548 +machine-$(CONFIG_BF549) := bf548 +machine-$(CONFIG_BF549M) := bf548 +machine-$(CONFIG_BF561) := bf561 MACHINE := $(machine-y) export MACHINE -cpu-$(CONFIG_BF512) := bf512 -cpu-$(CONFIG_BF514) := bf514 -cpu-$(CONFIG_BF516) := bf516 -cpu-$(CONFIG_BF518) := bf518 -cpu-$(CONFIG_BF522) := bf522 -cpu-$(CONFIG_BF523) := bf523 -cpu-$(CONFIG_BF524) := bf524 -cpu-$(CONFIG_BF525) := bf525 -cpu-$(CONFIG_BF526) := bf526 -cpu-$(CONFIG_BF527) := bf527 -cpu-$(CONFIG_BF531) := bf531 -cpu-$(CONFIG_BF532) := bf532 -cpu-$(CONFIG_BF533) := bf533 -cpu-$(CONFIG_BF534) := bf534 -cpu-$(CONFIG_BF536) := bf536 -cpu-$(CONFIG_BF537) := bf537 -cpu-$(CONFIG_BF538) := bf538 -cpu-$(CONFIG_BF539) := bf539 -cpu-$(CONFIG_BF542) := bf542 -cpu-$(CONFIG_BF544) := bf544 -cpu-$(CONFIG_BF547) := bf547 -cpu-$(CONFIG_BF548) := bf548 -cpu-$(CONFIG_BF549) := bf549 -cpu-$(CONFIG_BF561) := bf561 +cpu-$(CONFIG_BF512) := bf512 +cpu-$(CONFIG_BF514) := bf514 +cpu-$(CONFIG_BF516) := bf516 +cpu-$(CONFIG_BF518) := bf518 +cpu-$(CONFIG_BF522) := bf522 +cpu-$(CONFIG_BF523) := bf523 +cpu-$(CONFIG_BF524) := bf524 +cpu-$(CONFIG_BF525) := bf525 +cpu-$(CONFIG_BF526) := bf526 +cpu-$(CONFIG_BF527) := bf527 +cpu-$(CONFIG_BF531) := bf531 +cpu-$(CONFIG_BF532) := bf532 +cpu-$(CONFIG_BF533) := bf533 +cpu-$(CONFIG_BF534) := bf534 +cpu-$(CONFIG_BF536) := bf536 +cpu-$(CONFIG_BF537) := bf537 +cpu-$(CONFIG_BF538) := bf538 +cpu-$(CONFIG_BF539) := bf539 +cpu-$(CONFIG_BF542) := bf542 +cpu-$(CONFIG_BF542M) := bf542m +cpu-$(CONFIG_BF544) := bf544 +cpu-$(CONFIG_BF544M) := bf544m +cpu-$(CONFIG_BF547) := bf547 +cpu-$(CONFIG_BF547M) := bf547m +cpu-$(CONFIG_BF548) := bf548 +cpu-$(CONFIG_BF548M) := bf548m +cpu-$(CONFIG_BF549) := bf549 +cpu-$(CONFIG_BF549M) := bf549m +cpu-$(CONFIG_BF561) := bf561 rev-$(CONFIG_BF_REV_0_0) := 0.0 rev-$(CONFIG_BF_REV_0_1) := 0.1 diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig index defb9785c65..4fdb9e04759 100644 --- a/arch/blackfin/configs/BF518F-EZBRD_defconfig +++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig @@ -1,6 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.28-rc2 +# Fri Jan 9 17:58:41 2009 # # CONFIG_MMU is not set # CONFIG_FPU is not set @@ -149,6 +150,7 @@ CONFIG_BF_REV_0_0=y # CONFIG_BF_REV_ANY is not set # CONFIG_BF_REV_NONE is not set CONFIG_BF51x=y +CONFIG_MEM_MT48LC32M8A2_75=y CONFIG_BFIN518F_EZBRD=y # @@ -598,7 +600,10 @@ CONFIG_PHYLIB=y # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y CONFIG_MII=y -# CONFIG_BFIN_MAC is not set +CONFIG_BFIN_MAC=y +CONFIG_BFIN_TX_DESC_NUM=10 +CONFIG_BFIN_RX_DESC_NUM=20 +# CONFIG_BFIN_MAC_RMII is not set # CONFIG_SMC91X is not set # CONFIG_SMSC911X is not set # CONFIG_DM9000 is not set @@ -679,7 +684,7 @@ CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y # CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 992424ff315..8e2b855b8db 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig +++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -723,7 +723,7 @@ CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y # CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index 21e3c1af55b..833128b3972 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -767,7 +767,7 @@ CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y # CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig index 0bdf20a1af6..334c94b51c4 100644 --- a/arch/blackfin/configs/BF533-EZKIT_defconfig +++ b/arch/blackfin/configs/BF533-EZKIT_defconfig @@ -672,7 +672,7 @@ CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig index 2f747d6e97e..9d733436e30 100644 --- a/arch/blackfin/configs/BF533-STAMP_defconfig +++ b/arch/blackfin/configs/BF533-STAMP_defconfig @@ -679,7 +679,7 @@ CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig index 8b0a81294e6..4fb4108d310 100644 --- a/arch/blackfin/configs/BF537-STAMP_defconfig +++ b/arch/blackfin/configs/BF537-STAMP_defconfig @@ -722,7 +722,7 @@ CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig index a1f766bf7d9..cb32f5624a1 100644 --- a/arch/blackfin/configs/BF538-EZKIT_defconfig +++ b/arch/blackfin/configs/BF538-EZKIT_defconfig @@ -726,7 +726,7 @@ CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index cd2da6b7692..0f8697618aa 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -856,7 +856,7 @@ CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y # CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig index b398ca202db..042c7adfccf 100644 --- a/arch/blackfin/configs/BF561-EZKIT_defconfig +++ b/arch/blackfin/configs/BF561-EZKIT_defconfig @@ -709,7 +709,7 @@ CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set -# CONFIG_BFIN_JTAG_COMM is not set +CONFIG_BFIN_JTAG_COMM=m # CONFIG_SERIAL_NONSTANDARD is not set # diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig index 95146948166..865ed85a576 100644 --- a/arch/blackfin/configs/CM-BF527_defconfig +++ b/arch/blackfin/configs/CM-BF527_defconfig @@ -1,7 +1,6 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.24.7 -# Fri Jul 18 18:00:41 2008 +# Linux kernel version: 2.6.28 # # CONFIG_MMU is not set # CONFIG_FPU is not set @@ -9,7 +8,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_RWSEM_XCHGADD_ALGORITHM is not set CONFIG_BLACKFIN=y CONFIG_ZONE_DMA=y -CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_HARDIRQS=y @@ -32,18 +30,16 @@ CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_PID_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CGROUPS is not set -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_FAIR_USER_SCHED=y -# CONFIG_FAIR_CGROUP_SCHED is not set -# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_GROUP_SCHED is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -52,26 +48,35 @@ CONFIG_EMBEDDED=y CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y # CONFIG_ELF_CORE is not set +CONFIG_COMPAT_BRK=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y CONFIG_EVENTFD=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y CONFIG_TINY_SHMEM=y CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set # CONFIG_MODVERSIONS is not set @@ -82,6 +87,7 @@ CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_LSF is not set # CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set # # IO Schedulers @@ -95,9 +101,11 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set +# CONFIG_FREEZER is not set # # Blackfin Processor Options @@ -106,6 +114,10 @@ CONFIG_PREEMPT_VOLUNTARY=y # # Processor and Board Settings # +# CONFIG_BF512 is not set +# CONFIG_BF514 is not set +# CONFIG_BF516 is not set +# CONFIG_BF518 is not set # CONFIG_BF522 is not set # CONFIG_BF523 is not set # CONFIG_BF524 is not set @@ -118,48 +130,32 @@ CONFIG_BF527=y # CONFIG_BF534 is not set # CONFIG_BF536 is not set # CONFIG_BF537 is not set +# CONFIG_BF538 is not set +# CONFIG_BF539 is not set # CONFIG_BF542 is not set +# CONFIG_BF542M is not set # CONFIG_BF544 is not set +# CONFIG_BF544M is not set # CONFIG_BF547 is not set +# CONFIG_BF547M is not set # CONFIG_BF548 is not set +# CONFIG_BF548M is not set # CONFIG_BF549 is not set +# CONFIG_BF549M is not set # CONFIG_BF561 is not set +CONFIG_BF_REV_MIN=0 +CONFIG_BF_REV_MAX=2 # CONFIG_BF_REV_0_0 is not set CONFIG_BF_REV_0_1=y # CONFIG_BF_REV_0_2 is not set # CONFIG_BF_REV_0_3 is not set # CONFIG_BF_REV_0_4 is not set # CONFIG_BF_REV_0_5 is not set +# CONFIG_BF_REV_0_6 is not set # CONFIG_BF_REV_ANY is not set # CONFIG_BF_REV_NONE is not set CONFIG_BF52x=y CONFIG_MEM_MT48LC16M16A2TG_75=y -# CONFIG_BFIN527_EZKIT is not set -CONFIG_BFIN527_BLUETECHNIX_CM=y - -# -# BF527 Specific Configuration -# - -# -# Alternative Multiplexing Scheme -# -# CONFIG_BF527_SPORT0_PORTF is not set -CONFIG_BF527_SPORT0_PORTG=y -CONFIG_BF527_SPORT0_TSCLK_PG10=y -# CONFIG_BF527_SPORT0_TSCLK_PG14 is not set -CONFIG_BF527_UART1_PORTF=y -# CONFIG_BF527_UART1_PORTG is not set -# CONFIG_BF527_NAND_D_PORTF is not set -CONFIG_BF527_NAND_D_PORTH=y - -# -# Interrupt Priority Assignment -# - -# -# Priority -# CONFIG_IRQ_PLL_WAKEUP=7 CONFIG_IRQ_DMA0_ERROR=7 CONFIG_IRQ_DMAR0_BLK=7 @@ -179,7 +175,6 @@ CONFIG_IRQ_SPORT0_TX=9 CONFIG_IRQ_SPORT1_RX=9 CONFIG_IRQ_SPORT1_TX=9 CONFIG_IRQ_TWI=10 -CONFIG_IRQ_SPI=10 CONFIG_IRQ_UART0_RX=10 CONFIG_IRQ_UART0_TX=10 CONFIG_IRQ_UART1_RX=10 @@ -205,6 +200,34 @@ CONFIG_IRQ_MEM_DMA1=13 CONFIG_IRQ_WATCH=13 CONFIG_IRQ_PORTF_INTA=13 CONFIG_IRQ_PORTF_INTB=13 +# CONFIG_BFIN527_EZKIT is not set +CONFIG_BFIN527_BLUETECHNIX_CM=y +# CONFIG_BFIN526_EZBRD is not set + +# +# BF527 Specific Configuration +# + +# +# Alternative Multiplexing Scheme +# +# CONFIG_BF527_SPORT0_PORTF is not set +CONFIG_BF527_SPORT0_PORTG=y +CONFIG_BF527_SPORT0_TSCLK_PG10=y +# CONFIG_BF527_SPORT0_TSCLK_PG14 is not set +CONFIG_BF527_UART1_PORTF=y +# CONFIG_BF527_UART1_PORTG is not set +# CONFIG_BF527_NAND_D_PORTF is not set +CONFIG_BF527_NAND_D_PORTH=y + +# +# Interrupt Priority Assignment +# + +# +# Priority +# +CONFIG_IRQ_SPI=10 CONFIG_IRQ_SPI_ERROR=7 CONFIG_IRQ_NFC_ERROR=7 CONFIG_IRQ_HDMA_ERROR=7 @@ -226,7 +249,6 @@ CONFIG_BOOT_LOAD=0x1000 # CONFIG_CLKIN_HZ=25000000 # CONFIG_BFIN_KERNEL_CLOCK is not set -CONFIG_MAX_MEM_SIZE=512 CONFIG_MAX_VCO_HZ=600000000 CONFIG_MIN_VCO_HZ=50000000 CONFIG_MAX_SCLK_HZ=133333333 @@ -240,10 +262,10 @@ CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 +# CONFIG_SCHED_HRTICK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y # CONFIG_CYCLES_CLOCKSOURCE is not set -# CONFIG_TICK_ONESHOT is not set # CONFIG_NO_HZ is not set # CONFIG_HIGH_RES_TIMERS is not set CONFIG_GENERIC_CLOCKEVENTS_BUILD=y @@ -277,6 +299,12 @@ CONFIG_ACCESS_OK_L1=y CONFIG_CACHELINE_ALIGNED_L1=y # CONFIG_SYSCALL_TAB_L1 is not set # CONFIG_CPLB_SWITCH_TAB_L1 is not set +CONFIG_APP_STACK_L1=y + +# +# Speed Optimizations +# +CONFIG_BFIN_INS_LOWOVERHEAD=y CONFIG_RAMKERNEL=y # CONFIG_ROMKERNEL is not set CONFIG_SELECT_MEMORY_MODEL=y @@ -285,10 +313,10 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set +# CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_VIRT_TO_BUS=y CONFIG_BFIN_GPTIMERS=y @@ -334,7 +362,6 @@ CONFIG_BANK_3=0xFFC0 # # Bus options (PCI, PCMCIA, EISA, MCA, ISA) # -# CONFIG_PCI is not set # CONFIG_ARCH_SUPPORTS_MSI is not set # CONFIG_PCCARD is not set @@ -345,25 +372,20 @@ CONFIG_BINFMT_ELF_FDPIC=y CONFIG_BINFMT_FLAT=y CONFIG_BINFMT_ZFLAT=y # CONFIG_BINFMT_SHARED_FLAT is not set +# CONFIG_HAVE_AOUT is not set # CONFIG_BINFMT_MISC is not set # # Power management options # # CONFIG_PM is not set -CONFIG_SUSPEND_UP_POSSIBLE=y -# CONFIG_PM_BFIN_SLEEP_DEEPER is not set -# CONFIG_PM_BFIN_SLEEP is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_PM_WAKEUP_BY_GPIO is not set # # CPU Frequency scaling # # CONFIG_CPU_FREQ is not set - -# -# Networking -# CONFIG_NET=y # @@ -376,6 +398,7 @@ CONFIG_XFRM=y # CONFIG_XFRM_USER is not set # CONFIG_XFRM_SUB_POLICY is not set # CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y # CONFIG_IP_MULTICAST is not set @@ -405,8 +428,6 @@ CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TCP_MD5SIG is not set # CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set # CONFIG_NETLABEL is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set @@ -415,6 +436,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set # CONFIG_LLC2 is not set @@ -431,14 +453,14 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # # CONFIG_NET_PKTGEN is not set # CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set - -# -# Wireless -# +# CONFIG_PHONET is not set +CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set +CONFIG_WIRELESS_OLD_REGULATORY=y # CONFIG_WIRELESS_EXT is not set # CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set @@ -456,6 +478,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set # CONFIG_CONNECTOR is not set CONFIG_MTD=y @@ -464,6 +488,7 @@ CONFIG_MTD=y CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_REDBOOT_PARTS is not set # CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set # # User Modules And Translation Layers @@ -507,6 +532,7 @@ CONFIG_MTD_ROM=m # CONFIG_MTD_COMPLEX_MAPPINGS=y # CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_GPIO_ADDR is not set # CONFIG_MTD_UCLINUX is not set # CONFIG_MTD_PLATRAM is not set @@ -542,10 +568,12 @@ CONFIG_BLK_DEV=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set # CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y # CONFIG_IDE is not set # @@ -558,7 +586,6 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_ATA is not set # CONFIG_MD is not set CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -579,6 +606,7 @@ CONFIG_PHYLIB=y # CONFIG_SMSC_PHY is not set # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -591,11 +619,14 @@ CONFIG_BFIN_MAC_RMII=y # CONFIG_SMC91X is not set # CONFIG_SMSC911X is not set # CONFIG_DM9000 is not set +# CONFIG_ENC28J60 is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_B44 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_NETDEV_1000 is not set # CONFIG_NETDEV_10000 is not set @@ -604,6 +635,7 @@ CONFIG_BFIN_MAC_RMII=y # # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set # # USB Network Adapters @@ -616,7 +648,6 @@ CONFIG_BFIN_MAC_RMII=y # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set -# CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set @@ -642,14 +673,15 @@ CONFIG_BFIN_MAC_RMII=y # CONFIG_BF5xx_PPIFCD is not set # CONFIG_BFIN_SIMPLE_TIMER is not set # CONFIG_BF5xx_PPI is not set -CONFIG_BFIN_OTP=y -# CONFIG_BFIN_OTP_WRITE_ENABLE is not set +# CONFIG_BF5xx_EPPI is not set # CONFIG_BFIN_SPORT is not set # CONFIG_BFIN_TIMER_LATENCY is not set # CONFIG_TWI_LCD is not set +CONFIG_BFIN_DMA_INTERFACE=m CONFIG_SIMPLE_GPIO=m # CONFIG_VT is not set # CONFIG_DEVKMEM is not set +# CONFIG_BFIN_JTAG_COMM is not set # CONFIG_SERIAL_NONSTANDARD is not set # @@ -673,6 +705,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_BFIN_SPORT is not set CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set +CONFIG_BFIN_OTP=y +# CONFIG_BFIN_OTP_WRITE_ENABLE is not set # # CAN, the car bus and industrial fieldbus @@ -680,44 +714,49 @@ CONFIG_UNIX98_PTYS=y # CONFIG_CAN4LINUX is not set # CONFIG_IPMI_HANDLER is not set # CONFIG_HW_RANDOM is not set -# CONFIG_GEN_RTC is not set # CONFIG_R3964 is not set # CONFIG_RAW_DRIVER is not set # CONFIG_TCG_TPM is not set CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y CONFIG_I2C_CHARDEV=m +CONFIG_I2C_HELPER_AUTO=y # -# I2C Algorithms +# I2C Hardware Bus support # -# CONFIG_I2C_ALGOBIT is not set -# CONFIG_I2C_ALGOPCF is not set -# CONFIG_I2C_ALGOPCA is not set # -# I2C Hardware Bus support +# I2C system bus drivers (mostly embedded / system-on-chip) # CONFIG_I2C_BLACKFIN_TWI=m CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # CONFIG_I2C_GPIO is not set # CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_STUB is not set # CONFIG_I2C_TINY_USB is not set # +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set + +# # Miscellaneous I2C Chip support # -# CONFIG_SENSORS_DS1337 is not set -# CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set +# CONFIG_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_EEPROM_LEGACY is not set +# CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set -# CONFIG_SENSORS_PCF8575 is not set +# CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set @@ -726,37 +765,41 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set # CONFIG_I2C_DEBUG_CHIP is not set - -# -# SPI support -# CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set CONFIG_SPI_MASTER=y # # SPI Master Controller Drivers # CONFIG_SPI_BFIN=y +# CONFIG_SPI_BFIN_LOCK is not set # CONFIG_SPI_BITBANG is not set # # SPI Protocol Masters # -# CONFIG_EEPROM_AT25 is not set +# CONFIG_SPI_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set # CONFIG_W1 is not set # CONFIG_POWER_SUPPLY is not set CONFIG_HWMON=y # CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_AD7414 is not set # CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADCXX is not set # CONFIG_SENSORS_ADM1021 is not set # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1029 is not set # CONFIG_SENSORS_ADM1031 is not set # CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set # CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set # CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_F71805F is not set @@ -777,6 +820,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LM90 is not set # CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_MAX1111 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_MAX6650 is not set # CONFIG_SENSORS_PC87360 is not set @@ -785,6 +829,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_SMSC47M192 is not set # CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set # CONFIG_SENSORS_THMC50 is not set # CONFIG_SENSORS_VT1211 is not set # CONFIG_SENSORS_W83781D is not set @@ -792,9 +837,12 @@ CONFIG_HWMON=y # CONFIG_SENSORS_W83792D is not set # CONFIG_SENSORS_W83793 is not set # CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set # CONFIG_HWMON_DEBUG_CHIP is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -810,21 +858,31 @@ CONFIG_BFIN_WDT=y # CONFIG_USBPCWATCHDOG is not set # -# Sonics Silicon Backplane -# -CONFIG_SSB_POSSIBLE=y -# CONFIG_SSB is not set - -# # Multifunction device drivers # +# CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_REGULATOR is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# # CONFIG_DAB is not set # @@ -839,10 +897,6 @@ CONFIG_SSB_POSSIBLE=y # Display device support # # CONFIG_DISPLAY_SUPPORT is not set - -# -# Sound -# # CONFIG_SOUND is not set CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y @@ -850,6 +904,7 @@ CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_EHCI is not set CONFIG_USB=y # CONFIG_USB_DEBUG is not set +# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set # # Miscellaneous USB options @@ -860,40 +915,48 @@ CONFIG_USB_DEVICE_CLASS=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_WHITELIST is not set CONFIG_USB_OTG_BLACKLIST_HUB=y +CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set # CONFIG_USB_ISP116X_HCD is not set -# CONFIG_USB_ISP1362_HCD is not set # CONFIG_USB_ISP1760_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_HWA_HCD is not set CONFIG_USB_MUSB_HDRC=y CONFIG_USB_MUSB_SOC=y # -# Blackfin high speed USB support +# Blackfin high speed USB Support # CONFIG_USB_MUSB_HOST=y # CONFIG_USB_MUSB_PERIPHERAL is not set # CONFIG_USB_MUSB_OTG is not set CONFIG_USB_MUSB_HDRC_HCD=y CONFIG_MUSB_PIO_ONLY=y -CONFIG_USB_MUSB_LOGLEVEL=0 +CONFIG_MUSB_DMA_POLL=y +# CONFIG_USB_MUSB_DEBUG is not set # # USB Device Class drivers # # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # # CONFIG_USB_LIBUSUAL is not set @@ -901,15 +964,10 @@ CONFIG_USB_MUSB_LOGLEVEL=0 # USB Imaging devices # # CONFIG_USB_MDC800 is not set -CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -918,7 +976,7 @@ CONFIG_USB_MON=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set -# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -934,17 +992,13 @@ CONFIG_USB_MON=y # CONFIG_USB_LD is not set # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set - -# -# USB DSL modem support -# - -# -# USB Gadget Support -# +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set CONFIG_RTC_LIB=y CONFIG_RTC_CLASS=y CONFIG_RTC_HCTOSYS=y @@ -973,51 +1027,59 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_PCF8563 is not set # CONFIG_RTC_DRV_PCF8583 is not set # CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers # -# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_M41T94 is not set +# CONFIG_RTC_DRV_DS1305 is not set +# CONFIG_RTC_DRV_DS1390 is not set # CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set +# CONFIG_RTC_DRV_DS3234 is not set # # Platform RTC drivers # +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_V3020 is not set # # on-CPU RTC drivers # CONFIG_RTC_DRV_BFIN=y - -# -# Userspace I/O -# +# CONFIG_DMADEVICES is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set # # File systems # # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set # CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +# CONFIG_DNOTIFY is not set CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set # CONFIG_FUSE_FS is not set @@ -1059,8 +1121,11 @@ CONFIG_SYSFS=y # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y @@ -1068,13 +1133,12 @@ CONFIG_NFS_FS=m CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_LOCKD=m CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=m -# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_SUNRPC_REGISTER_V4 is not set # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m @@ -1130,7 +1194,6 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set # CONFIG_DLM is not set -# CONFIG_INSTRUMENTATION is not set # # Kernel hacking @@ -1138,14 +1201,61 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_PRINTK_TIME is not set CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 # CONFIG_MAGIC_SYSRQ is not set # CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_KERNEL is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set + +# +# Tracers +# +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_KGDB_TESTCASE is not set +CONFIG_DEBUG_VERBOSE=y CONFIG_DEBUG_MMRS=y +# CONFIG_DEBUG_HWERR is not set +# CONFIG_DEBUG_DOUBLEFAULT is not set CONFIG_DEBUG_HUNT_FOR_ZERO=y CONFIG_DEBUG_BFIN_HWTRACE_ON=y CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y @@ -1154,7 +1264,7 @@ CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set # CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set -CONFIG_EARLY_PRINTK=y +# CONFIG_EARLY_PRINTK is not set # CONFIG_CPLB_INFO is not set CONFIG_ACCESS_CHECK=y @@ -1163,10 +1273,96 @@ CONFIG_ACCESS_CHECK=y # # CONFIG_KEYS is not set CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set # CONFIG_SECURITY_NETWORK is not set -# CONFIG_SECURITY_CAPABILITIES is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_SECURITY_ROOTPLUG is not set -# CONFIG_CRYPTO is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0 +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_HW=y # # Library routines @@ -1174,6 +1370,7 @@ CONFIG_SECURITY=y CONFIG_BITREVERSE=y CONFIG_CRC_CCITT=m # CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set diff --git a/arch/blackfin/include/asm/checksum.h b/arch/blackfin/include/asm/checksum.h index f67289a0d8d..793581fc955 100644 --- a/arch/blackfin/include/asm/checksum.h +++ b/arch/blackfin/include/asm/checksum.h @@ -63,23 +63,23 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, __wsum sum) { - - __asm__ ("%0 = %0 + %1;\n\t" - "CC = AC0;\n\t" - "if !CC jump 4;\n\t" - "%0 = %0 + %4;\n\t" - "%0 = %0 + %2;\n\t" - "CC = AC0;\n\t" - "if !CC jump 4;\n\t" - "%0 = %0 + %4;\n\t" - "%0 = %0 + %3;\n\t" - "CC = AC0;\n\t" - "if !CC jump 4;\n\t" - "%0 = %0 + %4;\n\t" - "NOP;\n\t" - : "=d" (sum) - : "d" (daddr), "d" (saddr), "d" ((ntohs(len)<<16)+proto*256), "d" (1), "0"(sum) - : "CC"); + unsigned int carry; + + __asm__ ("%0 = %0 + %2;\n\t" + "CC = AC0;\n\t" + "%1 = CC;\n\t" + "%0 = %0 + %1;\n\t" + "%0 = %0 + %3;\n\t" + "CC = AC0;\n\t" + "%1 = CC;\n\t" + "%0 = %0 + %1;\n\t" + "%0 = %0 + %4;\n\t" + "CC = AC0;\n\t" + "%1 = CC;\n\t" + "%0 = %0 + %1;\n\t" + : "=d" (sum), "=&d" (carry) + : "d" (daddr), "d" (saddr), "d" ((len + proto) << 8), "0"(sum) + : "CC"); return (sum); } diff --git a/arch/blackfin/include/asm/delay.h b/arch/blackfin/include/asm/delay.h index 0889c3abb59..c31f91cc1d5 100644 --- a/arch/blackfin/include/asm/delay.h +++ b/arch/blackfin/include/asm/delay.h @@ -13,29 +13,7 @@ static inline void __delay(unsigned long loops) { - if (ANOMALY_05000312) { - /* Interrupted loads to loop registers -> bad */ - unsigned long tmp; - __asm__ __volatile__( - "[--SP] = LC0;" - "[--SP] = LT0;" - "[--SP] = LB0;" - "LSETUP (1f,1f) LC0 = %1;" - "1: NOP;" - /* We take advantage of the fact that LC0 is 0 at - * the end of the loop. Otherwise we'd need some - * NOPs after the CLI here. - */ - "CLI %0;" - "LB0 = [SP++];" - "LT0 = [SP++];" - "LC0 = [SP++];" - "STI %0;" - : "=d" (tmp) - : "a" (loops) - ); - } else - __asm__ __volatile__ ( +__asm__ __volatile__ ( "LSETUP(1f, 1f) LC0 = %0;" "1: NOP;" : @@ -47,16 +25,15 @@ static inline void __delay(unsigned long loops) #include <linux/param.h> /* needed for HZ */ /* - * Use only for very small delays ( < 1 msec). Should probably use a - * lookup table, really, as the multiplications take much too long with - * short delays. This is a "reasonable" implementation, though (and the - * first constant multiplications gets optimized away if the delay is - * a constant) + * close approximation borrowed from m68knommu to avoid 64-bit math */ + +#define HZSCALE (268435456 / (1000000/HZ)) + static inline void udelay(unsigned long usecs) { extern unsigned long loops_per_jiffy; - __delay(usecs * loops_per_jiffy / (1000000 / HZ)); + __delay((((usecs * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6); } #endif diff --git a/arch/blackfin/include/asm/gpio.h b/arch/blackfin/include/asm/gpio.h index 9477d82fcad..d4a082ef75b 100644 --- a/arch/blackfin/include/asm/gpio.h +++ b/arch/blackfin/include/asm/gpio.h @@ -27,60 +27,6 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -/* -* Number BF537/6/4 BF561 BF533/2/1 -* BF527/5/2 -* -* GPIO_0 PF0 PF0 PF0 -* GPIO_1 PF1 PF1 PF1 -* GPIO_2 PF2 PF2 PF2 -* GPIO_3 PF3 PF3 PF3 -* GPIO_4 PF4 PF4 PF4 -* GPIO_5 PF5 PF5 PF5 -* GPIO_6 PF6 PF6 PF6 -* GPIO_7 PF7 PF7 PF7 -* GPIO_8 PF8 PF8 PF8 -* GPIO_9 PF9 PF9 PF9 -* GPIO_10 PF10 PF10 PF10 -* GPIO_11 PF11 PF11 PF11 -* GPIO_12 PF12 PF12 PF12 -* GPIO_13 PF13 PF13 PF13 -* GPIO_14 PF14 PF14 PF14 -* GPIO_15 PF15 PF15 PF15 -* GPIO_16 PG0 PF16 -* GPIO_17 PG1 PF17 -* GPIO_18 PG2 PF18 -* GPIO_19 PG3 PF19 -* GPIO_20 PG4 PF20 -* GPIO_21 PG5 PF21 -* GPIO_22 PG6 PF22 -* GPIO_23 PG7 PF23 -* GPIO_24 PG8 PF24 -* GPIO_25 PG9 PF25 -* GPIO_26 PG10 PF26 -* GPIO_27 PG11 PF27 -* GPIO_28 PG12 PF28 -* GPIO_29 PG13 PF29 -* GPIO_30 PG14 PF30 -* GPIO_31 PG15 PF31 -* GPIO_32 PH0 PF32 -* GPIO_33 PH1 PF33 -* GPIO_34 PH2 PF34 -* GPIO_35 PH3 PF35 -* GPIO_36 PH4 PF36 -* GPIO_37 PH5 PF37 -* GPIO_38 PH6 PF38 -* GPIO_39 PH7 PF39 -* GPIO_40 PH8 PF40 -* GPIO_41 PH9 PF41 -* GPIO_42 PH10 PF42 -* GPIO_43 PH11 PF43 -* GPIO_44 PH12 PF44 -* GPIO_45 PH13 PF45 -* GPIO_46 PH14 PF46 -* GPIO_47 PH15 PF47 -*/ - #ifndef __ARCH_BLACKFIN_GPIO_H__ #define __ARCH_BLACKFIN_GPIO_H__ @@ -295,10 +241,6 @@ int bfin_gpio_direction_output(unsigned gpio, int value); int bfin_gpio_get_value(unsigned gpio); void bfin_gpio_set_value(unsigned gpio, int value); -#ifndef BF548_FAMILY -#define bfin_gpio_set_value(gpio, value) set_gpio_data(gpio, value) -#endif - #ifdef CONFIG_GPIOLIB #include <asm-generic/gpio.h> /* cansleep wrappers */ diff --git a/arch/blackfin/include/asm/kgdb.h b/arch/blackfin/include/asm/kgdb.h index 26ebac6646d..c8b256d2ea3 100644 --- a/arch/blackfin/include/asm/kgdb.h +++ b/arch/blackfin/include/asm/kgdb.h @@ -1,32 +1,8 @@ -/* - * File: include/asm-blackfin/kgdb.h - * Based on: - * Author: Sonic Zhang - * - * Created: - * Description: - * - * Rev: $Id: kgdb_bfin_linux-2.6.x.patch 4934 2007-02-13 09:32:11Z sonicz $ - * - * Modified: - * Copyright 2005-2006 Analog Devices Inc. - * - * Bugs: Enter bugs at http://blackfin.uclinux.org/ +/* Blackfin KGDB header * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Copyright 2005-2009 Analog Devices Inc. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * Licensed under the GPL-2 or later. */ #ifndef __ASM_BLACKFIN_KGDB_H__ @@ -37,17 +13,18 @@ /* gdb locks */ #define KGDB_MAX_NO_CPUS 8 -/************************************************************************/ -/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ -/* at least NUMREGBYTES*2 are needed for register packets */ -/* Longer buffer is needed to list all threads */ +/* + * BUFMAX defines the maximum number of characters in inbound/outbound buffers. + * At least NUMREGBYTES*2 are needed for register packets. + * Longer buffer is needed to list all threads. + */ #define BUFMAX 2048 /* - * Note that this register image is different from - * the register image that Linux produces at interrupt time. - * - * Linux's register image is defined by struct pt_regs in ptrace.h. + * Note that this register image is different from + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. */ enum regnames { /* Core Registers */ @@ -104,14 +81,14 @@ enum regnames { BFIN_RETX, BFIN_RETN, BFIN_RETE, - + /* Pseudo Registers */ BFIN_PC, BFIN_CC, BFIN_EXTRA1, /* Address of .text section. */ BFIN_EXTRA2, /* Address of .data section. */ BFIN_EXTRA3, /* Address of .bss section. */ - BFIN_FDPIC_EXEC, + BFIN_FDPIC_EXEC, BFIN_FDPIC_INTERP, /* MMRs */ @@ -126,7 +103,7 @@ enum regnames { static inline void arch_kgdb_breakpoint(void) { - asm(" EXCPT 2;"); + asm("EXCPT 2;"); } #define BREAK_INSTR_SIZE 2 #define CACHE_FLUSH_IS_SAFE 1 diff --git a/arch/blackfin/include/asm/mem_init.h b/arch/blackfin/include/asm/mem_init.h index 255a9316ad3..61f7487fbf1 100644 --- a/arch/blackfin/include/asm/mem_init.h +++ b/arch/blackfin/include/asm/mem_init.h @@ -115,7 +115,7 @@ #define mem_SDRRC (((CONFIG_SCLK_HZ / 1000) * SDRAM_Tref) / SDRAM_NRA) - (SDRAM_tRAS_num + SDRAM_tRP_num) /* Enable SCLK Out */ -#define mem_SDGCTL (0x80000000 | SCTLE | SDRAM_CL | SDRAM_tRAS | SDRAM_tRP | SDRAM_tRCD | SDRAM_tWR | PSS) +#define mem_SDGCTL (SCTLE | SDRAM_CL | SDRAM_tRAS | SDRAM_tRP | SDRAM_tRCD | SDRAM_tWR | PSS) #else #define mem_SDRRC CONFIG_MEM_SDRRC #define mem_SDGCTL CONFIG_MEM_SDGCTL diff --git a/arch/blackfin/include/asm/pda.h b/arch/blackfin/include/asm/pda.h index bd8d4a7efeb..a67142740df 100644 --- a/arch/blackfin/include/asm/pda.h +++ b/arch/blackfin/include/asm/pda.h @@ -59,6 +59,7 @@ struct blackfin_pda { /* Per-processor Data Area */ unsigned long icplb_fault_addr; unsigned long retx; unsigned long seqstat; + unsigned int __nmi_count; /* number of times NMI asserted on this CPU */ }; extern struct blackfin_pda cpu_pda[]; diff --git a/arch/blackfin/include/asm/reboot.h b/arch/blackfin/include/asm/reboot.h index 4856d62b746..ae1e36329be 100644 --- a/arch/blackfin/include/asm/reboot.h +++ b/arch/blackfin/include/asm/reboot.h @@ -15,6 +15,6 @@ extern void native_machine_halt(void); extern void native_machine_power_off(void); /* common reboot workarounds */ -extern void bfin_gpio_reset_spi0_ssel1(void); +extern void bfin_reset_boot_spi_cs(unsigned short pin); #endif diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile index 38a233374f0..4a92a86824b 100644 --- a/arch/blackfin/kernel/Makefile +++ b/arch/blackfin/kernel/Makefile @@ -15,6 +15,8 @@ else obj-y += time.o endif +CFLAGS_kgdb_test.o := -mlong-calls -O0 + obj-$(CONFIG_IPIPE) += ipipe.o obj-$(CONFIG_IPIPE_TRACE_MCOUNT) += mcount.o obj-$(CONFIG_BFIN_GPTIMERS) += gptimers.o diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index 07e02c0d1c0..8531693fb48 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -249,6 +249,13 @@ static void __dma_memcpy(u32 daddr, s16 dmod, u32 saddr, s16 smod, size_t cnt, u spin_lock_irqsave(&mdma_lock, flags); + /* Force a sync in case a previous config reset on this channel + * occurred. This is needed so subsequent writes to DMA registers + * are not spuriously lost/corrupted. Do it under irq lock and + * without the anomaly version (because we are atomic already). + */ + __builtin_bfin_ssync(); + if (bfin_read_MDMA_S0_CONFIG()) while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE)) continue; diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 4c14331978f..51dac55c524 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -27,59 +27,6 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -/* -* Number BF537/6/4 BF561 BF533/2/1 BF549/8/4/2 -* -* GPIO_0 PF0 PF0 PF0 PA0...PJ13 -* GPIO_1 PF1 PF1 PF1 -* GPIO_2 PF2 PF2 PF2 -* GPIO_3 PF3 PF3 PF3 -* GPIO_4 PF4 PF4 PF4 -* GPIO_5 PF5 PF5 PF5 -* GPIO_6 PF6 PF6 PF6 -* GPIO_7 PF7 PF7 PF7 -* GPIO_8 PF8 PF8 PF8 -* GPIO_9 PF9 PF9 PF9 -* GPIO_10 PF10 PF10 PF10 -* GPIO_11 PF11 PF11 PF11 -* GPIO_12 PF12 PF12 PF12 -* GPIO_13 PF13 PF13 PF13 -* GPIO_14 PF14 PF14 PF14 -* GPIO_15 PF15 PF15 PF15 -* GPIO_16 PG0 PF16 -* GPIO_17 PG1 PF17 -* GPIO_18 PG2 PF18 -* GPIO_19 PG3 PF19 -* GPIO_20 PG4 PF20 -* GPIO_21 PG5 PF21 -* GPIO_22 PG6 PF22 -* GPIO_23 PG7 PF23 -* GPIO_24 PG8 PF24 -* GPIO_25 PG9 PF25 -* GPIO_26 PG10 PF26 -* GPIO_27 PG11 PF27 -* GPIO_28 PG12 PF28 -* GPIO_29 PG13 PF29 -* GPIO_30 PG14 PF30 -* GPIO_31 PG15 PF31 -* GPIO_32 PH0 PF32 -* GPIO_33 PH1 PF33 -* GPIO_34 PH2 PF34 -* GPIO_35 PH3 PF35 -* GPIO_36 PH4 PF36 -* GPIO_37 PH5 PF37 -* GPIO_38 PH6 PF38 -* GPIO_39 PH7 PF39 -* GPIO_40 PH8 PF40 -* GPIO_41 PH9 PF41 -* GPIO_42 PH10 PF42 -* GPIO_43 PH11 PF43 -* GPIO_44 PH12 PF44 -* GPIO_45 PH13 PF45 -* GPIO_46 PH14 PF46 -* GPIO_47 PH15 PF47 -*/ - #include <linux/delay.h> #include <linux/module.h> #include <linux/err.h> @@ -119,62 +66,61 @@ enum { #define AWA_DUMMY_READ(...) do { } while (0) #endif +static struct gpio_port_t * const gpio_array[] = { #if defined(BF533_FAMILY) || defined(BF538_FAMILY) -static struct gpio_port_t *gpio_bankb[] = { (struct gpio_port_t *) FIO_FLAG_D, -}; -#endif - -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) -static struct gpio_port_t *gpio_bankb[] = { +#elif defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) (struct gpio_port_t *) PORTFIO, (struct gpio_port_t *) PORTGIO, (struct gpio_port_t *) PORTHIO, +#elif defined(BF561_FAMILY) + (struct gpio_port_t *) FIO0_FLAG_D, + (struct gpio_port_t *) FIO1_FLAG_D, + (struct gpio_port_t *) FIO2_FLAG_D, +#elif defined(BF548_FAMILY) + (struct gpio_port_t *)PORTA_FER, + (struct gpio_port_t *)PORTB_FER, + (struct gpio_port_t *)PORTC_FER, + (struct gpio_port_t *)PORTD_FER, + (struct gpio_port_t *)PORTE_FER, + (struct gpio_port_t *)PORTF_FER, + (struct gpio_port_t *)PORTG_FER, + (struct gpio_port_t *)PORTH_FER, + (struct gpio_port_t *)PORTI_FER, + (struct gpio_port_t *)PORTJ_FER, +#else +# error no gpio arrays defined +#endif }; -static unsigned short *port_fer[] = { +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) +static unsigned short * const port_fer[] = { (unsigned short *) PORTF_FER, (unsigned short *) PORTG_FER, (unsigned short *) PORTH_FER, }; -#endif -#if defined(BF527_FAMILY) || defined(BF518_FAMILY) -static unsigned short *port_mux[] = { +# if !defined(BF537_FAMILY) +static unsigned short * const port_mux[] = { (unsigned short *) PORTF_MUX, (unsigned short *) PORTG_MUX, (unsigned short *) PORTH_MUX, }; static const -u8 pmux_offset[][16] = - {{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 4, 6, 8, 8, 10, 10 }, /* PORTF */ - { 0, 0, 0, 0, 0, 2, 2, 4, 4, 6, 8, 10, 10, 10, 12, 12 }, /* PORTG */ - { 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 4, 4, 4, 4, 4, 4 }, /* PORTH */ - }; -#endif - -#ifdef BF561_FAMILY -static struct gpio_port_t *gpio_bankb[] = { - (struct gpio_port_t *) FIO0_FLAG_D, - (struct gpio_port_t *) FIO1_FLAG_D, - (struct gpio_port_t *) FIO2_FLAG_D, +u8 pmux_offset[][16] = { +# if defined(BF527_FAMILY) + { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 4, 6, 8, 8, 10, 10 }, /* PORTF */ + { 0, 0, 0, 0, 0, 2, 2, 4, 4, 6, 8, 10, 10, 10, 12, 12 }, /* PORTG */ + { 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 4, 4, 4, 4, 4, 4 }, /* PORTH */ +# elif defined(BF518_FAMILY) + { 0, 2, 2, 2, 2, 2, 2, 4, 6, 6, 6, 8, 8, 8, 8, 10 }, /* PORTF */ + { 0, 0, 0, 2, 4, 6, 6, 6, 8, 10, 10, 12, 14, 14, 14, 14 }, /* PORTG */ + { 0, 0, 0, 0, 2, 2, 4, 6, 10, 10, 10, 10, 10, 10, 10, 10 }, /* PORTH */ +# endif }; -#endif +# endif -#ifdef BF548_FAMILY -static struct gpio_port_t *gpio_array[] = { - (struct gpio_port_t *)PORTA_FER, - (struct gpio_port_t *)PORTB_FER, - (struct gpio_port_t *)PORTC_FER, - (struct gpio_port_t *)PORTD_FER, - (struct gpio_port_t *)PORTE_FER, - (struct gpio_port_t *)PORTF_FER, - (struct gpio_port_t *)PORTG_FER, - (struct gpio_port_t *)PORTH_FER, - (struct gpio_port_t *)PORTI_FER, - (struct gpio_port_t *)PORTJ_FER, -}; #endif static unsigned short reserved_gpio_map[GPIO_BANK_NUM]; @@ -188,35 +134,9 @@ static struct str_ident { } str_ident[MAX_RESOURCES]; #if defined(CONFIG_PM) -#if defined(CONFIG_BF54x) -static struct gpio_port_s gpio_bank_saved[GPIO_BANK_NUM]; -#else -static unsigned short wakeup_map[GPIO_BANK_NUM]; -static unsigned char wakeup_flags_map[MAX_BLACKFIN_GPIOS]; static struct gpio_port_s gpio_bank_saved[GPIO_BANK_NUM]; - -#ifdef BF533_FAMILY -static unsigned int sic_iwr_irqs[] = {IRQ_PROG_INTB}; -#endif - -#ifdef BF537_FAMILY -static unsigned int sic_iwr_irqs[] = {IRQ_PROG_INTB, IRQ_PORTG_INTB, IRQ_MAC_TX}; -#endif - -#ifdef BF538_FAMILY -static unsigned int sic_iwr_irqs[] = {IRQ_PORTF_INTB}; #endif -#if defined(BF527_FAMILY) || defined(BF518_FAMILY) -static unsigned int sic_iwr_irqs[] = {IRQ_PORTF_INTB, IRQ_PORTG_INTB, IRQ_PORTH_INTB}; -#endif - -#ifdef BF561_FAMILY -static unsigned int sic_iwr_irqs[] = {IRQ_PROG0_INTB, IRQ_PROG1_INTB, IRQ_PROG2_INTB}; -#endif -#endif -#endif /* CONFIG_PM */ - inline int check_gpio(unsigned gpio) { #if defined(BF548_FAMILY) @@ -330,9 +250,10 @@ static struct { {.res = P_SPI0_SSEL3, .offset = 0}, }; -static void portmux_setup(unsigned short per, unsigned short function) +static void portmux_setup(unsigned short per) { u16 y, offset, muxreg; + u16 function = P_FUNCT2MUX(per); for (y = 0; y < ARRAY_SIZE(port_mux_lut); y++) { if (port_mux_lut[y].res == per) { @@ -353,30 +274,33 @@ static void portmux_setup(unsigned short per, unsigned short function) } } #elif defined(BF548_FAMILY) -inline void portmux_setup(unsigned short portno, unsigned short function) +inline void portmux_setup(unsigned short per) { u32 pmux; + u16 ident = P_IDENT(per); + u16 function = P_FUNCT2MUX(per); - pmux = gpio_array[gpio_bank(portno)]->port_mux; + pmux = gpio_array[gpio_bank(ident)]->port_mux; - pmux &= ~(0x3 << (2 * gpio_sub_n(portno))); - pmux |= (function & 0x3) << (2 * gpio_sub_n(portno)); + pmux &= ~(0x3 << (2 * gpio_sub_n(ident))); + pmux |= (function & 0x3) << (2 * gpio_sub_n(ident)); - gpio_array[gpio_bank(portno)]->port_mux = pmux; + gpio_array[gpio_bank(ident)]->port_mux = pmux; } -inline u16 get_portmux(unsigned short portno) +inline u16 get_portmux(unsigned short per) { u32 pmux; + u16 ident = P_IDENT(per); - pmux = gpio_array[gpio_bank(portno)]->port_mux; + pmux = gpio_array[gpio_bank(ident)]->port_mux; - return (pmux >> (2 * gpio_sub_n(portno)) & 0x3); + return (pmux >> (2 * gpio_sub_n(ident)) & 0x3); } #elif defined(BF527_FAMILY) || defined(BF518_FAMILY) -inline void portmux_setup(unsigned short portno, unsigned short function) +inline void portmux_setup(unsigned short per) { - u16 pmux, ident = P_IDENT(portno); + u16 pmux, ident = P_IDENT(per), function = P_FUNCT2MUX(per); u8 offset = pmux_offset[gpio_bank(ident)][gpio_sub_n(ident)]; pmux = *port_mux[gpio_bank(ident)]; @@ -424,90 +348,71 @@ void set_gpio_ ## name(unsigned gpio, unsigned short arg) \ unsigned long flags; \ local_irq_save_hw(flags); \ if (arg) \ - gpio_bankb[gpio_bank(gpio)]->name |= gpio_bit(gpio); \ + gpio_array[gpio_bank(gpio)]->name |= gpio_bit(gpio); \ else \ - gpio_bankb[gpio_bank(gpio)]->name &= ~gpio_bit(gpio); \ + gpio_array[gpio_bank(gpio)]->name &= ~gpio_bit(gpio); \ AWA_DUMMY_READ(name); \ local_irq_restore_hw(flags); \ } \ EXPORT_SYMBOL(set_gpio_ ## name); -SET_GPIO(dir) -SET_GPIO(inen) -SET_GPIO(polar) -SET_GPIO(edge) -SET_GPIO(both) +SET_GPIO(dir) /* set_gpio_dir() */ +SET_GPIO(inen) /* set_gpio_inen() */ +SET_GPIO(polar) /* set_gpio_polar() */ +SET_GPIO(edge) /* set_gpio_edge() */ +SET_GPIO(both) /* set_gpio_both() */ -#if ANOMALY_05000311 || ANOMALY_05000323 #define SET_GPIO_SC(name) \ void set_gpio_ ## name(unsigned gpio, unsigned short arg) \ { \ unsigned long flags; \ - local_irq_save_hw(flags); \ - if (arg) \ - gpio_bankb[gpio_bank(gpio)]->name ## _set = gpio_bit(gpio); \ - else \ - gpio_bankb[gpio_bank(gpio)]->name ## _clear = gpio_bit(gpio); \ - AWA_DUMMY_READ(name); \ - local_irq_restore_hw(flags); \ -} \ -EXPORT_SYMBOL(set_gpio_ ## name); -#else -#define SET_GPIO_SC(name) \ -void set_gpio_ ## name(unsigned gpio, unsigned short arg) \ -{ \ + if (ANOMALY_05000311 || ANOMALY_05000323) \ + local_irq_save_hw(flags); \ if (arg) \ - gpio_bankb[gpio_bank(gpio)]->name ## _set = gpio_bit(gpio); \ + gpio_array[gpio_bank(gpio)]->name ## _set = gpio_bit(gpio); \ else \ - gpio_bankb[gpio_bank(gpio)]->name ## _clear = gpio_bit(gpio); \ + gpio_array[gpio_bank(gpio)]->name ## _clear = gpio_bit(gpio); \ + if (ANOMALY_05000311 || ANOMALY_05000323) { \ + AWA_DUMMY_READ(name); \ + local_irq_restore_hw(flags); \ + } \ } \ EXPORT_SYMBOL(set_gpio_ ## name); -#endif SET_GPIO_SC(maska) SET_GPIO_SC(maskb) SET_GPIO_SC(data) -#if ANOMALY_05000311 || ANOMALY_05000323 void set_gpio_toggle(unsigned gpio) { unsigned long flags; - local_irq_save_hw(flags); - gpio_bankb[gpio_bank(gpio)]->toggle = gpio_bit(gpio); - AWA_DUMMY_READ(toggle); - local_irq_restore_hw(flags); -} -#else -void set_gpio_toggle(unsigned gpio) -{ - gpio_bankb[gpio_bank(gpio)]->toggle = gpio_bit(gpio); + if (ANOMALY_05000311 || ANOMALY_05000323) + local_irq_save_hw(flags); + gpio_array[gpio_bank(gpio)]->toggle = gpio_bit(gpio); + if (ANOMALY_05000311 || ANOMALY_05000323) { + AWA_DUMMY_READ(toggle); + local_irq_restore_hw(flags); + } } -#endif EXPORT_SYMBOL(set_gpio_toggle); /*Set current PORT date (16-bit word)*/ -#if ANOMALY_05000311 || ANOMALY_05000323 #define SET_GPIO_P(name) \ void set_gpiop_ ## name(unsigned gpio, unsigned short arg) \ { \ unsigned long flags; \ - local_irq_save_hw(flags); \ - gpio_bankb[gpio_bank(gpio)]->name = arg; \ - AWA_DUMMY_READ(name); \ - local_irq_restore_hw(flags); \ + if (ANOMALY_05000311 || ANOMALY_05000323) \ + local_irq_save_hw(flags); \ + gpio_array[gpio_bank(gpio)]->name = arg; \ + if (ANOMALY_05000311 || ANOMALY_05000323) { \ + AWA_DUMMY_READ(name); \ + local_irq_restore_hw(flags); \ + } \ } \ EXPORT_SYMBOL(set_gpiop_ ## name); -#else -#define SET_GPIO_P(name) \ -void set_gpiop_ ## name(unsigned gpio, unsigned short arg) \ -{ \ - gpio_bankb[gpio_bank(gpio)]->name = arg; \ -} \ -EXPORT_SYMBOL(set_gpiop_ ## name); -#endif SET_GPIO_P(data) SET_GPIO_P(dir) @@ -519,27 +424,21 @@ SET_GPIO_P(maska) SET_GPIO_P(maskb) /* Get a specific bit */ -#if ANOMALY_05000311 || ANOMALY_05000323 #define GET_GPIO(name) \ unsigned short get_gpio_ ## name(unsigned gpio) \ { \ unsigned long flags; \ unsigned short ret; \ - local_irq_save_hw(flags); \ - ret = 0x01 & (gpio_bankb[gpio_bank(gpio)]->name >> gpio_sub_n(gpio)); \ - AWA_DUMMY_READ(name); \ - local_irq_restore_hw(flags); \ + if (ANOMALY_05000311 || ANOMALY_05000323) \ + local_irq_save_hw(flags); \ + ret = 0x01 & (gpio_array[gpio_bank(gpio)]->name >> gpio_sub_n(gpio)); \ + if (ANOMALY_05000311 || ANOMALY_05000323) { \ + AWA_DUMMY_READ(name); \ + local_irq_restore_hw(flags); \ + } \ return ret; \ } \ EXPORT_SYMBOL(get_gpio_ ## name); -#else -#define GET_GPIO(name) \ -unsigned short get_gpio_ ## name(unsigned gpio) \ -{ \ - return (0x01 & (gpio_bankb[gpio_bank(gpio)]->name >> gpio_sub_n(gpio))); \ -} \ -EXPORT_SYMBOL(get_gpio_ ## name); -#endif GET_GPIO(data) GET_GPIO(dir) @@ -552,27 +451,21 @@ GET_GPIO(maskb) /*Get current PORT date (16-bit word)*/ -#if ANOMALY_05000311 || ANOMALY_05000323 #define GET_GPIO_P(name) \ unsigned short get_gpiop_ ## name(unsigned gpio) \ { \ unsigned long flags; \ unsigned short ret; \ - local_irq_save_hw(flags); \ - ret = (gpio_bankb[gpio_bank(gpio)]->name); \ - AWA_DUMMY_READ(name); \ - local_irq_restore_hw(flags); \ + if (ANOMALY_05000311 || ANOMALY_05000323) \ + local_irq_save_hw(flags); \ + ret = (gpio_array[gpio_bank(gpio)]->name); \ + if (ANOMALY_05000311 || ANOMALY_05000323) { \ + AWA_DUMMY_READ(name); \ + local_irq_restore_hw(flags); \ + } \ return ret; \ } \ EXPORT_SYMBOL(get_gpiop_ ## name); -#else -#define GET_GPIO_P(name) \ -unsigned short get_gpiop_ ## name(unsigned gpio) \ -{ \ - return (gpio_bankb[gpio_bank(gpio)]->name);\ -} \ -EXPORT_SYMBOL(get_gpiop_ ## name); -#endif GET_GPIO_P(data) GET_GPIO_P(dir) @@ -585,6 +478,26 @@ GET_GPIO_P(maskb) #ifdef CONFIG_PM + +static unsigned short wakeup_map[GPIO_BANK_NUM]; +static unsigned char wakeup_flags_map[MAX_BLACKFIN_GPIOS]; + +static const unsigned int sic_iwr_irqs[] = { +#if defined(BF533_FAMILY) + IRQ_PROG_INTB +#elif defined(BF537_FAMILY) + IRQ_PROG_INTB, IRQ_PORTG_INTB, IRQ_MAC_TX +#elif defined(BF538_FAMILY) + IRQ_PORTF_INTB +#elif defined(BF527_FAMILY) || defined(BF518_FAMILY) + IRQ_PORTF_INTB, IRQ_PORTG_INTB, IRQ_PORTH_INTB +#elif defined(BF561_FAMILY) + IRQ_PROG0_INTB, IRQ_PROG1_INTB, IRQ_PROG2_INTB +#else +# error no SIC_IWR defined +#endif +}; + /*********************************************************** * * FUNCTIONS: Blackfin PM Setup API @@ -669,18 +582,18 @@ u32 bfin_pm_standby_setup(void) mask = wakeup_map[gpio_bank(i)]; bank = gpio_bank(i); - gpio_bank_saved[bank].maskb = gpio_bankb[bank]->maskb; - gpio_bankb[bank]->maskb = 0; + gpio_bank_saved[bank].maskb = gpio_array[bank]->maskb; + gpio_array[bank]->maskb = 0; if (mask) { #if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) gpio_bank_saved[bank].fer = *port_fer[bank]; #endif - gpio_bank_saved[bank].inen = gpio_bankb[bank]->inen; - gpio_bank_saved[bank].polar = gpio_bankb[bank]->polar; - gpio_bank_saved[bank].dir = gpio_bankb[bank]->dir; - gpio_bank_saved[bank].edge = gpio_bankb[bank]->edge; - gpio_bank_saved[bank].both = gpio_bankb[bank]->both; + gpio_bank_saved[bank].inen = gpio_array[bank]->inen; + gpio_bank_saved[bank].polar = gpio_array[bank]->polar; + gpio_bank_saved[bank].dir = gpio_array[bank]->dir; + gpio_bank_saved[bank].edge = gpio_array[bank]->edge; + gpio_bank_saved[bank].both = gpio_array[bank]->both; gpio_bank_saved[bank].reserved = reserved_gpio_map[bank]; @@ -700,7 +613,7 @@ u32 bfin_pm_standby_setup(void) } bfin_internal_set_wake(sic_iwr_irqs[bank], 1); - gpio_bankb[bank]->maskb_set = wakeup_map[gpio_bank(i)]; + gpio_array[bank]->maskb_set = wakeup_map[gpio_bank(i)]; } } @@ -721,18 +634,18 @@ void bfin_pm_standby_restore(void) #if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) *port_fer[bank] = gpio_bank_saved[bank].fer; #endif - gpio_bankb[bank]->inen = gpio_bank_saved[bank].inen; - gpio_bankb[bank]->dir = gpio_bank_saved[bank].dir; - gpio_bankb[bank]->polar = gpio_bank_saved[bank].polar; - gpio_bankb[bank]->edge = gpio_bank_saved[bank].edge; - gpio_bankb[bank]->both = gpio_bank_saved[bank].both; + gpio_array[bank]->inen = gpio_bank_saved[bank].inen; + gpio_array[bank]->dir = gpio_bank_saved[bank].dir; + gpio_array[bank]->polar = gpio_bank_saved[bank].polar; + gpio_array[bank]->edge = gpio_bank_saved[bank].edge; + gpio_array[bank]->both = gpio_bank_saved[bank].both; reserved_gpio_map[bank] = gpio_bank_saved[bank].reserved; bfin_internal_set_wake(sic_iwr_irqs[bank], 0); } - gpio_bankb[bank]->maskb = gpio_bank_saved[bank].maskb; + gpio_array[bank]->maskb = gpio_bank_saved[bank].maskb; } AWA_DUMMY_READ(maskb); } @@ -745,21 +658,21 @@ void bfin_gpio_pm_hibernate_suspend(void) bank = gpio_bank(i); #if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) - gpio_bank_saved[bank].fer = *port_fer[bank]; + gpio_bank_saved[bank].fer = *port_fer[bank]; #if defined(BF527_FAMILY) || defined(BF518_FAMILY) - gpio_bank_saved[bank].mux = *port_mux[bank]; + gpio_bank_saved[bank].mux = *port_mux[bank]; #else - if (bank == 0) - gpio_bank_saved[bank].mux = bfin_read_PORT_MUX(); + if (bank == 0) + gpio_bank_saved[bank].mux = bfin_read_PORT_MUX(); #endif #endif - gpio_bank_saved[bank].data = gpio_bankb[bank]->data; - gpio_bank_saved[bank].inen = gpio_bankb[bank]->inen; - gpio_bank_saved[bank].polar = gpio_bankb[bank]->polar; - gpio_bank_saved[bank].dir = gpio_bankb[bank]->dir; - gpio_bank_saved[bank].edge = gpio_bankb[bank]->edge; - gpio_bank_saved[bank].both = gpio_bankb[bank]->both; - gpio_bank_saved[bank].maska = gpio_bankb[bank]->maska; + gpio_bank_saved[bank].data = gpio_array[bank]->data; + gpio_bank_saved[bank].inen = gpio_array[bank]->inen; + gpio_bank_saved[bank].polar = gpio_array[bank]->polar; + gpio_bank_saved[bank].dir = gpio_array[bank]->dir; + gpio_bank_saved[bank].edge = gpio_array[bank]->edge; + gpio_bank_saved[bank].both = gpio_array[bank]->both; + gpio_bank_saved[bank].maska = gpio_array[bank]->maska; } AWA_DUMMY_READ(maska); @@ -770,27 +683,27 @@ void bfin_gpio_pm_hibernate_restore(void) int i, bank; for (i = 0; i < MAX_BLACKFIN_GPIOS; i += GPIO_BANKSIZE) { - bank = gpio_bank(i); + bank = gpio_bank(i); #if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) #if defined(BF527_FAMILY) || defined(BF518_FAMILY) - *port_mux[bank] = gpio_bank_saved[bank].mux; + *port_mux[bank] = gpio_bank_saved[bank].mux; #else - if (bank == 0) - bfin_write_PORT_MUX(gpio_bank_saved[bank].mux); + if (bank == 0) + bfin_write_PORT_MUX(gpio_bank_saved[bank].mux); #endif - *port_fer[bank] = gpio_bank_saved[bank].fer; + *port_fer[bank] = gpio_bank_saved[bank].fer; #endif - gpio_bankb[bank]->inen = gpio_bank_saved[bank].inen; - gpio_bankb[bank]->dir = gpio_bank_saved[bank].dir; - gpio_bankb[bank]->polar = gpio_bank_saved[bank].polar; - gpio_bankb[bank]->edge = gpio_bank_saved[bank].edge; - gpio_bankb[bank]->both = gpio_bank_saved[bank].both; + gpio_array[bank]->inen = gpio_bank_saved[bank].inen; + gpio_array[bank]->dir = gpio_bank_saved[bank].dir; + gpio_array[bank]->polar = gpio_bank_saved[bank].polar; + gpio_array[bank]->edge = gpio_bank_saved[bank].edge; + gpio_array[bank]->both = gpio_bank_saved[bank].both; - gpio_bankb[bank]->data_set = gpio_bank_saved[bank].data - | gpio_bank_saved[bank].dir; + gpio_array[bank]->data_set = gpio_bank_saved[bank].data + | gpio_bank_saved[bank].dir; - gpio_bankb[bank]->maska = gpio_bank_saved[bank].maska; + gpio_array[bank]->maska = gpio_bank_saved[bank].maska; } AWA_DUMMY_READ(maska); } @@ -817,12 +730,12 @@ void bfin_gpio_pm_hibernate_suspend(void) for (i = 0; i < MAX_BLACKFIN_GPIOS; i += GPIO_BANKSIZE) { bank = gpio_bank(i); - gpio_bank_saved[bank].fer = gpio_array[bank]->port_fer; - gpio_bank_saved[bank].mux = gpio_array[bank]->port_mux; - gpio_bank_saved[bank].data = gpio_array[bank]->port_data; - gpio_bank_saved[bank].data = gpio_array[bank]->port_data; - gpio_bank_saved[bank].inen = gpio_array[bank]->port_inen; - gpio_bank_saved[bank].dir = gpio_array[bank]->port_dir_set; + gpio_bank_saved[bank].fer = gpio_array[bank]->port_fer; + gpio_bank_saved[bank].mux = gpio_array[bank]->port_mux; + gpio_bank_saved[bank].data = gpio_array[bank]->data; + gpio_bank_saved[bank].data = gpio_array[bank]->data; + gpio_bank_saved[bank].inen = gpio_array[bank]->inen; + gpio_bank_saved[bank].dir = gpio_array[bank]->dir_set; } } @@ -831,21 +744,21 @@ void bfin_gpio_pm_hibernate_restore(void) int i, bank; for (i = 0; i < MAX_BLACKFIN_GPIOS; i += GPIO_BANKSIZE) { - bank = gpio_bank(i); - - gpio_array[bank]->port_mux = gpio_bank_saved[bank].mux; - gpio_array[bank]->port_fer = gpio_bank_saved[bank].fer; - gpio_array[bank]->port_inen = gpio_bank_saved[bank].inen; - gpio_array[bank]->port_dir_set = gpio_bank_saved[bank].dir; - gpio_array[bank]->port_set = gpio_bank_saved[bank].data - | gpio_bank_saved[bank].dir; + bank = gpio_bank(i); + + gpio_array[bank]->port_mux = gpio_bank_saved[bank].mux; + gpio_array[bank]->port_fer = gpio_bank_saved[bank].fer; + gpio_array[bank]->inen = gpio_bank_saved[bank].inen; + gpio_array[bank]->dir_set = gpio_bank_saved[bank].dir; + gpio_array[bank]->data_set = gpio_bank_saved[bank].data + | gpio_bank_saved[bank].dir; } } #endif unsigned short get_gpio_dir(unsigned gpio) { - return (0x01 & (gpio_array[gpio_bank(gpio)]->port_dir_clear >> gpio_sub_n(gpio))); + return (0x01 & (gpio_array[gpio_bank(gpio)]->dir_clear >> gpio_sub_n(gpio))); } EXPORT_SYMBOL(get_gpio_dir); @@ -905,9 +818,7 @@ int peripheral_request(unsigned short per, const char *label) */ #ifdef BF548_FAMILY - u16 funct = get_portmux(ident); - - if (!((per & P_MAYSHARE) && (funct == P_FUNCT2MUX(per)))) { + if (!((per & P_MAYSHARE) && get_portmux(per) == P_FUNCT2MUX(per))) { #else if (!(per & P_MAYSHARE)) { #endif @@ -931,11 +842,7 @@ int peripheral_request(unsigned short per, const char *label) anyway: reserved_peri_map[gpio_bank(ident)] |= gpio_bit(ident); -#ifdef BF548_FAMILY - portmux_setup(ident, P_FUNCT2MUX(per)); -#else - portmux_setup(per, P_FUNCT2MUX(per)); -#endif + portmux_setup(per); port_setup(ident, PERIPHERAL_USAGE); local_irq_restore_hw(flags); @@ -977,9 +884,6 @@ void peripheral_free(unsigned short per) if (!(per & P_DEFINED)) return; - if (check_gpio(ident) < 0) - return; - local_irq_save_hw(flags); if (unlikely(!(reserved_peri_map[gpio_bank(ident)] & gpio_bit(ident)))) { @@ -1056,9 +960,15 @@ int bfin_gpio_request(unsigned gpio, const char *label) local_irq_restore_hw(flags); return -EBUSY; } - if (unlikely(reserved_gpio_irq_map[gpio_bank(gpio)] & gpio_bit(gpio))) + if (unlikely(reserved_gpio_irq_map[gpio_bank(gpio)] & gpio_bit(gpio))) { printk(KERN_NOTICE "bfin-gpio: GPIO %d is already reserved as gpio-irq!" " (Documentation/blackfin/bfin-gpio-notes.txt)\n", gpio); + } +#ifndef BF548_FAMILY + else { /* Reset POLAR setting when acquiring a gpio for the first time */ + set_gpio_polar(gpio, 0); + } +#endif reserved_gpio_map[gpio_bank(gpio)] |= gpio_bit(gpio); set_label(gpio, label); @@ -1078,6 +988,8 @@ void bfin_gpio_free(unsigned gpio) if (check_gpio(gpio) < 0) return; + might_sleep(); + local_irq_save_hw(flags); if (unlikely(!(reserved_gpio_map[gpio_bank(gpio)] & gpio_bit(gpio)))) { @@ -1158,8 +1070,16 @@ void bfin_gpio_irq_free(unsigned gpio) local_irq_restore_hw(flags); } - +static inline void __bfin_gpio_direction_input(unsigned gpio) +{ #ifdef BF548_FAMILY + gpio_array[gpio_bank(gpio)]->dir_clear = gpio_bit(gpio); +#else + gpio_array[gpio_bank(gpio)]->dir &= ~gpio_bit(gpio); +#endif + gpio_array[gpio_bank(gpio)]->inen |= gpio_bit(gpio); +} + int bfin_gpio_direction_input(unsigned gpio) { unsigned long flags; @@ -1170,125 +1090,85 @@ int bfin_gpio_direction_input(unsigned gpio) } local_irq_save_hw(flags); - gpio_array[gpio_bank(gpio)]->port_dir_clear = gpio_bit(gpio); - gpio_array[gpio_bank(gpio)]->port_inen |= gpio_bit(gpio); + __bfin_gpio_direction_input(gpio); + AWA_DUMMY_READ(inen); local_irq_restore_hw(flags); return 0; } EXPORT_SYMBOL(bfin_gpio_direction_input); -int bfin_gpio_direction_output(unsigned gpio, int value) +void bfin_gpio_irq_prepare(unsigned gpio) { +#ifdef BF548_FAMILY unsigned long flags; +#endif - if (!(reserved_gpio_map[gpio_bank(gpio)] & gpio_bit(gpio))) { - gpio_error(gpio); - return -EINVAL; - } + port_setup(gpio, GPIO_USAGE); +#ifdef BF548_FAMILY local_irq_save_hw(flags); - gpio_array[gpio_bank(gpio)]->port_inen &= ~gpio_bit(gpio); - gpio_set_value(gpio, value); - gpio_array[gpio_bank(gpio)]->port_dir_set = gpio_bit(gpio); + __bfin_gpio_direction_input(gpio); local_irq_restore_hw(flags); - - return 0; +#endif } -EXPORT_SYMBOL(bfin_gpio_direction_output); void bfin_gpio_set_value(unsigned gpio, int arg) { if (arg) - gpio_array[gpio_bank(gpio)]->port_set = gpio_bit(gpio); + gpio_array[gpio_bank(gpio)]->data_set = gpio_bit(gpio); else - gpio_array[gpio_bank(gpio)]->port_clear = gpio_bit(gpio); + gpio_array[gpio_bank(gpio)]->data_clear = gpio_bit(gpio); } EXPORT_SYMBOL(bfin_gpio_set_value); -int bfin_gpio_get_value(unsigned gpio) -{ - return (1 & (gpio_array[gpio_bank(gpio)]->port_data >> gpio_sub_n(gpio))); -} -EXPORT_SYMBOL(bfin_gpio_get_value); - -void bfin_gpio_irq_prepare(unsigned gpio) +int bfin_gpio_direction_output(unsigned gpio, int value) { unsigned long flags; - port_setup(gpio, GPIO_USAGE); + if (!(reserved_gpio_map[gpio_bank(gpio)] & gpio_bit(gpio))) { + gpio_error(gpio); + return -EINVAL; + } local_irq_save_hw(flags); - gpio_array[gpio_bank(gpio)]->port_dir_clear = gpio_bit(gpio); - gpio_array[gpio_bank(gpio)]->port_inen |= gpio_bit(gpio); - local_irq_restore_hw(flags); -} + gpio_array[gpio_bank(gpio)]->inen &= ~gpio_bit(gpio); + gpio_set_value(gpio, value); +#ifdef BF548_FAMILY + gpio_array[gpio_bank(gpio)]->dir_set = gpio_bit(gpio); #else + gpio_array[gpio_bank(gpio)]->dir |= gpio_bit(gpio); +#endif + + AWA_DUMMY_READ(dir); + local_irq_restore_hw(flags); + + return 0; +} +EXPORT_SYMBOL(bfin_gpio_direction_output); int bfin_gpio_get_value(unsigned gpio) { +#ifdef BF548_FAMILY + return (1 & (gpio_array[gpio_bank(gpio)]->data >> gpio_sub_n(gpio))); +#else unsigned long flags; - int ret; if (unlikely(get_gpio_edge(gpio))) { + int ret; local_irq_save_hw(flags); set_gpio_edge(gpio, 0); ret = get_gpio_data(gpio); set_gpio_edge(gpio, 1); local_irq_restore_hw(flags); - return ret; } else return get_gpio_data(gpio); +#endif } EXPORT_SYMBOL(bfin_gpio_get_value); - -int bfin_gpio_direction_input(unsigned gpio) -{ - unsigned long flags; - - if (!(reserved_gpio_map[gpio_bank(gpio)] & gpio_bit(gpio))) { - gpio_error(gpio); - return -EINVAL; - } - - local_irq_save_hw(flags); - gpio_bankb[gpio_bank(gpio)]->dir &= ~gpio_bit(gpio); - gpio_bankb[gpio_bank(gpio)]->inen |= gpio_bit(gpio); - AWA_DUMMY_READ(inen); - local_irq_restore_hw(flags); - - return 0; -} -EXPORT_SYMBOL(bfin_gpio_direction_input); - -int bfin_gpio_direction_output(unsigned gpio, int value) -{ - unsigned long flags; - - if (!(reserved_gpio_map[gpio_bank(gpio)] & gpio_bit(gpio))) { - gpio_error(gpio); - return -EINVAL; - } - - local_irq_save_hw(flags); - gpio_bankb[gpio_bank(gpio)]->inen &= ~gpio_bit(gpio); - - if (value) - gpio_bankb[gpio_bank(gpio)]->data_set = gpio_bit(gpio); - else - gpio_bankb[gpio_bank(gpio)]->data_clear = gpio_bit(gpio); - - gpio_bankb[gpio_bank(gpio)]->dir |= gpio_bit(gpio); - AWA_DUMMY_READ(dir); - local_irq_restore_hw(flags); - - return 0; -} -EXPORT_SYMBOL(bfin_gpio_direction_output); - /* If we are booting from SPI and our board lacks a strong enough pull up, * the core can reset and execute the bootrom faster than the resistor can * pull the signal logically high. To work around this (common) error in @@ -1299,23 +1179,15 @@ EXPORT_SYMBOL(bfin_gpio_direction_output); * lives here as we need to force all the GPIO states w/out going through * BUG() checks and such. */ -void bfin_gpio_reset_spi0_ssel1(void) +void bfin_reset_boot_spi_cs(unsigned short pin) { - u16 gpio = P_IDENT(P_SPI0_SSEL1); - + unsigned short gpio = P_IDENT(pin); port_setup(gpio, GPIO_USAGE); - gpio_bankb[gpio_bank(gpio)]->data_set = gpio_bit(gpio); + gpio_array[gpio_bank(gpio)]->data_set = gpio_bit(gpio); AWA_DUMMY_READ(data_set); udelay(1); } -void bfin_gpio_irq_prepare(unsigned gpio) -{ - port_setup(gpio, GPIO_USAGE); -} - -#endif /*BF548_FAMILY */ - #if defined(CONFIG_PROC_FS) static int gpio_proc_read(char *buf, char **start, off_t offset, int len, int *unused_i, void *unused_v) @@ -1369,11 +1241,7 @@ int bfin_gpiolib_get_value(struct gpio_chip *chip, unsigned gpio) void bfin_gpiolib_set_value(struct gpio_chip *chip, unsigned gpio, int value) { -#ifdef BF548_FAMILY return bfin_gpio_set_value(gpio, value); -#else - return set_gpio_data(gpio, value); -#endif } int bfin_gpiolib_gpio_request(struct gpio_chip *chip, unsigned gpio) diff --git a/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/arch/blackfin/kernel/cplb-mpu/cplbinit.c index bdb958486e7..3e329a6ce04 100644 --- a/arch/blackfin/kernel/cplb-mpu/cplbinit.c +++ b/arch/blackfin/kernel/cplb-mpu/cplbinit.c @@ -63,10 +63,8 @@ void __init generate_cplb_tables_cpu(unsigned int cpu) dcplb_tbl[cpu][i_d].addr = 0; dcplb_tbl[cpu][i_d++].data = SDRAM_OOPS | PAGE_SIZE_1KB; -#if 0 icplb_tbl[cpu][i_i].addr = 0; - icplb_tbl[cpu][i_i++].data = i_cache | CPLB_USER_RD | PAGE_SIZE_4KB; -#endif + icplb_tbl[cpu][i_i++].data = i_cache | CPLB_USER_RD | PAGE_SIZE_1KB; /* Cover kernel memory with 4M pages. */ addr = 0; diff --git a/arch/blackfin/kernel/cplb-nompu/cplbmgr.c b/arch/blackfin/kernel/cplb-nompu/cplbmgr.c index 376249ab269..8cbb47c7b66 100644 --- a/arch/blackfin/kernel/cplb-nompu/cplbmgr.c +++ b/arch/blackfin/kernel/cplb-nompu/cplbmgr.c @@ -163,12 +163,14 @@ MGR_ATTR static int icplb_miss(int cpu) nr_icplb_supv_miss[cpu]++; base = 0; - for (idx = 0; idx < icplb_nr_bounds; idx++) { + idx = 0; + do { eaddr = icplb_bounds[idx].eaddr; if (addr < eaddr) break; base = eaddr; - } + } while (++idx < icplb_nr_bounds); + if (unlikely(idx == icplb_nr_bounds)) return CPLB_NO_ADDR_MATCH; @@ -208,12 +210,14 @@ MGR_ATTR static int dcplb_miss(int cpu) nr_dcplb_supv_miss[cpu]++; base = 0; - for (idx = 0; idx < dcplb_nr_bounds; idx++) { + idx = 0; + do { eaddr = dcplb_bounds[idx].eaddr; if (addr < eaddr) break; base = eaddr; - } + } while (++idx < dcplb_nr_bounds); + if (unlikely(idx == dcplb_nr_bounds)) return CPLB_NO_ADDR_MATCH; diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index ab8209cbbad..23e9aa08071 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -35,6 +35,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <asm/trace.h> +#include <asm/pda.h> static atomic_t irq_err_count; static spinlock_t irq_controller_lock; @@ -69,6 +70,11 @@ static struct irq_desc bad_irq_desc = { #endif }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating a variable-sized bad_irq_desc.affinity */ +#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, j; @@ -91,8 +97,13 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda[j].__nmi_count); + seq_printf(p, " CORE Non Maskable Interrupt\n"); seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); + } return 0; } diff --git a/arch/blackfin/kernel/reboot.c b/arch/blackfin/kernel/reboot.c index eeee8cb4336..53d08dee853 100644 --- a/arch/blackfin/kernel/reboot.c +++ b/arch/blackfin/kernel/reboot.c @@ -20,8 +20,8 @@ * reset while the Core B bit (on dual core parts) is cleared by * the core reset. */ -__attribute__((l1_text)) -static void _bfin_reset(void) +__attribute__ ((__l1_text__, __noreturn__)) +static void bfin_reset(void) { /* Wait for completion of "system" events such as cache line * line fills so that we avoid infinite stalls later on as @@ -30,7 +30,11 @@ static void _bfin_reset(void) */ __builtin_bfin_ssync(); - while (1) { + /* The bootrom checks to see how it was reset and will + * automatically perform a software reset for us when + * it starts executing after the core reset. + */ + if (ANOMALY_05000353 || ANOMALY_05000386) { /* Initiate System software reset. */ bfin_write_SWRST(0x7); @@ -50,6 +54,11 @@ static void _bfin_reset(void) /* Clear System software reset */ bfin_write_SWRST(0); + /* The BF526 ROM will crash during reset */ +#if defined(__ADSPBF522__) || defined(__ADSPBF524__) || defined(__ADSPBF526__) + bfin_read_SWRST(); +#endif + /* Wait for the SWRST write to complete. Cannot rely on SSYNC * though as the System state is all reset now. */ @@ -60,22 +69,11 @@ static void _bfin_reset(void) : "a" (15 * 1) : "LC1", "LB1", "LT1" ); + } + while (1) /* Issue core reset */ asm("raise 1"); - } -} - -static void bfin_reset(void) -{ - if (ANOMALY_05000353 || ANOMALY_05000386) - _bfin_reset(); - else - /* the bootrom checks to see how it was reset and will - * automatically perform a software reset for us when - * it starts executing boot - */ - asm("raise 1;"); } __attribute__((weak)) diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c index b2a811347b6..e5c11623080 100644 --- a/arch/blackfin/kernel/setup.c +++ b/arch/blackfin/kernel/setup.c @@ -60,7 +60,7 @@ void __initdata *init_retx, *init_saved_retx, *init_saved_seqstat, #define BFIN_MEMMAP_MAX 128 /* number of entries in bfin_memmap */ #define BFIN_MEMMAP_RAM 1 #define BFIN_MEMMAP_RESERVED 2 -struct bfin_memmap { +static struct bfin_memmap { int nr_map; struct bfin_memmap_entry { unsigned long long addr; /* start of memory segment */ @@ -824,7 +824,15 @@ void __init setup_arch(char **cmdline_p) flash_probe(); #endif + printk(KERN_INFO "Boot Mode: %i\n", bfin_read_SYSCR() & 0xF); + + /* Newer parts mirror SWRST bits in SYSCR */ +#if defined(CONFIG_BF53x) || defined(CONFIG_BF561) || \ + defined(CONFIG_BF538) || defined(CONFIG_BF539) _bfin_swrst = bfin_read_SWRST(); +#else + _bfin_swrst = bfin_read_SYSCR(); +#endif #ifdef CONFIG_DEBUG_DOUBLEFAULT_PRINT bfin_write_SWRST(_bfin_swrst & ~DOUBLE_FAULT); @@ -853,7 +861,7 @@ void __init setup_arch(char **cmdline_p) else if (_bfin_swrst & RESET_SOFTWARE) printk(KERN_NOTICE "Reset caused by Software reset\n"); - printk(KERN_INFO "Blackfin support (C) 2004-2008 Analog Devices, Inc.\n"); + printk(KERN_INFO "Blackfin support (C) 2004-2009 Analog Devices, Inc.\n"); if (bfin_compiled_revid() == 0xffff) printk(KERN_INFO "Compiled for ADSP-%s Rev any\n", CPU); else if (bfin_compiled_revid() == -1) diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 5b0667da8d0..ffe7fb53ecc 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -673,6 +673,14 @@ static void decode_instruction(unsigned short *address) verbose_printk("RTI"); else if (opcode == 0x0012) verbose_printk("RTX"); + else if (opcode == 0x0013) + verbose_printk("RTN"); + else if (opcode == 0x0014) + verbose_printk("RTE"); + else if (opcode == 0x0025) + verbose_printk("EMUEXCPT"); + else if (opcode == 0x0040 && opcode <= 0x0047) + verbose_printk("STI R%i", opcode & 7); else if (opcode >= 0x0050 && opcode <= 0x0057) verbose_printk("JUMP (P%i)", opcode & 7); else if (opcode >= 0x0060 && opcode <= 0x0067) @@ -681,6 +689,10 @@ static void decode_instruction(unsigned short *address) verbose_printk("CALL (PC+P%i)", opcode & 7); else if (opcode >= 0x0080 && opcode <= 0x0087) verbose_printk("JUMP (PC+P%i)", opcode & 7); + else if (opcode >= 0x0090 && opcode <= 0x009F) + verbose_printk("RAISE 0x%x", opcode & 0xF); + else if (opcode >= 0x00A0 && opcode <= 0x00AF) + verbose_printk("EXCPT 0x%x", opcode & 0xF); else if ((opcode >= 0x1000 && opcode <= 0x13FF) || (opcode >= 0x1800 && opcode <= 0x1BFF)) verbose_printk("IF !CC JUMP"); else if ((opcode >= 0x1400 && opcode <= 0x17ff) || (opcode >= 0x1c00 && opcode <= 0x1fff)) @@ -820,11 +832,8 @@ void show_stack(struct task_struct *task, unsigned long *stack) decode_address(buf, (unsigned int)stack); printk(KERN_NOTICE " SP: [0x%p] %s\n", stack, buf); - addr = (unsigned int *)((unsigned int)stack & ~0x3F); - /* First thing is to look for a frame pointer */ - for (addr = (unsigned int *)((unsigned int)stack & ~0xF), i = 0; - addr < endstack; addr++, i++) { + for (addr = (unsigned int *)((unsigned int)stack & ~0xF); addr < endstack; addr++) { if (*addr & 0x1) continue; ins_addr = (unsigned short *)*addr; @@ -834,7 +843,8 @@ void show_stack(struct task_struct *task, unsigned long *stack) if (fp) { /* Let's check to see if it is a frame pointer */ - while (fp >= (addr - 1) && fp < endstack && fp) + while (fp >= (addr - 1) && fp < endstack + && fp && ((unsigned int) fp & 0x3) == 0) fp = (unsigned int *)*fp; if (fp == 0 || fp == endstack) { fp = addr - 1; @@ -1052,8 +1062,9 @@ void show_regs(struct pt_regs *fp) char buf [150]; struct irqaction *action; unsigned int i; - unsigned long flags; + unsigned long flags = 0; unsigned int cpu = smp_processor_id(); + unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\t\t%s\n", print_tainted()); verbose_printk(KERN_NOTICE " SEQSTAT: %08lx IPEND: %04lx SYSCFG: %04lx\n", @@ -1073,17 +1084,22 @@ void show_regs(struct pt_regs *fp) } verbose_printk(KERN_NOTICE " EXCAUSE : 0x%lx\n", fp->seqstat & SEQSTAT_EXCAUSE); - for (i = 6; i <= 15 ; i++) { + for (i = 2; i <= 15 ; i++) { if (fp->ipend & (1 << i)) { - decode_address(buf, bfin_read32(EVT0 + 4*i)); - verbose_printk(KERN_NOTICE " physical IVG%i asserted : %s\n", i, buf); + if (i != 4) { + decode_address(buf, bfin_read32(EVT0 + 4*i)); + verbose_printk(KERN_NOTICE " physical IVG%i asserted : %s\n", i, buf); + } else + verbose_printk(KERN_NOTICE " interrupts disabled\n"); } } /* if no interrupts are going off, don't print this out */ if (fp->ipend & ~0x3F) { for (i = 0; i < (NR_IRQS - 1); i++) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + if (!in_atomic) + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; if (!action) goto unlock; @@ -1096,7 +1112,8 @@ void show_regs(struct pt_regs *fp) } verbose_printk("\n"); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + if (!in_atomic) + spin_unlock_irqrestore(&irq_desc[i].lock, flags); } } diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c index 15f1351c864..0e175342112 100644 --- a/arch/blackfin/mach-bf518/boards/ezbrd.c +++ b/arch/blackfin/mach-bf518/boards/ezbrd.c @@ -46,6 +46,7 @@ #include <asm/dpmc.h> #include <asm/bfin_sdh.h> #include <linux/spi/ad7877.h> +#include <net/dsa.h> /* * Name the Board for the /proc/cpuinfo @@ -104,8 +105,31 @@ static struct platform_device rtc_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, +}; +#endif + +#if defined(CONFIG_NET_DSA_KSZ8893M) || defined(CONFIG_NET_DSA_KSZ8893M_MODULE) +static struct dsa_platform_data ksz8893m_switch_data = { + .mii_bus = &bfin_mii_bus.dev, + .netdev = &bfin_mac_device.dev, + .port_names[0] = NULL, + .port_names[1] = "eth%d", + .port_names[2] = "eth%d", + .port_names[3] = "cpu", +}; + +static struct platform_device ksz8893m_switch_device = { + .name = "dsa", + .id = 0, + .num_resources = 0, + .dev.platform_data = &ksz8893m_switch_data, }; #endif @@ -147,6 +171,15 @@ static struct bfin5xx_spi_chip spi_adc_chip_info = { }; #endif +#if defined(CONFIG_NET_DSA_KSZ8893M) \ + || defined(CONFIG_NET_DSA_KSZ8893M_MODULE) +/* SPI SWITCH CHIP */ +static struct bfin5xx_spi_chip spi_switch_info = { + .enable_dma = 0, + .bits_per_word = 8, +}; +#endif + #if defined(CONFIG_SPI_MMC) || defined(CONFIG_SPI_MMC_MODULE) static struct bfin5xx_spi_chip spi_mmc_chip_info = { .enable_dma = 1, @@ -226,6 +259,19 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = { }, #endif +#if defined(CONFIG_NET_DSA_KSZ8893M) \ + || defined(CONFIG_NET_DSA_KSZ8893M_MODULE) + { + .modalias = "ksz8893m", + .max_speed_hz = 5000000, + .bus_num = 0, + .chip_select = 1, + .platform_data = NULL, + .controller_data = &spi_switch_info, + .mode = SPI_MODE_3, + }, +#endif + #if defined(CONFIG_SPI_MMC) || defined(CONFIG_SPI_MMC_MODULE) { .modalias = "spi_mmc_dummy", @@ -473,7 +519,6 @@ static struct platform_device i2c_bfin_twi_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { @@ -487,7 +532,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) static struct platform_device bfin_sport0_uart_device = { @@ -584,9 +628,14 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif +#if defined(CONFIG_NET_DSA_KSZ8893M) || defined(CONFIG_NET_DSA_KSZ8893M_MODULE) + &ksz8893m_switch_device, +#endif + #if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) &bfin_spi0_device, &bfin_spi1_device, @@ -632,12 +681,8 @@ static struct platform_device *stamp_devices[] __initdata = { static int __init ezbrd_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); - -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif - platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); return 0; @@ -649,7 +694,7 @@ void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } void bfin_get_ether_addr(char *addr) diff --git a/arch/blackfin/mach-bf518/include/mach/portmux.h b/arch/blackfin/mach-bf518/include/mach/portmux.h index ac16d54734d..f618b487b2b 100644 --- a/arch/blackfin/mach-bf518/include/mach/portmux.h +++ b/arch/blackfin/mach-bf518/include/mach/portmux.h @@ -103,6 +103,8 @@ #define P_SPI1_SSEL4 (P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(2)) #define P_SPI1_SSEL5 (P_DEFINED | P_IDENT(GPIO_PG11) | P_FUNCT(2)) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL2 + /* SPORT Port Mux */ #define P_SPORT0_DRPRI (P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(0)) #define P_SPORT0_RSCLK (P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(0)) diff --git a/arch/blackfin/mach-bf527/boards/cm_bf527.c b/arch/blackfin/mach-bf527/boards/cm_bf527.c index a2c3578f4b6..856c097b531 100644 --- a/arch/blackfin/mach-bf527/boards/cm_bf527.c +++ b/arch/blackfin/mach-bf527/boards/cm_bf527.c @@ -403,8 +403,13 @@ static struct platform_device isp1362_hcd_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -793,7 +798,6 @@ static struct platform_device i2c_bfin_twi_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { @@ -809,7 +813,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) static struct platform_device bfin_sport0_uart_device = { @@ -920,6 +923,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -968,27 +972,23 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_gpios_device, }; -static int __init stamp_init(void) +static int __init cm_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); - -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif - platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); return 0; } -arch_initcall(stamp_init); +arch_initcall(cm_init); void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } void bfin_get_ether_addr(char *addr) diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c index 0314bd3355e..83606fcdde2 100644 --- a/arch/blackfin/mach-bf527/boards/ezbrd.c +++ b/arch/blackfin/mach-bf527/boards/ezbrd.c @@ -208,8 +208,13 @@ static struct platform_device rtc_device = { #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -590,7 +595,6 @@ static struct platform_device i2c_bfin_twi_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { @@ -604,7 +608,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) static struct platform_device bfin_sport0_uart_device = { @@ -720,6 +723,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -764,27 +768,23 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_gpios_device, }; -static int __init stamp_init(void) +static int __init ezbrd_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); - -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif - platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); return 0; } -arch_initcall(stamp_init); +arch_initcall(ezbrd_init); void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } void bfin_get_ether_addr(char *addr) diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index 9454fb7b18c..d0864111ef5 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ -425,8 +425,13 @@ static struct platform_device isp1362_hcd_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -830,7 +835,6 @@ static struct platform_device i2c_bfin_twi_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { @@ -844,7 +848,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) static struct platform_device bfin_sport0_uart_device = { @@ -988,6 +991,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -1048,27 +1052,23 @@ static struct platform_device *stamp_devices[] __initdata = { &bfin_gpios_device, }; -static int __init stamp_init(void) +static int __init ezkit_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); - -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif - platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); return 0; } -arch_initcall(stamp_init); +arch_initcall(ezkit_init); void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } void bfin_get_ether_addr(char *addr) diff --git a/arch/blackfin/mach-bf527/include/mach/portmux.h b/arch/blackfin/mach-bf527/include/mach/portmux.h index 7f6da2c386b..72b1652be4d 100644 --- a/arch/blackfin/mach-bf527/include/mach/portmux.h +++ b/arch/blackfin/mach-bf527/include/mach/portmux.h @@ -73,6 +73,8 @@ #define P_HWAIT (P_DONTCARE) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL1 + #define P_SPI0_SS (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(0)) #define P_SPI0_SSEL1 (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(2)) #define P_SPI0_SCK (P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(2)) diff --git a/arch/blackfin/mach-bf533/boards/blackstamp.c b/arch/blackfin/mach-bf533/boards/blackstamp.c index 6ee607c259a..015c18f85e7 100644 --- a/arch/blackfin/mach-bf533/boards/blackstamp.c +++ b/arch/blackfin/mach-bf533/boards/blackstamp.c @@ -309,10 +309,8 @@ static struct platform_device i2c_gpio_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }; -#endif static const unsigned int cclk_vlev_datasheet[] = { @@ -390,10 +388,8 @@ static int __init blackstamp_init(void) printk(KERN_INFO "%s(): registering device resources\n", __func__); -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif ret = platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); if (ret < 0) diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index 07f9ad1e189..db96f33f72e 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -441,7 +441,6 @@ static struct platform_device i2c_gpio_device = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_JOYSTICK_AD7142) || defined(CONFIG_JOYSTICK_AD7142_MODULE) { @@ -461,7 +460,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif static const unsigned int cclk_vlev_datasheet[] = { @@ -550,10 +548,8 @@ static int __init stamp_init(void) printk(KERN_INFO "%s(): registering device resources\n", __func__); -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif ret = platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); if (ret < 0) diff --git a/arch/blackfin/mach-bf533/include/mach/portmux.h b/arch/blackfin/mach-bf533/include/mach/portmux.h index 685a2651dcd..2f59ce0b0cb 100644 --- a/arch/blackfin/mach-bf533/include/mach/portmux.h +++ b/arch/blackfin/mach-bf533/include/mach/portmux.h @@ -54,14 +54,11 @@ #define P_SPI0_SSEL2 (P_DEFINED | P_IDENT(GPIO_PF2)) #define P_SPI0_SSEL1 (P_DEFINED | P_IDENT(GPIO_PF1)) #define P_SPI0_SS (P_DEFINED | P_IDENT(GPIO_PF0)) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL2 #define P_TMR2 (P_DONTCARE) #define P_TMR1 (P_DONTCARE) #define P_TMR0 (P_DONTCARE) #define P_TMRCLK (P_DEFINED | P_IDENT(GPIO_PF1)) - - - - #endif /* _MACH_PORTMUX_H_ */ diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537.c b/arch/blackfin/mach-bf537/boards/cm_bf537.c index 6ac8e4d5bd3..9cd8fb2a30d 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537.c @@ -479,8 +479,13 @@ static struct platform_device bfin_sport1_uart_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -591,6 +596,7 @@ static struct platform_device *cm_bf537_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif diff --git a/arch/blackfin/mach-bf537/boards/generic_board.c b/arch/blackfin/mach-bf537/boards/generic_board.c index dd6e6bfb98e..da710fdc456 100644 --- a/arch/blackfin/mach-bf537/boards/generic_board.c +++ b/arch/blackfin/mach-bf537/boards/generic_board.c @@ -262,8 +262,13 @@ static struct platform_device isp1362_hcd_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -662,6 +667,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -708,7 +714,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif }; -static int __init stamp_init(void) +static int __init generic_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); @@ -720,13 +726,13 @@ static int __init stamp_init(void) return 0; } -arch_initcall(stamp_init); +arch_initcall(generic_init); void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) diff --git a/arch/blackfin/mach-bf537/boards/minotaur.c b/arch/blackfin/mach-bf537/boards/minotaur.c index bb795341cb1..db7d3a385e4 100644 --- a/arch/blackfin/mach-bf537/boards/minotaur.c +++ b/arch/blackfin/mach-bf537/boards/minotaur.c @@ -61,8 +61,13 @@ static struct platform_device rtc_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -324,6 +329,7 @@ static struct platform_device *minotaur_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -377,5 +383,5 @@ void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } diff --git a/arch/blackfin/mach-bf537/boards/pnav10.c b/arch/blackfin/mach-bf537/boards/pnav10.c index 89de94f4545..590eb3a139b 100644 --- a/arch/blackfin/mach-bf537/boards/pnav10.c +++ b/arch/blackfin/mach-bf537/boards/pnav10.c @@ -198,8 +198,13 @@ static struct platform_device isp1362_hcd_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -529,6 +534,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -558,7 +564,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif }; -static int __init stamp_init(void) +static int __init pnav_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); @@ -569,7 +575,7 @@ static int __init stamp_init(void) return 0; } -arch_initcall(stamp_init); +arch_initcall(pnav_init); void bfin_get_ether_addr(char *addr) { diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index d812e2514a2..cd04c5e4487 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -321,8 +321,13 @@ static struct platform_device isp1362_hcd_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -1068,7 +1073,6 @@ static struct adp5588_kpad_platform_data adp5588_kpad_data = { }; #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_JOYSTICK_AD7142) || defined(CONFIG_JOYSTICK_AD7142_MODULE) { @@ -1102,7 +1106,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { }, #endif }; -#endif #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) static struct platform_device bfin_sport0_uart_device = { @@ -1217,6 +1220,7 @@ static struct platform_device *stamp_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -1284,12 +1288,8 @@ static struct platform_device *stamp_devices[] __initdata = { static int __init stamp_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); - -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info, ARRAY_SIZE(bfin_i2c_board_info)); -#endif - bfin_plat_nand_init(); platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); @@ -1307,7 +1307,7 @@ void native_machine_restart(char *cmd) { /* workaround reboot hang when booting from SPI */ if ((bfin_read_SYSCR() & 0x7) == 0x3) - bfin_gpio_reset_spi0_ssel1(); + bfin_reset_boot_spi_cs(P_DEFAULT_BOOT_SPI_CS); } /* diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index 2f4b066153c..3f4f203a06e 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -481,8 +481,13 @@ static struct platform_device bfin_sport1_uart_device = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mii_bus = { + .name = "bfin_mii_bus", +}; + static struct platform_device bfin_mac_device = { .name = "bfin_mac", + .dev.platform_data = &bfin_mii_bus, }; #endif @@ -593,6 +598,7 @@ static struct platform_device *cm_bf537_devices[] __initdata = { #endif #if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mii_bus, &bfin_mac_device, #endif @@ -615,7 +621,7 @@ static struct platform_device *cm_bf537_devices[] __initdata = { &bfin_gpios_device, }; -static int __init cm_bf537_init(void) +static int __init tcm_bf537_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); platform_add_devices(cm_bf537_devices, ARRAY_SIZE(cm_bf537_devices)); @@ -629,7 +635,7 @@ static int __init cm_bf537_init(void) return 0; } -arch_initcall(cm_bf537_init); +arch_initcall(tcm_bf537_init); void bfin_get_ether_addr(char *addr) { diff --git a/arch/blackfin/mach-bf537/include/mach/portmux.h b/arch/blackfin/mach-bf537/include/mach/portmux.h index 78fee6e0f23..87285e75e90 100644 --- a/arch/blackfin/mach-bf537/include/mach/portmux.h +++ b/arch/blackfin/mach-bf537/include/mach/portmux.h @@ -31,6 +31,7 @@ #define P_PPI0_FS1 (P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1)) #define P_TACLK0 (P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1)) #define P_TMRCLK (P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1)) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL1 #define P_PPI0_D0 (P_DEFINED | P_IDENT(GPIO_PG0) | P_FUNCT(0)) #define P_PPI0_D1 (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(0)) diff --git a/arch/blackfin/mach-bf538/include/mach/portmux.h b/arch/blackfin/mach-bf538/include/mach/portmux.h index 1e031b588b4..c8db264e3e4 100644 --- a/arch/blackfin/mach-bf538/include/mach/portmux.h +++ b/arch/blackfin/mach-bf538/include/mach/portmux.h @@ -102,5 +102,6 @@ #define P_SPI0_SSEL2 (P_DEFINED | P_IDENT(GPIO_PF2)) #define P_SPI0_SSEL1 (P_DEFINED | P_IDENT(GPIO_PF1)) #define P_SPI0_SS (P_DEFINED | P_IDENT(GPIO_PF0)) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL2 #endif /* _MACH_PORTMUX_H_ */ diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 309c16014ca..096e661700a 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -781,7 +781,6 @@ static struct platform_device i2c_bfin_twi1_device = { #endif #endif -#ifdef CONFIG_I2C_BOARDINFO static struct i2c_board_info __initdata bfin_i2c_board_info0[] = { }; @@ -800,7 +799,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info1[] = { #endif }; #endif -#endif #if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) #include <linux/gpio_keys.h> @@ -956,14 +954,12 @@ static int __init ezkit_init(void) { printk(KERN_INFO "%s(): registering device resources\n", __func__); -#ifdef CONFIG_I2C_BOARDINFO i2c_register_board_info(0, bfin_i2c_board_info0, ARRAY_SIZE(bfin_i2c_board_info0)); #if !defined(CONFIG_BF542) /* The BF542 only has 1 TWI */ i2c_register_board_info(1, bfin_i2c_board_info1, ARRAY_SIZE(bfin_i2c_board_info1)); #endif -#endif platform_add_devices(ezkit_devices, ARRAY_SIZE(ezkit_devices)); diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h index 3b5430999f4..23d03c52f4b 100644 --- a/arch/blackfin/mach-bf548/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h @@ -175,6 +175,7 @@ #define ANOMALY_05000311 (0) #define ANOMALY_05000323 (0) #define ANOMALY_05000363 (0) +#define ANOMALY_05000380 (0) #define ANOMALY_05000412 (0) #define ANOMALY_05000432 (0) #define ANOMALY_05000435 (0) diff --git a/arch/blackfin/mach-bf548/include/mach/bf548.h b/arch/blackfin/mach-bf548/include/mach/bf548.h index f0e56998481..cd31f72bdd8 100644 --- a/arch/blackfin/mach-bf548/include/mach/bf548.h +++ b/arch/blackfin/mach-bf548/include/mach/bf548.h @@ -104,6 +104,18 @@ #define AMGCTLVAL (V_AMBEN | V_AMCKEN) +#if defined(CONFIG_BF542M) +# define CONFIG_BF542 +#elif defined(CONFIG_BF544M) +# define CONFIG_BF544 +#elif defined(CONFIG_BF547M) +# define CONFIG_BF547 +#elif defined(CONFIG_BF548M) +# define CONFIG_BF548 +#elif defined(CONFIG_BF549M) +# define CONFIG_BF549 +#endif + #if defined(CONFIG_BF542) # define CPU "BF542" # define CPUID 0x27de diff --git a/arch/blackfin/mach-bf548/include/mach/gpio.h b/arch/blackfin/mach-bf548/include/mach/gpio.h index bba82dc75f1..3a205170978 100644 --- a/arch/blackfin/mach-bf548/include/mach/gpio.h +++ b/arch/blackfin/mach-bf548/include/mach/gpio.h @@ -195,17 +195,17 @@ struct gpio_port_t { unsigned short port_fer; unsigned short dummy1; - unsigned short port_data; + unsigned short data; unsigned short dummy2; - unsigned short port_set; + unsigned short data_set; unsigned short dummy3; - unsigned short port_clear; + unsigned short data_clear; unsigned short dummy4; - unsigned short port_dir_set; + unsigned short dir_set; unsigned short dummy5; - unsigned short port_dir_clear; + unsigned short dir_clear; unsigned short dummy6; - unsigned short port_inen; + unsigned short inen; unsigned short dummy7; unsigned int port_mux; }; diff --git a/arch/blackfin/mach-bf548/include/mach/portmux.h b/arch/blackfin/mach-bf548/include/mach/portmux.h index 8177a567dcd..ffb1d0a44b4 100644 --- a/arch/blackfin/mach-bf548/include/mach/portmux.h +++ b/arch/blackfin/mach-bf548/include/mach/portmux.h @@ -125,6 +125,7 @@ #define P_KEY_COL2 (P_DEFINED | P_IDENT(GPIO_PD14) | P_FUNCT(3)) #define P_KEY_COL3 (P_DEFINED | P_IDENT(GPIO_PD15) | P_FUNCT(3)) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL1 #define P_SPI0_SCK (P_DEFINED | P_IDENT(GPIO_PE0) | P_FUNCT(0)) #define P_SPI0_MISO (P_DEFINED | P_IDENT(GPIO_PE1) | P_FUNCT(0)) #define P_SPI0_MOSI (P_DEFINED | P_IDENT(GPIO_PE2) | P_FUNCT(0)) diff --git a/arch/blackfin/mach-bf561/include/mach/defBF561.h b/arch/blackfin/mach-bf561/include/mach/defBF561.h index d7c50975965..cf922295f4c 100644 --- a/arch/blackfin/mach-bf561/include/mach/defBF561.h +++ b/arch/blackfin/mach-bf561/include/mach/defBF561.h @@ -1106,6 +1106,8 @@ #define DLEN_8 0x0 /* PPI Data Length mask for DLEN=8 */ #define DLEN(x) (((x-9) & 0x07) << 11) /* PPI Data Length (only works for x=10-->x=16) */ #define POL 0x0000C000 /* PPI Signal Polarities */ +#define POLC 0x4000 /* PPI Clock Polarity */ +#define POLS 0x8000 /* PPI Frame Sync Polarity */ /* PPI_STATUS Masks */ #define FLD 0x00000400 /* Field Indicator */ diff --git a/arch/blackfin/mach-bf561/include/mach/portmux.h b/arch/blackfin/mach-bf561/include/mach/portmux.h index a6ee8206efb..2e5ad6347de 100644 --- a/arch/blackfin/mach-bf561/include/mach/portmux.h +++ b/arch/blackfin/mach-bf561/include/mach/portmux.h @@ -85,5 +85,6 @@ #define P_SPI0_MOSI (P_DONTCARE) #define P_SPI0_MISO (P_DONTCARE) #define P_SPI0_SCK (P_DONTCARE) +#define P_DEFAULT_BOOT_SPI_CS P_SPI0_SSEL2 #endif /* _MACH_PORTMUX_H_ */ diff --git a/arch/blackfin/mach-common/clocks-init.c b/arch/blackfin/mach-common/clocks-init.c index 5d182abefc7..9dddb6f8cc8 100644 --- a/arch/blackfin/mach-common/clocks-init.c +++ b/arch/blackfin/mach-common/clocks-init.c @@ -14,6 +14,7 @@ #include <asm/clocks.h> #include <asm/mem_init.h> +#define SDGCTL_WIDTH (1 << 31) /* SDRAM external data path width */ #define PLL_CTL_VAL \ (((CONFIG_VCO_MULT & 63) << 9) | CLKIN_HALF | \ (PLL_BYPASS << 8) | (ANOMALY_05000265 ? 0x8000 : 0)) @@ -76,7 +77,7 @@ void init_clocks(void) bfin_write_PLL_DIV(CONFIG_CCLK_ACT_DIV | CONFIG_SCLK_DIV); #ifdef EBIU_SDGCTL bfin_write_EBIU_SDRRC(mem_SDRRC); - bfin_write_EBIU_SDGCTL(mem_SDGCTL); + bfin_write_EBIU_SDGCTL((bfin_read_EBIU_SDGCTL() & SDGCTL_WIDTH) | mem_SDGCTL); #else bfin_write_EBIU_RSTCTL(bfin_read_EBIU_RSTCTL() & ~(SRREQ)); do_sync(); diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index fae77465137..88de053bbe8 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -151,13 +151,6 @@ ENTRY(_ex_syscall) jump.s _bfin_return_from_exception; ENDPROC(_ex_syscall) -ENTRY(_ex_soft_bp) - r7 = retx; - r7 += -2; - retx = r7; - jump.s _ex_trap_c; -ENDPROC(_ex_soft_bp) - ENTRY(_ex_single_step) /* If we just returned from an interrupt, the single step event is for the RTI instruction. */ @@ -1087,7 +1080,7 @@ ENTRY(_ex_table) * EXCPT instruction can provide 4 bits of EXCAUSE, allowing 16 to be user defined */ .long _ex_syscall /* 0x00 - User Defined - Linux Syscall */ - .long _ex_soft_bp /* 0x01 - User Defined - Software breakpoint */ + .long _ex_trap_c /* 0x01 - User Defined - Software breakpoint */ #ifdef CONFIG_KGDB .long _ex_trap_c /* 0x02 - User Defined - KGDB initial connection and break signal trap */ diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S index e1e42c029e1..698d4c05947 100644 --- a/arch/blackfin/mach-common/head.S +++ b/arch/blackfin/mach-common/head.S @@ -17,6 +17,19 @@ __INIT +ENTRY(__init_clear_bss) + r2 = r2 - r1; + cc = r2 == 0; + if cc jump .L_bss_done; + r2 >>= 2; + p1 = r1; + p2 = r2; + lsetup (1f, 1f) lc0 = p2; +1: [p1++] = r0; +.L_bss_done: + rts; +ENDPROC(__init_clear_bss) + #define INITIAL_STACK (L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12) ENTRY(__start) @@ -144,6 +157,35 @@ ENTRY(__start) call _init_early_exception_vectors; #endif + r0 = 0 (x); + /* Zero out all of the fun bss regions */ +#if L1_DATA_A_LENGTH > 0 + r1.l = __sbss_l1; + r1.h = __sbss_l1; + r2.l = __ebss_l1; + r2.h = __ebss_l1; + call __init_clear_bss +#endif +#if L1_DATA_B_LENGTH > 0 + r1.l = __sbss_b_l1; + r1.h = __sbss_b_l1; + r2.l = __ebss_b_l1; + r2.h = __ebss_b_l1; + call __init_clear_bss +#endif +#if L2_LENGTH > 0 + r1.l = __sbss_l2; + r1.h = __sbss_l2; + r2.l = __ebss_l2; + r2.h = __ebss_l2; + call __init_clear_bss +#endif + r1.l = ___bss_start; + r1.h = ___bss_start; + r2.l = ___bss_stop; + r2.h = ___bss_stop; + call __init_clear_bss + /* Put The Code for PLL Programming and SDRAM Programming in L1 ISRAM */ call _bfin_relocate_l1_mem; #ifdef CONFIG_BFIN_KERNEL_CLOCK @@ -185,19 +227,6 @@ ENDPROC(__start) # define WDOG_CTL WDOGA_CTL #endif -ENTRY(__init_clear_bss) - r2 = r2 - r1; - cc = r2 == 0; - if cc jump .L_bss_done; - r2 >>= 2; - p1 = r1; - p2 = r2; - lsetup (1f, 1f) lc0 = p2; -1: [p1++] = r0; -.L_bss_done: - rts; -ENDPROC(__init_clear_bss) - ENTRY(_real_start) /* Enable nested interrupts */ [--sp] = reti; @@ -209,35 +238,6 @@ ENTRY(_real_start) w[p0] = r0; ssync; - r0 = 0 (x); - /* Zero out all of the fun bss regions */ -#if L1_DATA_A_LENGTH > 0 - r1.l = __sbss_l1; - r1.h = __sbss_l1; - r2.l = __ebss_l1; - r2.h = __ebss_l1; - call __init_clear_bss -#endif -#if L1_DATA_B_LENGTH > 0 - r1.l = __sbss_b_l1; - r1.h = __sbss_b_l1; - r2.l = __ebss_b_l1; - r2.h = __ebss_b_l1; - call __init_clear_bss -#endif -#if L2_LENGTH > 0 - r1.l = __sbss_l2; - r1.h = __sbss_l2; - r2.l = __ebss_l2; - r2.h = __ebss_l2; - call __init_clear_bss -#endif - r1.l = ___bss_start; - r1.h = ___bss_start; - r2.l = ___bss_stop; - r2.h = ___bss_stop; - call __init_clear_bss - /* Pass the u-boot arguments to the global value command line */ R0 = R7; call _cmdline_init; diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S index 473df0f7fa7..43c4eb9acb6 100644 --- a/arch/blackfin/mach-common/interrupt.S +++ b/arch/blackfin/mach-common/interrupt.S @@ -195,7 +195,7 @@ ENDPROC(_evt_ivhw) /* Interrupt routine for evt2 (NMI). * We don't actually use this, so just return. * For inner circle type details, please see: - * http://docs.blackfin.uclinux.org/doku.php?id=linux:nmi + * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi */ ENTRY(_evt_nmi) .weak _evt_nmi diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 1bba6030dce..202494568c6 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1101,10 +1101,9 @@ int __init init_arch_irq(void) IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW; -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \ - || defined(BF538_FAMILY) || defined(CONFIG_BF51x) +#ifdef SIC_IWR0 bfin_write_SIC_IWR0(IWR_DISABLE_ALL); -#if defined(CONFIG_BF52x) || defined(CONFIG_BF51x) +# ifdef SIC_IWR1 /* BF52x/BF51x system reset does not properly reset SIC_IWR1 which * will screw up the bootrom as it relies on MDMA0/1 waking it * up from IDLE instructions. See this report for more info: @@ -1114,10 +1113,8 @@ int __init init_arch_irq(void) bfin_write_SIC_IWR1(IWR_ENABLE(10) | IWR_ENABLE(11)); else bfin_write_SIC_IWR1(IWR_DISABLE_ALL); -#else - bfin_write_SIC_IWR1(IWR_DISABLE_ALL); -#endif -# ifdef CONFIG_BF54x +# endif +# ifdef SIC_IWR2 bfin_write_SIC_IWR2(IWR_DISABLE_ALL); # endif #else diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c index d3d70fd67c1..f48a6aebb49 100644 --- a/arch/blackfin/mach-common/pm.c +++ b/arch/blackfin/mach-common/pm.c @@ -82,10 +82,9 @@ void bfin_pm_suspend_standby_enter(void) bfin_pm_standby_restore(); -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || \ - defined(CONFIG_BF538) || defined(CONFIG_BF539) || defined(CONFIG_BF51x) +#ifdef SIC_IWR0 bfin_write_SIC_IWR0(IWR_DISABLE_ALL); -#if defined(CONFIG_BF52x) || defined(CONFIG_BF51x) +# ifdef SIC_IWR1 /* BF52x system reset does not properly reset SIC_IWR1 which * will screw up the bootrom as it relies on MDMA0/1 waking it * up from IDLE instructions. See this report for more info: @@ -95,10 +94,8 @@ void bfin_pm_suspend_standby_enter(void) bfin_write_SIC_IWR1(IWR_ENABLE(10) | IWR_ENABLE(11)); else bfin_write_SIC_IWR1(IWR_DISABLE_ALL); -#else - bfin_write_SIC_IWR1(IWR_DISABLE_ALL); -#endif -# ifdef CONFIG_BF54x +# endif +# ifdef SIC_IWR2 bfin_write_SIC_IWR2(IWR_DISABLE_ALL); # endif #else diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 77f30b664b4..30cf46534dd 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -27,12 +27,12 @@ extern void *per_cpu_init(void); #else /* ! SMP */ -#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu"))) - #define per_cpu_init() (__phys_per_cpu_start) #endif /* SMP */ +#define PER_CPU_BASE_SECTION ".data.percpu" + /* * Be extremely careful when taking the address of this variable! Due to virtual * remapping, it is different from the canonical address returned by __get_cpu_var(var)! diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 32f3af1641c..3193f4417e1 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -84,7 +84,7 @@ void build_cpu_to_node_map(void); .child = NULL, \ .groups = NULL, \ .min_interval = 8, \ - .max_interval = 8*(min(num_online_cpus(), 32)), \ + .max_interval = 8*(min(num_online_cpus(), 32U)), \ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 2, \ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5cfd3d91001..006ad366a45 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -880,7 +880,7 @@ iosapic_unregister_intr (unsigned int gsi) if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ - cpus_setall(idesc->affinity); + cpumask_setall(idesc->affinity); #endif /* Clear the interrupt information */ iosapic_intr_info[irq].dest = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0..226233a6fa1 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -103,7 +103,7 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { if (irq < NR_IRQS) { - cpumask_copy(&irq_desc[irq].affinity, + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu_logical_id(hwid))); irq_redir[irq] = (char) (redir & 0xff); } @@ -148,7 +148,7 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask) >= nr_cpu_ids) { /* * Save it for phase 2 processing diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 28d3d483db9..927ad027820 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -493,11 +493,13 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { int irq = local_vector_to_irq(vector); @@ -551,11 +553,13 @@ void ia64_process_pending_intr(void) * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { struct pt_regs *old_regs = set_irq_regs(NULL); int irq = local_vector_to_irq(vector); diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 89033933903..dcb6b7c51ea 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -75,7 +75,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, msg.data = data; write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); } #endif /* CONFIG_SMP */ @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = *mask; + cpumask_copy(irq_desc[irq].affinity, mask); } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 10a7d47e851..f45e4e508ec 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -219,6 +219,7 @@ SECTIONS .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index c5a214026a7..d0223abbbbd 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -443,7 +443,7 @@ sn_acpi_slot_fixup(struct pci_dev *dev) size = pci_resource_len(dev, PCI_ROM_RESOURCE); addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], size); - image_size = pci_get_rom_size(addr, size); + image_size = pci_get_rom_size(dev, addr, size); dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; dev->resource[PCI_ROM_RESOURCE].end = (unsigned long) addr + image_size - 1; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 4e1801bad83..e2eb2da60f9 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -269,7 +269,7 @@ sn_io_slot_fixup(struct pci_dev *dev) rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), size + 1); - image_size = pci_get_rom_size(rom, size + 1); + image_size = pci_get_rom_size(dev, rom, size + 1); dev->resource[PCI_ROM_RESOURCE].end = dev->resource[PCI_ROM_RESOURCE].start + image_size - 1; diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index ca553b0429c..81e428943d7 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -205,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + cpumask_copy(irq_desc[irq].affinity, cpu_mask); } #endif /* CONFIG_SMP */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index abc62aa744a..3214ade02d1 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -66,7 +66,7 @@ extern void smtc_forward_irq(unsigned int irq); */ #define IRQ_AFFINITY_HOOK(irq) \ do { \ - if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \ + if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\ smtc_forward_irq(irq); \ irq_exit(); \ return; \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 494a49a317e..87deb8f6c45 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b6cca01ff82..5f5af7d4c89 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -686,7 +686,7 @@ void smtc_forward_irq(unsigned int irq) * and efficiency, we just pick the easiest one to find. */ - target = first_cpu(irq_desc[irq].affinity); + target = cpumask_first(irq_desc[irq].affinity); /* * We depend on the platform code to have correctly processed @@ -921,11 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) struct clock_event_device *cd; void *arg_copy = pipi->arg; int type_copy = pipi->type; + int irq = MIPS_CPU_IRQ_BASE + 1; + smtc_ipi_nq(&freeIPIq, pipi); switch (type_copy) { case SMTC_CLOCK_TICK: irq_enter(); - kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); irq_exit(); diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index aabd7274507..5ba31888fef 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -116,7 +116,7 @@ struct plat_smp_ops msmtc_smp_ops = { void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,11 +139,12 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ + cpumask_copy(&tmask, affinity); for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } - irq_desc[irq].affinity = tmask; + cpumask_copy(irq_desc[irq].affinity, &tmask); if (cpus_empty(tmask)) /* diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index f8b18af141a..0ecd5fe9486 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -155,7 +155,7 @@ static void indy_buserror_irq(void) int irq = SGI_BUSERR_IRQ; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); ip22_be_interrupt(irq); irq_exit(); } diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 3dcb27ec0c5..c8f7d2328b2 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -122,7 +122,7 @@ void indy_8254timer_irq(void) char c; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); printk(KERN_ALERT "Oops, got 8254 interrupt.\n"); ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index dddfda8e829..314691648c9 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -178,9 +178,10 @@ struct plat_smp_ops bcm1480_smp_ops = { void bcm1480_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_BCM1480_INT_MBOX_0_0; unsigned int action; - kstat_this_cpu.irqs[K_BCM1480_INT_MBOX_0_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 5950a288a7d..cad14003b84 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -166,9 +166,10 @@ struct plat_smp_ops sb_smp_ops = { void sb1250_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_INT_MBOX_0; unsigned int action; - kstat_this_cpu.irqs[K_INT_MBOX_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index 10811e981d2..2e370d88a87 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -130,6 +130,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) * the stack NMI-atomically, it's safe to use smp_processor_id(). */ int sum, cpu = smp_processor_id(); + int irq = NMIIRQ; u8 wdt, tmp; wdt = WDCTR & ~WDCTR_WDCNE; @@ -138,7 +139,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) NMICR = NMICR_WDIF; nmi_count(cpu)++; - kstat_this_cpu.irqs[NMIIRQ]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); sum = irq_stat[cpu].__irq_count; if (last_irq_sums[cpu] == sum) { diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ac2c822928c..49482806863 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -120,7 +120,7 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) if (CHECK_IRQ_PER_CPU(irq)) { /* Bad linux design decision. The mask has already * been set; we must reset it */ - irq_desc[irq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[irq].affinity); return -EINVAL; } @@ -136,7 +136,7 @@ static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = *dest; + cpumask_copy(irq_desc[irq].affinity, dest); } #endif @@ -295,7 +295,7 @@ int txn_alloc_irq(unsigned int bits_wide) unsigned long txn_affinity_addr(unsigned int irq, int cpu) { #ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); #endif return per_cpu(cpu_data, cpu).txn_addr; @@ -352,7 +352,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) irq = eirr_to_irq(eirr_val); #ifdef CONFIG_SMP - dest = irq_desc[irq].affinity; + cpumask_copy(&dest, irq_desc[irq].affinity); if (CHECK_IRQ_PER_CPU(irq_desc[irq].status) && !cpu_isset(smp_processor_id(), dest)) { int cpu = first_cpu(dest); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f9..ad1e5ac721d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -231,7 +231,7 @@ void fixup_irqs(cpumask_t map) if (irq_desc[irq].status & IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + cpumask_and(&mask, irq_desc[irq].affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 161b9b9691f..295ccc5e86b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -184,6 +184,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1c..80b513449f4 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -153,9 +153,10 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check) { int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_desc[virq].affinity; + cpumask_t cpumask; cpumask_t tmp = CPU_MASK_NONE; + cpumask_copy(&cpumask, irq_desc[virq].affinity); if (!distribute_irqs) return default_server; @@ -869,7 +870,7 @@ void xics_migrate_irqs_away(void) virq, cpu); /* Reset affinity to all cpus */ - irq_desc[virq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index a35297dbac2..532e205303a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -566,9 +566,10 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index caf4c33f4e8..7c35787d29b 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -22,6 +22,7 @@ #include <linux/gpio.h> #include <linux/spi/spi.h> #include <linux/spi/spi_gpio.h> +#include <media/ov772x.h> #include <media/soc_camera_platform.h> #include <media/sh_mobile_ceu.h> #include <video/sh_mobile_lcdc.h> @@ -216,7 +217,14 @@ static struct platform_device lcdc_device = { }, }; +static void camera_power(int val) +{ + gpio_set_value(GPIO_PTZ5, val); /* RST_CAM/RSTB */ + mdelay(10); +} + #ifdef CONFIG_I2C +/* support for the old ncm03j camera */ static unsigned char camera_ncm03j_magic[] = { 0x87, 0x00, 0x88, 0x08, 0x89, 0x01, 0x8A, 0xE8, @@ -237,6 +245,23 @@ static unsigned char camera_ncm03j_magic[] = 0x63, 0xD4, 0x64, 0xEA, 0xD6, 0x0F, }; +static int camera_probe(void) +{ + struct i2c_adapter *a = i2c_get_adapter(0); + struct i2c_msg msg; + int ret; + + camera_power(1); + msg.addr = 0x6e; + msg.buf = camera_ncm03j_magic; + msg.len = 2; + msg.flags = 0; + ret = i2c_transfer(a, &msg, 1); + camera_power(0); + + return ret; +} + static int camera_set_capture(struct soc_camera_platform_info *info, int enable) { @@ -245,9 +270,11 @@ static int camera_set_capture(struct soc_camera_platform_info *info, int ret = 0; int i; + camera_power(0); if (!enable) return 0; /* no disable for now */ + camera_power(1); for (i = 0; i < ARRAY_SIZE(camera_ncm03j_magic); i += 2) { u_int8_t buf[8]; @@ -286,8 +313,35 @@ static struct platform_device camera_device = { .platform_data = &camera_info, }, }; + +static int __init camera_setup(void) +{ + if (camera_probe() > 0) + platform_device_register(&camera_device); + + return 0; +} +late_initcall(camera_setup); + #endif /* CONFIG_I2C */ +static int ov7725_power(struct device *dev, int mode) +{ + camera_power(0); + if (mode) + camera_power(1); + + return 0; +} + +static struct ov772x_camera_info ov7725_info = { + .buswidth = SOCAM_DATAWIDTH_8, + .flags = OV772X_FLAG_VFLIP | OV772X_FLAG_HFLIP, + .link = { + .power = ov7725_power, + }, +}; + static struct sh_mobile_ceu_info sh_mobile_ceu_info = { .flags = SOCAM_PCLK_SAMPLE_RISING | SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_MASTER | SOCAM_DATAWIDTH_8, @@ -338,9 +392,6 @@ static struct platform_device *ap325rxa_devices[] __initdata = { &ap325rxa_nor_flash_device, &lcdc_device, &ceu_device, -#ifdef CONFIG_I2C - &camera_device, -#endif &nand_flash_device, &sdcard_cn3_device, }; @@ -349,6 +400,10 @@ static struct i2c_board_info __initdata ap325rxa_i2c_devices[] = { { I2C_BOARD_INFO("pcf8563", 0x51), }, + { + I2C_BOARD_INFO("ov772x", 0x21), + .platform_data = &ov7725_info, + }, }; static struct spi_board_info ap325rxa_spi_devices[] = { @@ -426,7 +481,7 @@ static int __init ap325rxa_devices_setup(void) gpio_request(GPIO_PTZ6, NULL); gpio_direction_output(GPIO_PTZ6, 0); /* STBY_CAM */ gpio_request(GPIO_PTZ5, NULL); - gpio_direction_output(GPIO_PTZ5, 1); /* RST_CAM */ + gpio_direction_output(GPIO_PTZ5, 0); /* RST_CAM */ gpio_request(GPIO_PTZ4, NULL); gpio_direction_output(GPIO_PTZ4, 0); /* SADDR */ diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 5c423fa8e6b..352f87d50fd 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28 -# Fri Jan 9 16:54:19 2009 +# Linux kernel version: 2.6.29-rc2 +# Tue Jan 27 11:45:08 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -45,12 +45,12 @@ CONFIG_BSD_PROCESS_ACCT=y # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set CONFIG_USER_SCHED=y # CONFIG_CGROUP_SCHED is not set +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -378,6 +378,7 @@ CONFIG_WIRELESS=y # CONFIG_WIRELESS_EXT is not set # CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -400,6 +401,7 @@ CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_AR7_PARTS is not set @@ -447,9 +449,7 @@ CONFIG_MTD_CFI_UTIL=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_PHYSMAP_START=0xffffffff -CONFIG_MTD_PHYSMAP_LEN=0 -CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_PHYSMAP_COMPAT is not set # CONFIG_MTD_PLATRAM is not set # @@ -480,6 +480,12 @@ CONFIG_MTD_NAND_SH_FLCTL=y # CONFIG_MTD_ONENAND is not set # +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set + +# # UBI - Unsorted block images # CONFIG_MTD_UBI=y @@ -607,6 +613,10 @@ CONFIG_SMSC911X=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -790,6 +800,7 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set # @@ -837,7 +848,7 @@ CONFIG_SOC_CAMERA=y # CONFIG_SOC_CAMERA_MT9V022 is not set # CONFIG_SOC_CAMERA_TW9910 is not set CONFIG_SOC_CAMERA_PLATFORM=y -# CONFIG_SOC_CAMERA_OV772X is not set +CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DAB is not set @@ -1012,6 +1023,7 @@ CONFIG_FS_POSIX_ACL=y CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y @@ -1060,6 +1072,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_JFFS2_FS is not set # CONFIG_UBIFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 7758263514b..678576796bd 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28 -# Fri Jan 9 17:09:35 2009 +# Linux kernel version: 2.6.29-rc1 +# Thu Jan 22 09:16:16 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -45,8 +45,12 @@ CONFIG_SYSVIPC_SYSCTL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set + +# +# Control Group support +# +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -389,6 +393,7 @@ CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y # CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -411,6 +416,7 @@ CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_AR7_PARTS is not set @@ -458,9 +464,7 @@ CONFIG_MTD_CFI_UTIL=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_PHYSMAP_START=0xffffffff -CONFIG_MTD_PHYSMAP_LEN=0 -CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_PHYSMAP_COMPAT is not set # CONFIG_MTD_PLATRAM is not set # @@ -488,6 +492,12 @@ CONFIG_MTD_NAND_PLATFORM=y # CONFIG_MTD_ONENAND is not set # +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set + +# # UBI - Unsorted block images # # CONFIG_MTD_UBI is not set @@ -587,6 +597,10 @@ CONFIG_SMC91X=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -761,6 +775,7 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set # @@ -806,9 +821,9 @@ CONFIG_SOC_CAMERA=y # CONFIG_SOC_CAMERA_MT9M111 is not set # CONFIG_SOC_CAMERA_MT9T031 is not set # CONFIG_SOC_CAMERA_MT9V022 is not set -# CONFIG_SOC_CAMERA_TW9910 is not set -CONFIG_SOC_CAMERA_PLATFORM=y -# CONFIG_SOC_CAMERA_OV772X is not set +CONFIG_SOC_CAMERA_TW9910=y +# CONFIG_SOC_CAMERA_PLATFORM is not set +CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DAB is not set @@ -866,11 +881,13 @@ CONFIG_USB_GADGET_SELECTED=y # CONFIG_USB_GADGET_PXA25X is not set # CONFIG_USB_GADGET_PXA27X is not set # CONFIG_USB_GADGET_S3C2410 is not set +# CONFIG_USB_GADGET_IMX is not set CONFIG_USB_GADGET_M66592=y CONFIG_USB_M66592=y CONFIG_SUPERH_BUILT_IN_M66592=y # CONFIG_USB_GADGET_AMD5536UDC is not set # CONFIG_USB_GADGET_FSL_QE is not set +# CONFIG_USB_GADGET_CI13XXX is not set # CONFIG_USB_GADGET_NET2280 is not set # CONFIG_USB_GADGET_GOKU is not set # CONFIG_USB_GADGET_DUMMY_HCD is not set @@ -883,6 +900,11 @@ CONFIG_USB_G_SERIAL=y # CONFIG_USB_MIDI_GADGET is not set # CONFIG_USB_G_PRINTER is not set # CONFIG_USB_CDC_COMPOSITE is not set + +# +# OTG and related infrastructure +# +# CONFIG_USB_GPIO_VBUS is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -961,6 +983,7 @@ CONFIG_UIO_PDRV_GENIRQ=y CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY is not set # CONFIG_QUOTA is not set @@ -1004,6 +1027,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_EFS_FS is not set # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h index ee839ee58ac..090358a7e1b 100644 --- a/arch/sh/include/asm/mutex-llsc.h +++ b/arch/sh/include/asm/mutex-llsc.h @@ -21,38 +21,36 @@ static inline void __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n" "add #-1, %0 \n" "movco.l %0, @%2 \n" "movt %1 \n" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res != 0)) + if (unlikely(!__done || __res != 0)) fail_fn(count); } static inline int __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n" "add #-1, %0 \n" "movco.l %0, @%2 \n" "movt %1 \n" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res != 0)) + if (unlikely(!__done || __res != 0)) __res = fail_fn(count); return __res; @@ -61,19 +59,18 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n\t" "add #1, %0 \n\t" "movco.l %0, @%2 \n\t" "movt %1 \n\t" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res <= 0)) + if (unlikely(!__done || __res <= 0)) fail_fn(count); } diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 05a868a71ef..5bc34681d99 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -21,23 +21,10 @@ static inline void syscall_rollback(struct task_struct *task, */ } -static inline bool syscall_has_error(struct pt_regs *regs) -{ - return (regs->sr & 0x1) ? true : false; -} -static inline void syscall_set_error(struct pt_regs *regs) -{ - regs->sr |= 0x1; -} -static inline void syscall_clear_error(struct pt_regs *regs) -{ - regs->sr &= ~0x1; -} - static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return syscall_has_error(regs) ? regs->regs[0] : 0; + return IS_ERR_VALUE(regs->regs[0]) ? regs->regs[0] : 0; } static inline long syscall_get_return_value(struct task_struct *task, @@ -50,13 +37,10 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) { - syscall_set_error(regs); + if (error) regs->regs[0] = -error; - } else { - syscall_clear_error(regs); + else regs->regs[0] = val; - } } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index e1143b9784d..c3561ca72be 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -21,23 +21,10 @@ static inline void syscall_rollback(struct task_struct *task, */ } -static inline bool syscall_has_error(struct pt_regs *regs) -{ - return (regs->sr & 0x1) ? true : false; -} -static inline void syscall_set_error(struct pt_regs *regs) -{ - regs->sr |= 0x1; -} -static inline void syscall_clear_error(struct pt_regs *regs) -{ - regs->sr &= ~0x1; -} - static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return syscall_has_error(regs) ? regs->regs[9] : 0; + return IS_ERR_VALUE(regs->regs[9]) ? regs->regs[9] : 0; } static inline long syscall_get_return_value(struct task_struct *task, @@ -50,13 +37,10 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) { - syscall_set_error(regs); + if (error) regs->regs[9] = -error; - } else { - syscall_clear_error(regs); + else regs->regs[9] = val; - } } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 2780917c008..e3ea5411da6 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -423,7 +423,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) int m; unsigned int hx; - m = (finsn >> 9) & 0x7; + m = (finsn >> 8) & 0x7; hx = tsk->thread.fpu.hard.fp_regs[m]; if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR) diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 53424750857..370d2cfa34e 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -262,11 +262,11 @@ void __init setup_bootmem_allocator(unsigned long free_pfn) BOOTMEM_DEFAULT); /* - * reserve physical page 0 - it's a special BIOS page on many boxes, - * enabling clean reboots, SMP operation, laptop functions. + * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. */ - reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET, - BOOTMEM_DEFAULT); + if (CONFIG_ZERO_PAGE_OFFSET != 0) + reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET, + BOOTMEM_DEFAULT); sparse_memory_present_with_active_regions(0); diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 77c21bde376..17784e19ae3 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -510,7 +510,6 @@ handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs, case -ERESTARTNOHAND: no_system_call_restart: regs->regs[0] = -EINTR; - regs->sr |= 1; break; case -ERESTARTSYS: @@ -589,8 +588,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - if (regs->sr & 1) - handle_syscall_restart(save_r0, regs, &ka.sa); + handle_syscall_restart(save_r0, regs, &ka.sa); /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &ka, &info, oldset, diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index b22fdfaaa19..0663a0ee602 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -60,7 +60,6 @@ handle_syscall_restart(struct pt_regs *regs, struct sigaction *sa) case -ERESTARTNOHAND: no_system_call_restart: regs->regs[REG_RET] = -EINTR; - regs->sr |= 1; break; case -ERESTARTSYS: @@ -109,8 +108,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) signr = get_signal_to_deliver(&info, &ka, regs, 0); if (signr > 0) { - if (regs->sr & 1) - handle_syscall_restart(regs, &ka.sa); + handle_syscall_restart(regs, &ka.sa); /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index cbdd0d40e54..356c8ec9289 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -36,8 +36,7 @@ */ /* - * unsigned int csum_partial(const unsigned char *buf, int len, - * unsigned int sum); + * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); */ .text @@ -49,11 +48,31 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ - mov r5, r1 mov r4, r0 - tst #2, r0 ! Check alignment. - bt 2f ! Jump if alignment is ok. + tst #3, r0 ! Check alignment. + bt/s 2f ! Jump if alignment is ok. + mov r4, r7 ! Keep a copy to check for alignment ! + tst #1, r0 ! Check alignment. + bt 21f ! Jump if alignment is boundary of 2bytes. + + ! buf is odd + tst r5, r5 + add #-1, r5 + bt 9f + mov.b @r4+, r0 + extu.b r0, r0 + addc r0, r6 ! t=0 from previous tst + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 + mov r4, r0 + tst #2, r0 + bt 2f +21: + ! buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -61,16 +80,17 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: - mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: + ! buf is 4 byte aligned (len could be 0) + mov r5, r1 mov #-5, r0 - shld r0, r5 - tst r5, r5 + shld r0, r1 + tst r1, r1 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -92,30 +112,31 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r5 + dt r1 bf/s 3b cmp/eq #1, r0 - ! here, we know r5==0 - addc r5, r6 ! add carry to r6 + ! here, we know r1==0 + addc r1, r6 ! add carry to r6 4: - mov r1, r0 + mov r5, r0 and #0x1c, r0 tst r0, r0 - bt/s 6f - mov r0, r5 - shlr2 r5 + bt 6f + ! 4 bytes or more remaining + mov r0, r1 + shlr2 r1 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r5 + dt r1 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r5, r6 ! r5==0 here, so it means add carry-bit + addc r1, r6 ! r1==0 here, so it means add carry-bit 6: - mov r1, r5 + ! 3 bytes or less remaining mov #3, r0 and r0, r5 tst r5, r5 @@ -139,8 +160,18 @@ ENTRY(csum_partial) 8: addc r0, r6 mov #0, r0 - addc r0, r6 + addc r0, r6 9: + ! Check if the buffer was misaligned, if so realign sum + mov r7, r0 + tst #1, r0 + bt 10f + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 +10: rts mov r6, r0 diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 7da7c13d23c..a11b89ee9ef 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -17,7 +17,7 @@ typedef struct { /* Dcache line 1 */ unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ - unsigned int __pad0; + unsigned int __nmi_count; unsigned long clock_tick; /* %tick's per second */ unsigned long __pad; unsigned int __pad1; diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index d47d4a1955a..1934f2cbf51 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -66,9 +66,6 @@ extern void virt_irq_free(unsigned int virt_irq); extern void __init init_IRQ(void); extern void fixup_irqs(void); -extern int register_perfctr_intr(void (*handler)(struct pt_regs *)); -extern void release_perfctr_intr(void (*handler)(struct pt_regs *)); - static inline void set_softint(unsigned long bits) { __asm__ __volatile__("wr %0, 0x0, %%set_softint" @@ -98,5 +95,6 @@ void __trigger_all_cpu_backtrace(void); extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; #define __ARCH_HAS_DO_SOFTIRQ +#define ARCH_HAS_NMI_WATCHDOG #endif diff --git a/arch/sparc/include/asm/kdebug_64.h b/arch/sparc/include/asm/kdebug_64.h index f905b773235..feb3578e12c 100644 --- a/arch/sparc/include/asm/kdebug_64.h +++ b/arch/sparc/include/asm/kdebug_64.h @@ -14,6 +14,8 @@ enum die_val { DIE_TRAP, DIE_TRAP_TL1, DIE_CALL, + DIE_NMI, + DIE_NMIWATCHDOG, }; #endif diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h new file mode 100644 index 00000000000..fbd546dd4fe --- /dev/null +++ b/arch/sparc/include/asm/nmi.h @@ -0,0 +1,10 @@ +#ifndef __NMI_H +#define __NMI_H + +extern int __init nmi_init(void); +extern void perfctr_irq(int irq, struct pt_regs *regs); +extern void nmi_adjust_hz(unsigned int new_hz); + +extern int nmi_usable; + +#endif /* __NMI_H */ diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h new file mode 100644 index 00000000000..a2f5c61f924 --- /dev/null +++ b/arch/sparc/include/asm/pcr.h @@ -0,0 +1,46 @@ +#ifndef __PCR_H +#define __PCR_H + +struct pcr_ops { + u64 (*read)(void); + void (*write)(u64); +}; +extern const struct pcr_ops *pcr_ops; + +extern void deferred_pcr_work_irq(int irq, struct pt_regs *regs); +extern void schedule_deferred_pcr_work(void); + +#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ +#define PCR_STRACE 0x00000002 /* Trace supervisor events */ +#define PCR_UTRACE 0x00000004 /* Trace user events */ +#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ +#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ +#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ +#define PCR_N2_MASK0 0x00003fc0 +#define PCR_N2_MASK0_SHIFT 6 +#define PCR_N2_SL0 0x0003c000 +#define PCR_N2_SL0_SHIFT 14 +#define PCR_N2_OV0 0x00040000 +#define PCR_N2_MASK1 0x07f80000 +#define PCR_N2_MASK1_SHIFT 19 +#define PCR_N2_SL1 0x78000000 +#define PCR_N2_SL1_SHIFT 27 +#define PCR_N2_OV1 0x80000000 + +extern unsigned int picl_shift; + +/* In order to commonize as much of the implementation as + * possible, we use PICH as our counter. Mostly this is + * to accomodate Niagara-1 which can only count insn cycles + * in PICH. + */ +static inline u64 picl_value(unsigned int nmi_hz) +{ + u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift); + + return ((u64)((0 - delta) & 0xffffffff)) << 32; +} + +extern u64 pcr_enable; + +#endif /* __PCR_H */ diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index d573820c0ff..32a7efe76d0 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -23,6 +23,7 @@ #define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 +#define PIL_DEFERRED_PCR_WORK 7 #define PIL_NORMAL_MAX 14 #define PIL_NMI 15 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 53adcaa0348..54742e58831 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -52,6 +52,8 @@ obj-$(CONFIG_SPARC64) += visemul.o obj-$(CONFIG_SPARC64) += hvapi.o obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o +obj-$(CONFIG_SPARC64) += pcr.o +obj-$(CONFIG_SPARC64) += nmi.o # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 32d32b4824f..d85c3dc4953 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -26,6 +26,7 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); struct cpu_info { int psr_vers; const char *name; + const char *pmu_name; }; struct fpu_info { @@ -45,6 +46,9 @@ struct manufacturer_info { #define CPU(ver, _name) \ { .psr_vers = ver, .name = _name } +#define CPU_PMU(ver, _name, _pmu_name) \ +{ .psr_vers = ver, .name = _name, .pmu_name = _pmu_name } + #define FPU(ver, _name) \ { .fp_vers = ver, .name = _name } @@ -183,10 +187,10 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x17, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), - CPU(0x11, "TI UltraSparc II (BlackBird)"), - CPU(0x12, "TI UltraSparc IIi (Sabre)"), - CPU(0x13, "TI UltraSparc IIe (Hummingbird)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), + CPU_PMU(0x11, "TI UltraSparc II (BlackBird)", "ultra12"), + CPU_PMU(0x12, "TI UltraSparc IIi (Sabre)", "ultra12"), + CPU_PMU(0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -199,7 +203,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x22, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -209,12 +213,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x3e, .cpu_info = { - CPU(0x14, "TI UltraSparc III (Cheetah)"), - CPU(0x15, "TI UltraSparc III+ (Cheetah+)"), - CPU(0x16, "TI UltraSparc IIIi (Jalapeno)"), - CPU(0x18, "TI UltraSparc IV (Jaguar)"), - CPU(0x19, "TI UltraSparc IV+ (Panther)"), - CPU(0x22, "TI UltraSparc IIIi+ (Serrano)"), + CPU_PMU(0x14, "TI UltraSparc III (Cheetah)", "ultra3"), + CPU_PMU(0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"), + CPU_PMU(0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"), + CPU_PMU(0x18, "TI UltraSparc IV (Jaguar)", "ultra3+"), + CPU_PMU(0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"), + CPU_PMU(0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3i"), CPU(-1, NULL) }, .fpu_info = { @@ -234,6 +238,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { const char *sparc_cpu_type; const char *sparc_fpu_type; +const char *sparc_pmu_type; unsigned int fsr_storage; @@ -244,6 +249,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) sparc_cpu_type = NULL; sparc_fpu_type = NULL; + sparc_pmu_type = NULL; manuf = NULL; for (i = 0; i < ARRAY_SIZE(manufacturer_info); i++) @@ -263,6 +269,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { if (cpu->psr_vers == psr_vers) { sparc_cpu_type = cpu->name; + sparc_pmu_type = cpu->pmu_name; sparc_fpu_type = "No FPU"; break; } @@ -290,6 +297,8 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) psr_impl, fpu_vers); sparc_fpu_type = "Unknown FPU"; } + if (sparc_pmu_type == NULL) + sparc_pmu_type = "Unknown PMU"; } #ifdef CONFIG_SPARC32 @@ -315,11 +324,13 @@ static void __init sun4v_cpu_probe(void) case SUN4V_CHIP_NIAGARA1: sparc_cpu_type = "UltraSparc T1 (Niagara)"; sparc_fpu_type = "UltraSparc T1 integrated FPU"; + sparc_pmu_type = "niagara"; break; case SUN4V_CHIP_NIAGARA2: sparc_cpu_type = "UltraSparc T2 (Niagara2)"; sparc_fpu_type = "UltraSparc T2 integrated FPU"; + sparc_pmu_type = "niagara2"; break; default: diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index cab8e028687..3d2c6baae96 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -196,6 +196,11 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_data(j).__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); } return 0; } @@ -247,9 +252,10 @@ struct irq_handler_data { #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); @@ -778,69 +784,6 @@ void do_softirq(void) local_irq_restore(flags); } -static void unhandled_perf_irq(struct pt_regs *regs) -{ - unsigned long pcr, pic; - - read_pcr(pcr); - read_pic(pic); - - write_pcr(0); - - printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", - smp_processor_id()); - printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", - smp_processor_id(), pcr, pic); -} - -/* Almost a direct copy of the powerpc PMC code. */ -static DEFINE_SPINLOCK(perf_irq_lock); -static void *perf_irq_owner_caller; /* mostly for debugging */ -static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; - -/* Invoked from level 15 PIL handler in trap table. */ -void perfctr_irq(int irq, struct pt_regs *regs) -{ - clear_softint(1 << irq); - perf_irq(regs); -} - -int register_perfctr_intr(void (*handler)(struct pt_regs *)) -{ - int ret; - - if (!handler) - return -EINVAL; - - spin_lock(&perf_irq_lock); - if (perf_irq != unhandled_perf_irq) { - printk(KERN_WARNING "register_perfctr_intr: " - "perf IRQ busy (reserved by caller %p)\n", - perf_irq_owner_caller); - ret = -EBUSY; - goto out; - } - - perf_irq_owner_caller = __builtin_return_address(0); - perf_irq = handler; - - ret = 0; -out: - spin_unlock(&perf_irq_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(register_perfctr_intr); - -void release_perfctr_intr(void (*handler)(struct pt_regs *)) -{ - spin_lock(&perf_irq_lock); - perf_irq_owner_caller = NULL; - perf_irq = unhandled_perf_irq; - spin_unlock(&perf_irq_lock); -} -EXPORT_SYMBOL_GPL(release_perfctr_intr); - #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) { @@ -854,7 +797,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 81a972e8d8e..15d8a3f645c 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -5,6 +5,7 @@ /* cpu.c */ extern const char *sparc_cpu_type; +extern const char *sparc_pmu_type; extern const char *sparc_fpu_type; extern unsigned int fsr_storage; diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c new file mode 100644 index 00000000000..09f088ed4a6 --- /dev/null +++ b/arch/sparc/kernel/nmi.c @@ -0,0 +1,224 @@ +/* Pseudo NMI support on sparc64 systems. + * + * Copyright (C) 2009 David S. Miller <davem@davemloft.net> + * + * The NMI watchdog support and infrastructure is based almost + * entirely upon the x86 NMI support code. + */ +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/nmi.h> +#include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/kernel_stat.h> +#include <linux/slab.h> +#include <linux/kdebug.h> +#include <linux/delay.h> +#include <linux/smp.h> + +#include <asm/ptrace.h> +#include <asm/local.h> +#include <asm/pcr.h> + +/* We don't have a real NMI on sparc64, but we can fake one + * up using profiling counter overflow interrupts and interrupt + * levels. + * + * The profile overflow interrupts at level 15, so we use + * level 14 as our IRQ off level. + */ + +static int nmi_watchdog_active; +static int panic_on_timeout; + +int nmi_usable; +EXPORT_SYMBOL_GPL(nmi_usable); + +static unsigned int nmi_hz = HZ; + +static DEFINE_PER_CPU(unsigned int, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); + +void touch_nmi_watchdog(void) +{ + if (nmi_watchdog_active) { + int cpu; + + for_each_present_cpu(cpu) { + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; + } + } + + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, + pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) + return; + + console_verbose(); + bust_spinlocks(1); + + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->tpc); + show_regs(regs); + + bust_spinlocks(0); + + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + + local_irq_enable(); + do_exit(SIGBUS); +} + +notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) +{ + unsigned int sum, touched = 0; + int cpu = smp_processor_id(); + + clear_softint(1 << irq); + pcr_ops->write(PCR_PIC_PRIV); + + local_cpu_data().__nmi_count++; + + if (notify_die(DIE_NMI, "nmi", regs, 0, + pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) + touched = 1; + + sum = kstat_irqs_cpu(0, cpu); + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + if (!touched && __get_cpu_var(last_irq_sum) == sum) { + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); + } else { + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); + } + if (nmi_usable) { + write_pic(picl_value(nmi_hz)); + pcr_ops->write(pcr_enable); + } +} + +static inline unsigned int get_nmi_count(int cpu) +{ + return cpu_data(cpu).__nmi_count; +} + +static int endflag __initdata; + +static __init void nmi_cpu_busy(void *data) +{ + local_irq_enable_in_hardirq(); + while (endflag == 0) + mb(); +} + +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + nmi_usable = 0; +} + +static void stop_watchdog(void *unused) +{ + pcr_ops->write(PCR_PIC_PRIV); +} + +static int __init check_nmi_watchdog(void) +{ + unsigned int *prev_nmi_count; + int cpu, err; + + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL); + if (!prev_nmi_count) { + err = -ENOMEM; + goto error; + } + + printk(KERN_INFO "Testing NMI watchdog ... "); + + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); + + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); + local_irq_enable(); + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ + + for_each_online_cpu(cpu) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); + } + endflag = 1; + if (!nmi_usable) { + kfree(prev_nmi_count); + err = -ENODEV; + goto error; + } + printk("OK.\n"); + + nmi_hz = 1; + + kfree(prev_nmi_count); + return 0; +error: + on_each_cpu(stop_watchdog, NULL, 1); + return err; +} + +static void start_watchdog(void *unused) +{ + pcr_ops->write(PCR_PIC_PRIV); + write_pic(picl_value(nmi_hz)); + + pcr_ops->write(pcr_enable); +} + +void nmi_adjust_hz(unsigned int new_hz) +{ + nmi_hz = new_hz; + on_each_cpu(start_watchdog, NULL, 1); +} +EXPORT_SYMBOL_GPL(nmi_adjust_hz); + +int __init nmi_init(void) +{ + nmi_usable = 1; + + on_each_cpu(start_watchdog, NULL, 1); + + return check_nmi_watchdog(); +} + +static int __init setup_nmi_watchdog(char *str) +{ + if (!strncmp(str, "panic", 5)) + panic_on_timeout = 1; + + return 0; +} +__setup("nmi_watchdog=", setup_nmi_watchdog); diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c new file mode 100644 index 00000000000..92e0dda141a --- /dev/null +++ b/arch/sparc/kernel/pcr.c @@ -0,0 +1,153 @@ +/* pcr.c: Generic sparc64 performance counter infrastructure. + * + * Copyright (C) 2009 David S. Miller (davem@davemloft.net) + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/irq.h> + +#include <asm/pil.h> +#include <asm/pcr.h> +#include <asm/nmi.h> + +/* This code is shared between various users of the performance + * counters. Users will be oprofile, pseudo-NMI watchdog, and the + * perf_counter support layer. + */ + +#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) +#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ + PCR_N2_TOE_OV1 | \ + (2 << PCR_N2_SL1_SHIFT) | \ + (0xff << PCR_N2_MASK1_SHIFT)) + +u64 pcr_enable; +unsigned int picl_shift; + +/* Performance counter interrupts run unmasked at PIL level 15. + * Therefore we can't do things like wakeups and other work + * that expects IRQ disabling to be adhered to in locking etc. + * + * Therefore in such situations we defer the work by signalling + * a lower level cpu IRQ. + */ +void deferred_pcr_work_irq(int irq, struct pt_regs *regs) +{ + clear_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +void schedule_deferred_pcr_work(void) +{ + set_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +const struct pcr_ops *pcr_ops; +EXPORT_SYMBOL_GPL(pcr_ops); + +static u64 direct_pcr_read(void) +{ + u64 val; + + read_pcr(val); + return val; +} + +static void direct_pcr_write(u64 val) +{ + write_pcr(val); +} + +static const struct pcr_ops direct_pcr_ops = { + .read = direct_pcr_read, + .write = direct_pcr_write, +}; + +static void n2_pcr_write(u64 val) +{ + unsigned long ret; + + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); + if (val != HV_EOK) + write_pcr(val); +} + +static const struct pcr_ops n2_pcr_ops = { + .read = direct_pcr_read, + .write = n2_pcr_write, +}; + +static unsigned long perf_hsvc_group; +static unsigned long perf_hsvc_major; +static unsigned long perf_hsvc_minor; + +static int __init register_perf_hsvc(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + perf_hsvc_group = HV_GRP_NIAG_PERF; + break; + + case SUN4V_CHIP_NIAGARA2: + perf_hsvc_group = HV_GRP_N2_CPU; + break; + + default: + return -ENODEV; + } + + + perf_hsvc_major = 1; + perf_hsvc_minor = 0; + if (sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor)) { + printk("perfmon: Could not register hvapi.\n"); + return -ENODEV; + } + } + return 0; +} + +static void __init unregister_perf_hsvc(void) +{ + if (tlb_type != hypervisor) + return; + sun4v_hvapi_unregister(perf_hsvc_group); +} + +int __init pcr_arch_init(void) +{ + int err = register_perf_hsvc(); + + if (err) + return err; + + switch (tlb_type) { + case hypervisor: + pcr_ops = &n2_pcr_ops; + pcr_enable = PCR_N2_ENABLE; + picl_shift = 2; + break; + + case cheetah: + case cheetah_plus: + case spitfire: + pcr_ops = &direct_pcr_ops; + pcr_enable = PCR_SUN4U_ENABLE; + break; + + default: + err = -ENODEV; + goto out_unregister; + } + + return nmi_init(); + +out_unregister: + unregister_perf_hsvc(); + return err; +} + +arch_initcall(pcr_arch_init); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index cc8b5604442..a73954b87f0 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -29,6 +29,7 @@ #include <linux/cpu.h> #include <linux/elfcore.h> #include <linux/sysrq.h> +#include <linux/nmi.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -52,8 +53,10 @@ static void sparc64_yield(int cpu) { - if (tlb_type != hypervisor) + if (tlb_type != hypervisor) { + touch_nmi_watchdog(); return; + } clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 49d061f4ae9..f2bcfd2967d 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -354,6 +354,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) seq_printf(m, "cpu\t\t: %s\n" "fpu\t\t: %s\n" + "pmu\t\t: %s\n" "prom\t\t: %s\n" "type\t\t: %s\n" "ncpus probed\t: %d\n" @@ -366,6 +367,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , sparc_cpu_type, sparc_fpu_type, + sparc_pmu_type, prom_version, ((tlb_type == hypervisor) ? "sun4v" : diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 2db3c2229b9..db310aa0018 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -729,7 +729,7 @@ void timer_interrupt(int irq, struct pt_regs *regs) irq_enter(); - kstat_this_cpu.irqs[0]++; + kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); if (unlikely(!evt->event_handler)) { printk(KERN_WARNING diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S index ea925503b42..d9bdfb9d5c1 100644 --- a/arch/sparc/kernel/ttable.S +++ b/arch/sparc/kernel/ttable.S @@ -63,7 +63,8 @@ tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6) #else tl0_irq6: BTRAP(0x46) #endif -tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) +tl0_irq7: TRAP_IRQ(deferred_pcr_work_irq, 7) +tl0_irq8: BTRAP(0x48) BTRAP(0x49) tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) tl0_irq14: TRAP_IRQ(timer_interrupt, 14) tl0_irq15: TRAP_NMI_IRQ(perfctr_irq, 15) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a9e474bf638..4ab8993b086 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -19,6 +19,7 @@ #include <linux/interrupt.h> #include <linux/kprobes.h> #include <linux/kdebug.h> +#include <linux/percpu.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -224,6 +225,30 @@ cannot_handle: unhandled_fault (address, current, regs); } +static void noinline bogus_32bit_fault_tpc(struct pt_regs *regs) +{ + static int times; + + if (times++ < 10) + printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports " + "64-bit TPC [%lx]\n", + current->comm, current->pid, + regs->tpc); + show_regs(regs); +} + +static void noinline bogus_32bit_fault_address(struct pt_regs *regs, + unsigned long addr) +{ + static int times; + + if (times++ < 10) + printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " + "reports 64-bit fault address [%lx]\n", + current->comm, current->pid, addr); + show_regs(regs); +} + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -244,6 +269,19 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) (fault_code & FAULT_CODE_DTLB)) BUG(); + if (test_thread_flag(TIF_32BIT)) { + if (!(regs->tstate & TSTATE_PRIV)) { + if (unlikely((regs->tpc >> 32) != 0)) { + bogus_32bit_fault_tpc(regs); + goto intr_or_no_mm; + } + } + if (unlikely((address >> 32) != 0)) { + bogus_32bit_fault_address(regs, address); + goto intr_or_no_mm; + } + } + if (regs->tstate & TSTATE_PRIV) { unsigned long tpc = regs->tpc; @@ -264,12 +302,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (in_atomic() || !mm) goto intr_or_no_mm; - if (test_thread_flag(TIF_32BIT)) { - if (!(regs->tstate & TSTATE_PRIV)) - regs->tpc &= 0xffffffff; - address &= 0xffffffff; - } - if (!down_read_trylock(&mm->mmap_sem)) { if ((regs->tstate & TSTATE_PRIV) && !search_exception_tables(regs->tpc)) { diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index d6e170c074f..d172f86439b 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -13,217 +13,57 @@ #include <linux/init.h> #ifdef CONFIG_SPARC64 -#include <asm/hypervisor.h> -#include <asm/spitfire.h> -#include <asm/cpudata.h> -#include <asm/irq.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kdebug.h> +#include <asm/nmi.h> -static int nmi_enabled; - -struct pcr_ops { - u64 (*read)(void); - void (*write)(u64); -}; -static const struct pcr_ops *pcr_ops; - -static u64 direct_pcr_read(void) -{ - u64 val; - - read_pcr(val); - return val; -} - -static void direct_pcr_write(u64 val) -{ - write_pcr(val); -} - -static const struct pcr_ops direct_pcr_ops = { - .read = direct_pcr_read, - .write = direct_pcr_write, -}; - -static void n2_pcr_write(u64 val) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - unsigned long ret; - - ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); - if (val != HV_EOK) - write_pcr(val); -} - -static const struct pcr_ops n2_pcr_ops = { - .read = direct_pcr_read, - .write = n2_pcr_write, -}; - -/* In order to commonize as much of the implementation as - * possible, we use PICH as our counter. Mostly this is - * to accomodate Niagara-1 which can only count insn cycles - * in PICH. - */ -static u64 picl_value(void) -{ - u32 delta = local_cpu_data().clock_tick / HZ; - - return ((u64)((0 - delta) & 0xffffffff)) << 32; -} - -#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ -#define PCR_STRACE 0x00000002 /* Trace supervisor events */ -#define PCR_UTRACE 0x00000004 /* Trace user events */ -#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ -#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ -#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ -#define PCR_N2_MASK0 0x00003fc0 -#define PCR_N2_MASK0_SHIFT 6 -#define PCR_N2_SL0 0x0003c000 -#define PCR_N2_SL0_SHIFT 14 -#define PCR_N2_OV0 0x00040000 -#define PCR_N2_MASK1 0x07f80000 -#define PCR_N2_MASK1_SHIFT 19 -#define PCR_N2_SL1 0x78000000 -#define PCR_N2_SL1_SHIFT 27 -#define PCR_N2_OV1 0x80000000 - -#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) -#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ - PCR_N2_TOE_OV1 | \ - (2 << PCR_N2_SL1_SHIFT) | \ - (0xff << PCR_N2_MASK1_SHIFT)) - -static u64 pcr_enable = PCR_SUN4U_ENABLE; - -static void nmi_handler(struct pt_regs *regs) -{ - pcr_ops->write(PCR_PIC_PRIV); - - if (nmi_enabled) { - oprofile_add_sample(regs, 0); - - write_pic(picl_value()); - pcr_ops->write(pcr_enable); - } -} - -/* We count "clock cycle" events in the lower 32-bit PIC. - * Then configure it such that it overflows every HZ, and thus - * generates a level 15 interrupt at that frequency. - */ -static void cpu_nmi_start(void *_unused) -{ - pcr_ops->write(PCR_PIC_PRIV); - write_pic(picl_value()); - - pcr_ops->write(pcr_enable); -} + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; -static void cpu_nmi_stop(void *_unused) -{ - pcr_ops->write(PCR_PIC_PRIV); -} - -static int nmi_start(void) -{ - int err = register_perfctr_intr(nmi_handler); - - if (!err) { - nmi_enabled = 1; - wmb(); - err = on_each_cpu(cpu_nmi_start, NULL, 1); - if (err) { - nmi_enabled = 0; - wmb(); - on_each_cpu(cpu_nmi_stop, NULL, 1); - release_perfctr_intr(nmi_handler); - } + switch (val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; } - - return err; -} - -static void nmi_stop(void) -{ - nmi_enabled = 0; - wmb(); - - on_each_cpu(cpu_nmi_stop, NULL, 1); - release_perfctr_intr(nmi_handler); - synchronize_sched(); + return ret; } -static unsigned long perf_hsvc_group; -static unsigned long perf_hsvc_major; -static unsigned long perf_hsvc_minor; +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, +}; -static int __init register_perf_hsvc(void) +static int timer_start(void) { - if (tlb_type == hypervisor) { - switch (sun4v_chip_type) { - case SUN4V_CHIP_NIAGARA1: - perf_hsvc_group = HV_GRP_NIAG_PERF; - break; - - case SUN4V_CHIP_NIAGARA2: - perf_hsvc_group = HV_GRP_N2_CPU; - break; - - default: - return -ENODEV; - } - - - perf_hsvc_major = 1; - perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register N2 hvapi.\n"); - return -ENODEV; - } - } + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; + nmi_adjust_hz(HZ); return 0; } -static void unregister_perf_hsvc(void) + +static void timer_stop(void) { - if (tlb_type != hypervisor) - return; - sun4v_hvapi_unregister(perf_hsvc_group); + nmi_adjust_hz(1); + unregister_die_notifier(&profile_timer_exceptions_nb); + synchronize_sched(); /* Allow already-started NMIs to complete. */ } -static int oprofile_nmi_init(struct oprofile_operations *ops) +static int op_nmi_timer_init(struct oprofile_operations *ops) { - int err = register_perf_hsvc(); - - if (err) - return err; - - switch (tlb_type) { - case hypervisor: - pcr_ops = &n2_pcr_ops; - pcr_enable = PCR_N2_ENABLE; - break; - - case cheetah: - case cheetah_plus: - pcr_ops = &direct_pcr_ops; - break; - - default: + if (!nmi_usable) return -ENODEV; - } - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = nmi_start; - ops->stop = nmi_stop; + ops->start = timer_start; + ops->stop = timer_stop; ops->cpu_type = "timer"; - - printk(KERN_INFO "oprofile: Using perfctr based NMI timer interrupt.\n"); - + printk(KERN_INFO "oprofile: Using perfctr NMI timer interrupt.\n"); return 0; } #endif @@ -233,7 +73,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) int ret = -ENODEV; #ifdef CONFIG_SPARC64 - ret = oprofile_nmi_init(ops); + ret = op_nmi_timer_init(ops); if (!ret) return ret; #endif @@ -241,10 +81,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) return ret; } - void oprofile_arch_exit(void) { -#ifdef CONFIG_SPARC64 - unregister_perf_hsvc(); -#endif } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9c39095b33f..5c8e353c112 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) + def_bool y config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP @@ -391,6 +391,13 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + help + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" @@ -1340,13 +1347,17 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config CC_STACKPROTECTOR_ALL + bool + config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL && BROKEN + depends on X86_64 + select CC_STACKPROTECTOR_ALL help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of functions, a canary value on + the stack just before the return address, and validates the value just before actually returning. Stack based buffer overflows (that need to overwrite this return address) now also overwrite the canary, which gets detected and the attack is then @@ -1354,15 +1365,8 @@ config CC_STACKPROTECTOR This feature requires gcc version 4.2 or above, or a distribution gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. + detected and for those versions, this configuration option is + ignored. (and a warning is printed during bootup) source kernel/Kconfig.hz diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index c98d52e8296..085fef4d866 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -294,25 +294,23 @@ config X86_CPU # Define implied options from the CPU selection here config X86_L1_CACHE_BYTES int - default "128" if GENERIC_CPU || MPSC - default "64" if MK8 || MCORE2 - depends on X86_64 + default "128" if MPSC + default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 config X86_INTERNODE_CACHE_BYTES int default "4096" if X86_VSMP default X86_L1_CACHE_BYTES if !X86_VSMP - depends on X86_64 config X86_CMPXCHG def_bool X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 10d6cc3fd05..28f111461ca 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,7 @@ config DEBUG_RODATA config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA + default y help This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d1a47adb5ae..cacee981d16 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -73,7 +73,7 @@ else stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" -fstack-protector ) + "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ "$(CC)" -fstack-protector-all ) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 9dabd00e980..dd77ac0cac4 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -46,78 +46,83 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { - int err; + int err = 0; if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - - if (from->si_code < 0) { - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - err |= __put_user(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - /* FALL THROUGH */ - default: - case __SI_KILL >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - break; - case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); - break; - case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } } - } + } put_user_catch(err); + return err; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - int err; + int err = 0; u32 ptr32; if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); return err; } @@ -142,17 +147,23 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, struct pt_regs *regs) { stack_t uss, uoss; - int ret; + int ret, err = 0; mm_segment_t seg; if (uss_ptr) { u32 ptr; memset(&uss, 0, sizeof(stack_t)); - if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || - __get_user(ptr, &uss_ptr->ss_sp) || - __get_user(uss.ss_flags, &uss_ptr->ss_flags) || - __get_user(uss.ss_size, &uss_ptr->ss_size)) + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + get_user_try { + get_user_ex(ptr, &uss_ptr->ss_sp); + get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); + get_user_ex(uss.ss_size, &uss_ptr->ss_size); + } get_user_catch(err); + + if (err) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } @@ -161,10 +172,16 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || - __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || - __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || - __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + put_user_try { + put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); + put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); + put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); + } put_user_catch(err); + + if (err) ret = -EFAULT; } return ret; @@ -174,18 +191,18 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, * Do a signal return; undo the signal stack. */ #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define RELOAD_SEG(seg) { \ unsigned int cur, pre; \ - err |= __get_user(pre, &sc->seg); \ + get_user_ex(pre, &sc->seg); \ savesegment(seg, cur); \ pre |= 3; \ if (pre != cur) \ @@ -209,39 +226,42 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, sc, sc->err, sc->ip, sc->cs, sc->flags); #endif - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - err |= __get_user(gs, &sc->gs); - gs |= 3; - savesegment(gs, oldgs); - if (gs != oldgs) - load_gs_index(gs); - - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); - - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ - - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); - - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - /* disable syscall checks */ - regs->orig_ax = -1; - - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); - - err |= __get_user(*pax, &sc->ax); + get_user_try { + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + get_user_ex(gs, &sc->gs); + gs |= 3; + savesegment(gs, oldgs); + if (gs != oldgs) + load_gs_index(gs); + + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); + + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); + /* Don't touch extended registers */ + + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); + + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + /* disable syscall checks */ + regs->orig_ax = -1; + + get_user_ex(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + err |= restore_i387_xstate_ia32(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); + return err; } @@ -319,36 +339,38 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, { int tmp, err = 0; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->fs); - savesegment(ds, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->ds); - savesegment(es, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->es); - - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - - err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); - - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + put_user_try { + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + savesegment(fs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->fs); + savesegment(ds, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->ds); + savesegment(es, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->es); + + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); + + put_user_ex(ptr_to_compat(fpstate), &sc->fpstate); + + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -437,13 +459,17 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, else restorer = &frame->retcode; } - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - /* - * These are actually not used anymore, but left because some - * gdb versions depend on them as a marker. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); + put_user_try { + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); + if (err) return -EFAULT; @@ -496,41 +522,40 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); - err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); + put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, - rt_sigreturn); - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - - /* - * Not actually used anymore, but left because some gdb - * versions need it. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); if (err) return -EFAULT; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b6189..097a6b64c24 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: @@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: @@ -418,9 +418,9 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys -ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + ja ia32_badsys +ia32_do_call: IA32_ARG_FIXUP call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: @@ -435,7 +435,9 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - jmp ia32_do_syscall + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call END(ia32_syscall) ia32_badsys: diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 00000000000..82f613c607c --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df603..f03b23e3286 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,20 @@ #include <linux/nodemask.h> #include <linux/percpu.h> +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define safe_smp_processor_id() 0 +#define stack_smp_processor_id() 0 + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; @@ -17,4 +31,11 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 00000000000..a7f3c75f8ad --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; + +extern void setup_cpu_local_masks(void); + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) + +static inline void setup_cpu_local_masks(void) { } + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d67..c68c361697e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include <linux/compiler.h> #include <asm/percpu.h> +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include <asm/pda.h> - -struct task_struct; static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return percpu_read(current_task); } -#else /* __ASSEMBLY__ */ - -#include <asm/asm-offsets.h> -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 2c05b737ee2..4334502d366 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -138,11 +138,4 @@ struct genapic { extern struct genapic *genapic; extern void es7000_update_genapic_to_cluster(void); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#define get_uv_system_type() UV_NONE -#define is_uv_system() 0 -#define uv_wakeup_secondary(a, b) 1 -#define uv_system_init() do {} while (0) - - #endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index adf32fb56aa..7bb092c5905 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -extern enum uv_system_type get_uv_system_type(void); -extern int is_uv_system(void); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void uv_cpu_init(void); -extern void uv_system_init(void); -extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 000787df66e..176f058e715 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -1,11 +1,52 @@ -#ifdef CONFIG_X86_32 -# include "hardirq_32.h" -#else -# include "hardirq_64.h" +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include <linux/threads.h> +#include <linux/irq.h> + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int irq0_irqs; +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; +#endif +#ifdef CONFIG_SMP + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + unsigned int irq_thermal_count; +# ifdef CONFIG_X86_64 + unsigned int irq_threshold_count; +# endif #endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) + +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu arch_irq_stat_cpu extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h deleted file mode 100644 index cf7954d1405..00000000000 --- a/arch/x86/include/asm/hardirq_32.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_32_H -#define _ASM_X86_HARDIRQ_32_H - -#include <linux/threads.h> -#include <linux/irq.h> - -typedef struct { - unsigned int __softirq_pending; - unsigned long idle_timestamp; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -#define __ARCH_IRQ_STAT -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) - -#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) - -void ack_bad_irq(unsigned int irq); -#include <linux/irq_cpustat.h> - -#endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h deleted file mode 100644 index b5a6b5d5670..00000000000 --- a/arch/x86/include/asm/hardirq_64.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_64_H -#define _ASM_X86_HARDIRQ_64_H - -#include <linux/threads.h> -#include <linux/irq.h> -#include <asm/pda.h> -#include <asm/apic.h> - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT 1 - -#define inc_irq_stat(member) add_pda(member, 1) - -#define local_softirq_pending() read_pda(__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING 1 - -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_64_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7a1f44ac1f1..08ec793aa04 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 89c898ab298..77843225b7e 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -1,5 +1,31 @@ -#ifdef CONFIG_X86_32 -# include "irq_regs_32.h" -#else -# include "irq_regs_64.h" -#endif +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge <jeremy@goop.org> + */ +#ifndef _ASM_X86_IRQ_REGS_H +#define _ASM_X86_IRQ_REGS_H + +#include <asm/percpu.h> + +#define ARCH_HAS_OWN_IRQ_REGS + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return percpu_read(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + percpu_write(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h deleted file mode 100644 index 86afd747345..00000000000 --- a/arch/x86/include/asm/irq_regs_32.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge <jeremy@goop.org> - */ -#ifndef _ASM_X86_IRQ_REGS_32_H -#define _ASM_X86_IRQ_REGS_32_H - -#include <asm/percpu.h> - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return x86_read_percpu(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h deleted file mode 100644 index 3dd9c0b7027..00000000000 --- a/arch/x86/include/asm/irq_regs_64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f7ff65032b9..9a83a10a5d5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -49,31 +49,33 @@ * some of the following vectors are 'rare', they are merged * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). */ #ifdef CONFIG_X86_32 # define SPURIOUS_APIC_VECTOR 0xff # define ERROR_APIC_VECTOR 0xfe -# define INVALIDATE_TLB_VECTOR 0xfd -# define RESCHEDULE_VECTOR 0xfc -# define CALL_FUNCTION_VECTOR 0xfb -# define CALL_FUNCTION_SINGLE_VECTOR 0xfa -# define THERMAL_APIC_VECTOR 0xf0 +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +/* 0xf8 - 0xf9 : free */ +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +# define NUM_INVALIDATE_TLB_VECTORS 8 #else -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define UV_BAU_MESSAGE 0xf8 -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +# define THRESHOLD_APIC_VECTOR 0xf9 +# define UV_BAU_MESSAGE 0xf8 +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ #define NUM_INVALIDATE_TLB_VECTORS 8 @@ -105,6 +107,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include <asm/apicnum.h> /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -112,11 +116,12 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h index 6b1add8e31d..6fa399ad1de 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -11,10 +11,26 @@ */ #ifdef CONFIG_X86_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) + +BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, + smp_invalidate_interrupt) #endif /* diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8aeeb3fd73d..52948df9cd1 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); -#ifdef CONFIG_X86_32 -# include "mmu_context_32.h" -#else -# include "mmu_context_64.h" + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); +#endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ + load_cr3(next->pgd); + + /* + * load the LDT, if the LDT is different: + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} #define activate_mm(prev, next) \ do { \ @@ -33,5 +76,17 @@ do { \ switch_mm((prev), (next), NULL); \ } while (0); +#ifdef CONFIG_X86_32 +#define deactivate_mm(tsk, mm) \ +do { \ + loadsegment(gs, 0); \ +} while (0) +#else +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + loadsegment(fs, 0); \ +} while (0) +#endif #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h deleted file mode 100644 index 7e98ce1d2c0..00000000000 --- a/arch/x86/include/asm/mmu_context_32.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_32_H -#define _ASM_X86_MMU_CONTEXT_32_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - - /* Re-load page tables */ - load_cr3(next->pgd); - - /* - * load the LDT, if the LDT is different: - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload %cr3. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%gs": :"r" (0)); - -#endif /* _ASM_X86_MMU_CONTEXT_32_H */ diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h deleted file mode 100644 index 677d36e9540..00000000000 --- a/arch/x86/include/asm/mmu_context_64.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_64_H -#define _ASM_X86_MMU_CONTEXT_64_H - -#include <asm/pda.h> - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned cpu = smp_processor_id(); - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - load_cr3(next->pgd); - - if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ -do { \ - load_gs_index(0); \ - asm volatile("movl %0,%%fs"::"r"(0)); \ -} while (0) - -#endif /* _ASM_X86_MMU_CONTEXT_64_H */ diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 59568bc4767..4a7f96d7c18 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,17 +24,18 @@ # endif #endif -struct intel_mp_floating { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..6b9810859da 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t native_pte_flags(pte_t pte) +static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } @@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f..e27fdbe5f9e 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..ff691736f5e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -12,21 +12,38 @@ #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) #define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) -#ifdef CONFIG_X86_64 -#define CLBR_RSI (1 << 3) -#define CLBR_RDI (1 << 4) +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) +#define CLBR_SCRATCH (0) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) #define CLBR_R8 (1 << 5) #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) + #define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + #include <asm/desc_defs.h> -#else -/* CLBR_ANY should match all regs platform has. For i386, that's just it */ -#define CLBR_ANY ((1 << 3) - 1) #endif /* X86_64 */ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/cpumask.h> @@ -40,6 +57,14 @@ struct tss_struct; struct mm_struct; struct desc_struct; +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -189,11 +214,15 @@ struct pv_irq_ops { * expected to use X86_EFLAGS_IF; all other bits * returned from save_fl are undefined, and may be ignored by * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); void (*halt)(void); @@ -244,7 +273,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -278,12 +308,11 @@ struct pv_mmu_ops { void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - pteval_t (*pte_val)(pte_t); - pteval_t (*pte_flags)(pte_t); - pte_t (*make_pte)(pteval_t pte); + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; - pgdval_t (*pgd_val)(pgd_t); - pgd_t (*make_pgd)(pgdval_t pgd); + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE @@ -298,12 +327,12 @@ struct pv_mmu_ops { void (*set_pud)(pud_t *pudp, pud_t pudval); - pmdval_t (*pmd_val)(pmd_t); - pmd_t (*make_pmd)(pmdval_t pmd); + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; #if PAGETABLE_LEVELS == 4 - pudval_t (*pud_val)(pud_t); - pud_t (*make_pud)(pudval_t pud); + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ @@ -388,6 +417,8 @@ extern struct pv_lock_ops pv_lock_ops; asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, @@ -479,25 +510,45 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS -#else -#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#else /* CONFIG_X86_64 */ +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ "=S" (__esi), "=d" (__edx), \ "=c" (__ecx) - #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif +#endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) @@ -505,10 +556,11 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif -#define __PVOP_CALL(rettype, op, pre, post, ...) \ +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ + pre, post, ...) \ ({ \ rettype __ret; \ - PVOP_CALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ /* This is 32-bit specific, but is okay in 64-bit */ \ /* since this condition will never hold */ \ @@ -516,70 +568,113 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)__eax; \ } \ __ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ + +#define __PVOP_CALL(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ ({ \ PVOP_VCALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_VCALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" VEXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ }) +#define __PVOP_VCALL(op, pre, post, ...) \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, \ + pre, post, ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + + #define PVOP_CALL0(rettype, op) \ __PVOP_CALL(rettype, op, "", "") #define PVOP_VCALL0(op) \ __PVOP_VCALL(op, "", "") +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op, "", "") +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op, "", "") + + #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALLEE2(op, arg1, arg2) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) /* This is the only difference in x86_64. We can make it much simpler */ #ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ __PVOP_VCALL(op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ @@ -587,13 +682,13 @@ int paravirt_disable_iospace(void); "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) #else #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_CALL(rettype, op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_VCALL(op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #endif static inline int paravirt_enabled(void) @@ -984,10 +1079,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -1059,13 +1155,13 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, + pv_mmu_ops.make_pte, + val); return (pte_t) { .pte = ret }; } @@ -1075,29 +1171,12 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, - pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); - - return ret; -} - -static inline pteval_t pte_flags(pte_t pte) -{ - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, - pte.pte, (u64)pte.pte >> 32); + ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); -#ifdef CONFIG_PARAVIRT_DEBUG - BUG_ON(ret & PTE_PFN_MASK); -#endif return ret; } @@ -1106,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, + val); return (pgd_t) { ret }; } @@ -1120,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); return ret; } @@ -1188,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, + val); return (pmd_t) { ret }; } @@ -1202,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); return ret; } @@ -1228,11 +1307,11 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, + val); return (pud_t) { ret }; } @@ -1242,11 +1321,11 @@ static inline pudval_t pud_val(pud_t pud) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); return ret; } @@ -1387,9 +1466,10 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, } void _paravirt_nop(void); -#define paravirt_nop ((void *)_paravirt_nop) +u32 _paravirt_ident_32(u32); +u64 _paravirt_ident_64(u64); -void paravirt_use_bytelocks(void); +#define paravirt_nop ((void *)_paravirt_nop) #ifdef CONFIG_SMP @@ -1438,12 +1518,37 @@ extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" +#define PV_RESTORE_REGS "popl %edx; popl %ecx;" + +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" + #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" + /* We save some registers, but all of them, that's too much. We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" @@ -1453,52 +1558,76 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_FLAGS_ARG "D" #endif +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + extern typeof(func) __raw_callee_save_##func; \ + static void *__##func##__ __used = func; \ + \ + asm(".pushsection .text;" \ + "__raw_callee_save_" #func ": " \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + "ret;" \ + ".popsection") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_VEXTRA_CLOBBERS); + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_EXTRA_CLOBBERS); + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) @@ -1541,33 +1670,49 @@ static inline unsigned long __raw_local_irq_save(void) .popsection +#define COND_PUSH(set, mask, reg) \ + .if ((~(set)) & mask); push %reg; .endif +#define COND_POP(set, mask, reg) \ + .if ((~(set)) & mask); pop %reg; .endif + #ifdef CONFIG_X86_64 -#define PV_SAVE_REGS \ - push %rax; \ - push %rcx; \ - push %rdx; \ - push %rsi; \ - push %rdi; \ - push %r8; \ - push %r9; \ - push %r10; \ - push %r11 -#define PV_RESTORE_REGS \ - pop %r11; \ - pop %r10; \ - pop %r9; \ - pop %r8; \ - pop %rdi; \ - pop %rsi; \ - pop %rdx; \ - pop %rcx; \ - pop %rax + +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_RAX, rax); \ + COND_PUSH(set, CLBR_RCX, rcx); \ + COND_PUSH(set, CLBR_RDX, rdx); \ + COND_PUSH(set, CLBR_RSI, rsi); \ + COND_PUSH(set, CLBR_RDI, rdi); \ + COND_PUSH(set, CLBR_R8, r8); \ + COND_PUSH(set, CLBR_R9, r9); \ + COND_PUSH(set, CLBR_R10, r10); \ + COND_PUSH(set, CLBR_R11, r11) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_R11, r11); \ + COND_POP(set, CLBR_R10, r10); \ + COND_POP(set, CLBR_R9, r9); \ + COND_POP(set, CLBR_R8, r8); \ + COND_POP(set, CLBR_RDI, rdi); \ + COND_POP(set, CLBR_RSI, rsi); \ + COND_POP(set, CLBR_RDX, rdx); \ + COND_POP(set, CLBR_RCX, rcx); \ + COND_POP(set, CLBR_RAX, rax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else -#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx -#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_EAX, eax); \ + COND_PUSH(set, CLBR_EDI, edi); \ + COND_PUSH(set, CLBR_ECX, ecx); \ + COND_PUSH(set, CLBR_EDX, edx) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_EDX, edx); \ + COND_POP(set, CLBR_ECX, ecx); \ + COND_POP(set, CLBR_EDI, edi); \ + COND_POP(set, CLBR_EAX, eax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr @@ -1579,15 +1724,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS;) \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS;) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ @@ -1617,11 +1762,15 @@ static inline unsigned long __raw_local_irq_save(void) PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ swapgs) +/* + * Note: swapgs is very special, and in practise is either going to be + * implemented with a single "swapgs" instruction or something very + * special. Either way, we don't need to save any registers for + * it. + */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - PV_SAVE_REGS; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ - PV_RESTORE_REGS \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ ) #define GET_CR2_INTO_RCX \ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h deleted file mode 100644 index 2fbfff88df3..00000000000 --- a/arch/x86/include/asm/pda.h +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef _ASM_X86_PDA_H -#define _ASM_X86_PDA_H - -#ifndef __ASSEMBLY__ -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/cache.h> -#include <asm/page.h> - -/* Per processor datastructure. %gs points to it while the kernel runs */ -struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ - unsigned long kernelstack; /* 16 top of kernel stack for current */ - unsigned long oldrsp; /* 24 user rsp for system call */ - int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR - unsigned long stack_canary; /* 40 stack canary value */ - /* gcc-ABI: this canary MUST be at - offset 40!!! */ -#endif - char *irqstackptr; - short nodenumber; /* number of current node (32k max) */ - short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ - short mmu_state; - short isidle; - struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; -} ____cacheline_aligned_in_smp; - -extern struct x8664_pda **_cpu_pda; -extern void pda_init(int); - -#define cpu_pda(i) (_cpu_pda[i]) - -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) - -/* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) - -#endif - -#define PDA_STACKOFFSET (5*8) - -#endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba6..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -2,53 +2,12 @@ #define _ASM_X86_PERCPU_H #ifdef CONFIG_X86_64 -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include <asm/pda.h> - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - +#define __percpu_seg gs +#define __percpu_mov_op movq +#else +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#include <asm-generic/percpu.h> - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -65,47 +24,48 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) per_cpu__##var +#endif + #else /* ...!ASSEMBLY */ +#include <linux/stringify.h> + +#ifdef CONFIG_SMP +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x +#define __my_cpu_offset percpu_read(this_cpu_off) +#else +#define __percpu_arg(x) "%" #x +#endif + /* - * PER_CPU finds an address of a per-cpu variable. + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) + * There also must be an entry in vmlinux_64.lds.S */ -#ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" - -#else /* !SMP */ - -#define __percpu_seg "" - -#endif /* SMP */ - -#include <asm-generic/percpu.h> +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(per_cpu_var(var)) init_per_cpu_var(var) -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) per_cpu_var(var) +#endif /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ @@ -120,20 +80,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "re" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -143,17 +108,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -162,13 +132,30 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) + +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ + old__; \ +}) + +#include <asm-generic/percpu.h> + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP @@ -195,9 +182,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4f5af8447d5..6f7c102018b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -240,64 +240,78 @@ static inline int pmd_large(pmd_t pte) (_PAGE_PSE | _PAGE_PRESENT); } +static inline pte_t pte_set_flags(pte_t pte, pteval_t set) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v | set); +} + +static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v & ~clear); +} + static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return pte_clear_flags(pte, _PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_NX); + return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_RW); + return pte_set_flags(pte, _PAGE_RW); } static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_PSE); + return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_PSE); + return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_GLOBAL); + return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_GLOBAL); + return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return pte_set_flags(pte, _PAGE_SPECIAL); } extern pteval_t __supported_pte_mask; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ba09289acca..1df9637dfda 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -11,7 +11,6 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> -#include <asm/pda.h> extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3bfd5235a9e..9763eb70013 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,24 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); @@ -752,9 +770,9 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..536949749bc 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 19953df61c5..45ef8a1b9d7 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -15,34 +15,8 @@ # include <asm/io_apic.h> # endif #endif -#include <asm/pda.h> #include <asm/thread_info.h> - -#ifdef CONFIG_X86_64 - -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; - -#else /* CONFIG_X86_32 */ - -extern cpumask_t cpu_callin_map; -extern cpumask_t cpu_callout_map; -extern cpumask_t cpu_initialized; -extern cpumask_t cpu_sibling_setup_map; - -#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) -#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) -#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) -#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) - -#endif /* CONFIG_X86_32 */ - -extern void (*mtrr_hook)(void); -extern void zap_low_mappings(void); - -extern int __cpuinit get_local_pda(int cpu); +#include <asm/cpumask.h> extern int smp_num_siblings; extern unsigned int num_processors; @@ -50,9 +24,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -167,8 +139,6 @@ void play_dead_common(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -177,10 +147,6 @@ static inline int num_booting_cpus(void) { return cpumask_weight(cpu_callout_mask); } -#else -static inline void prefill_possible_map(void) -{ -} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; @@ -191,11 +157,11 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ @@ -205,10 +171,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -251,11 +213,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..2bd6b111a41 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -172,70 +172,8 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; } -#ifdef CONFIG_PARAVIRT -/* - * Define virtualization-friendly old-style lock byte lock, for use in - * pv_lock_ops if desired. - * - * This differs from the pre-2.6.24 spinlock by always using xchgb - * rather than decb to take the lock; this allows it to use a - * zero-initialized lock structure. It also maintains a 1-byte - * contention counter, so that we can implement - * __byte_spin_is_contended. - */ -struct __byte_spinlock { - s8 lock; - s8 spinners; -}; - -static inline int __byte_spin_is_locked(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->lock != 0; -} - -static inline int __byte_spin_is_contended(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->spinners != 0; -} - -static inline void __byte_spin_lock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - s8 val = 1; - - asm("1: xchgb %1, %0\n" - " test %1,%1\n" - " jz 3f\n" - " " LOCK_PREFIX "incb %2\n" - "2: rep;nop\n" - " cmpb $1, %0\n" - " je 2b\n" - " " LOCK_PREFIX "decb %2\n" - " jmp 1b\n" - "3:" - : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); -} - -static inline int __byte_spin_trylock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - u8 old = 1; - - asm("xchgb %1,%0" - : "+m" (bl->lock), "+q" (old) : : "memory"); +#ifndef CONFIG_PARAVIRT - return old == 0; -} - -static inline void __byte_spin_unlock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - smp_wmb(); - bl->lock = 0; -} -#else /* !CONFIG_PARAVIRT */ static inline int __raw_spin_is_locked(raw_spinlock_t *lock) { return __ticket_spin_is_locked(lock); @@ -267,7 +205,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, __raw_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT */ +#endif static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h new file mode 100644 index 00000000000..36a700acaf2 --- /dev/null +++ b/arch/x86/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#include <asm/tsc.h> +#include <asm/processor.h> + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + u64 canary; + u64 tsc; + + /* + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); + + /* + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. + */ + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; + percpu_write(irq_stack_union.stack_canary, canary); +} + +#endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1..2fcc70bc85f 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -86,27 +86,44 @@ do { \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ "r12", "r13", "r14", "r15" +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ + __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ + __switch_canary_oparam \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ + __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa..df9d5f78385 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + int uaccess_err; }; #define INIT_THREAD_INFO(tsk) \ @@ -194,25 +195,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include <asm/pda.h> +#include <asm/percpu.h> +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} +DECLARE_PER_CPU(unsigned long, kernel_stack); -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) +static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -220,8 +217,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..d3539f998f8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, @@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab2..77cfb2cfb38 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node) return &node_to_cpumask_map[node]; } +static inline void setup_node_to_cpumask_map(void) { } + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -83,7 +85,8 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); @@ -102,10 +105,7 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ @@ -122,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +extern void setup_node_to_cpumask_map(void); + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" @@ -192,9 +194,20 @@ extern int __node_distance(int, int); #else /* !CONFIG_NUMA */ -#define numa_node_id() 0 -#define cpu_to_node(cpu) 0 -#define early_cpu_to_node(cpu) 0 +static inline int numa_node_id(void) +{ + return 0; +} + +static inline int cpu_to_node(int cpu) +{ + return 0; +} + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} static inline const cpumask_t *cpumask_of_node(int node) { @@ -209,6 +222,8 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } +static inline void setup_node_to_cpumask_map(void) { } + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 780ba0ab94f..90f06c25221 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 4340055b755..b685ece89d5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -121,7 +121,7 @@ extern int __get_user_bad(void); #define __get_user_x(size, ret, x, ptr) \ asm volatile("call __get_user_" #size \ - : "=a" (ret),"=d" (x) \ + : "=a" (ret), "=d" (x) \ : "0" (ptr)) \ /* Careful: we have to cast the result to the type of the pointer @@ -181,12 +181,12 @@ extern int __get_user_bad(void); #define __put_user_x(size, x, ptr, __ret_pu) \ asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #ifdef CONFIG_X86_32 -#define __put_user_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err, errret) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -197,14 +197,24 @@ extern int __get_user_bad(void); _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (err) \ - : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + : "A" (x), "r" (addr), "i" (errret), "0" (err)) + +#define __put_user_asm_ex_u64(x, addr) \ + asm volatile("1: movl %%eax,0(%1)\n" \ + "2: movl %%edx,4(%1)\n" \ + "3:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + _ASM_EXTABLE(2b, 3b - 2b) \ + : : "A" (x), "r" (addr)) #define __put_user_x8(x, ptr, __ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_u64(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_u64(x, ptr, retval, errret) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) +#define __put_user_asm_ex_u64(x, addr) \ + __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -276,10 +286,32 @@ do { \ __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ break; \ case 4: \ - __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ + errret); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + +#define __put_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + break; \ + case 2: \ + __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + break; \ + case 4: \ + __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + break; \ + case 8: \ + __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ break; \ default: \ __put_user_bad(); \ @@ -311,9 +343,12 @@ do { \ #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#define __get_user_asm_ex_u64(x, ptr) \ + __get_user_asm_ex(x, ptr, "q", "", "=r") #endif #define __get_user_size(x, ptr, size, retval, errret) \ @@ -350,6 +385,33 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +#define __get_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ + break; \ + case 2: \ + __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ + break; \ + case 4: \ + __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ + break; \ + case 8: \ + __get_user_asm_ex_u64(x, ptr); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : ltype(x) : "m" (__m(addr))) + #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ @@ -385,6 +447,26 @@ struct __large_struct { unsigned long buf[100]; }; _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : : ltype(x), "m" (__m(addr))) + +/* + * uaccess_try and catch + */ +#define uaccess_try do { \ + int prev_err = current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = 0; \ + barrier(); + +#define uaccess_catch(err) \ + (err) |= current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = prev_err; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -408,6 +490,7 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. @@ -435,6 +518,45 @@ struct __large_struct { unsigned long buf[100]; }; #define __put_user_unaligned __put_user /* + * {get|put}_user_try and catch + * + * get_user_try { + * get_user_ex(...); + * } get_user_catch(err) + */ +#define get_user_try uaccess_try +#define get_user_catch(err) uaccess_catch(err) + +#define get_user_ex(x, ptr) do { \ + unsigned long __gue_val; \ + __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr)))__gue_val; \ +} while (0) + +#ifdef CONFIG_X86_WP_WORKS_OK + +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + +#define put_user_ex(x, ptr) \ + __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + +#else /* !CONFIG_X86_WP_WORKS_OK */ + +#define put_user_try do { \ + int __uaccess_err = 0; + +#define put_user_catch(err) \ + (err) |= __uaccess_err; \ +} while (0) + +#define put_user_ex(x, ptr) do { \ + __uaccess_err |= __put_user(x, ptr); \ +} while (0) + +#endif /* CONFIG_X86_WP_WORKS_OK */ + +/* * movsl can be slow when source and dest are not both 8-byte aligned */ #ifdef CONFIG_X86_INTEL_USERCOPY diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h new file mode 100644 index 00000000000..8ac1d7e312f --- /dev/null +++ b/arch/x86/include/asm/uv/uv.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_UV_UV_H +#define _ASM_X86_UV_UV_H + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; + +#ifdef CONFIG_X86_UV + +extern enum uv_system_type get_uv_system_type(void); +extern int is_uv_system(void); +extern void uv_cpu_init(void); +extern void uv_system_init(void); +extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, + unsigned int cpu); + +#else /* X86_UV */ + +static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline int is_uv_system(void) { return 0; } +static inline void uv_cpu_init(void) { } +static inline void uv_system_init(void) { } +static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +{ return 1; } +static inline const struct cpumask * +uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, + unsigned long va, unsigned int cpu) +{ return cpumask; } + +#endif /* X86_UV */ + +#endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b..9b0e61bf7a8 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,7 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d364df03c1d..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) +CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o +obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -57,9 +58,9 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += setup_percpu.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o @@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o uv_sysfs.o + obj-y += genapic_64.o genapic_flat_64.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o + obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o + obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7678f10c456..c193ec3c695 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mp_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mp_apicid, used); + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); } if (!test_bit(id, used)) return id; @@ -945,29 +945,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mp_type = MP_IOAPIC; - mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mp_apicaddr = address; + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); + mp_ioapics[idx].apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mp_apicver = 0; + mp_ioapics[idx].apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, - mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -996,19 +996,19 @@ int __init acpi_probe_gsi(void) return max_gsi + 1; } -static void assign_to_mp_irq(struct mp_config_intsrc *m, - struct mp_config_intsrc *mp_irq) +static void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) { - memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); } -static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, - struct mp_config_intsrc *m) +static int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) { - return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); } -static void save_mp_irq(struct mp_config_intsrc *m) +static void save_mp_irq(struct mpc_intsrc *m) { int i; @@ -1026,7 +1026,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { int ioapic; int pin; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1044,13 +1044,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (trigger << 2) | polarity; - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ - mp_irq.mp_dstirq = pin; /* INTIN# */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); } @@ -1060,7 +1060,7 @@ void __init mp_config_acpi_legacy_irqs(void) int i; int ioapic; unsigned int dstapic; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* @@ -1085,7 +1085,7 @@ void __init mp_config_acpi_legacy_irqs(void) ioapic = mp_find_ioapic(0); if (ioapic < 0) return; - dstapic = mp_ioapics[ioapic].mp_apicid; + dstapic = mp_ioapics[ioapic].apicid; /* * Use the default configuration for the IRQs 0-15. Unless @@ -1095,16 +1095,14 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mp_config_intsrc *irq = mp_irqs + idx; + struct mpc_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mp_srcbus == MP_ISA_BUS - && irq->mp_srcbusirq == i) + if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if (irq->mp_dstapic == dstapic && - irq->mp_dstirq == i) + if (irq->dstapic == dstapic && irq->dstirq == i) break; } @@ -1113,13 +1111,13 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqflag = 0; /* Conforming */ - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_dstapic = dstapic; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_srcbusirq = i; /* Identity mapped */ - mp_irq.mp_dstirq = i; + mp_irq.type = MP_INTSRC; + mp_irq.irqflag = 0; /* Conforming */ + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.dstapic = dstapic; + mp_irq.irqtype = mp_INT; + mp_irq.srcbusirq = i; /* Identity mapped */ + mp_irq.dstirq = i; save_mp_irq(&mp_irq); } @@ -1230,22 +1228,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity) { #ifdef CONFIG_X86_MPPARSE - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; int ioapic; if (!acpi_ioapic) return 0; /* print the entry should happen on mptable identically */ - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.mp_srcbus = number; - mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); - mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; save_mp_irq(&mp_irq); #endif diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95f..7c243a2c511 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; @@ -156,11 +157,11 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 115449f869e..383d827eef8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -60,6 +60,24 @@ # error SPURIOUS_APIC_VECTOR definition error #endif +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); +unsigned int max_physical_apicid; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + #ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. @@ -1130,6 +1148,13 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + if (disable_apic) { +#ifdef CONFIG_X86_IO_APIC + disable_ioapic_setup(); +#endif + return; + } + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && esr_disable) { @@ -1570,11 +1595,11 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { -#ifdef CONFIG_X86_64 if (disable_apic) { pr_info("Apic disabled\n"); return -1; } +#ifdef CONFIG_X86_64 if (!cpu_has_apic) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); @@ -1877,17 +1902,8 @@ void __cpuinit generic_processor_info(int apicid, int version) #endif #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edb..8793ab33e2c 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -11,7 +11,6 @@ #include <linux/hardirq.h> #include <linux/suspend.h> #include <linux/kbuild.h> -#include <asm/pda.h> #include <asm/processor.h> #include <asm/segment.h> #include <asm/thread_info.h> @@ -48,16 +47,6 @@ int main(void) #endif BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); - ENTRY(oldrsp); - ENTRY(pcurrent); - ENTRY(irqcount); - ENTRY(cpunumber); - ENTRY(irqstackptr); - ENTRY(data_offset); - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b..41b0de6df87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,14 +21,16 @@ #include <asm/asm.h> #include <asm/numa.h> #include <asm/smp.h> +#include <asm/cpu.h> +#include <asm/cpumask.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> #include <mach_apic.h> #include <asm/genapic.h> +#include <asm/uv/uv.h> #endif -#include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/desc.h> @@ -50,6 +52,15 @@ cpumask_var_t cpu_initialized_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* correctly size the local cpu masks */ +void __init setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #else /* CONFIG_X86_32 */ cpumask_t cpu_callin_map; @@ -62,23 +73,23 @@ cpumask_t cpu_sibling_setup_map; static struct cpu_dev *this_cpu __cpuinitdata; +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * The TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) + */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; #else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -110,9 +121,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, #endif +} }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); #ifdef CONFIG_X86_32 @@ -242,18 +253,28 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif + /* Reload the per-cpu base */ + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; @@ -877,54 +898,22 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); +DEFINE_PER_CPU(char *, irq_stack_ptr) = + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -void __cpuinit pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } +DEFINE_PER_CPU(unsigned int, irq_count) = -1; - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -982,15 +971,14 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif me = current; @@ -1006,7 +994,9 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); @@ -1024,18 +1014,13 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } @@ -1114,7 +1099,7 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index efae3b22a0f..65792c2cc46 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -245,17 +245,6 @@ config X86_E_POWERSAVER comment "shared options" -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - config X86_SPEEDSTEP_LIB tristate default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index da299eb85fc..7293508d8f5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -147,7 +147,16 @@ struct _cpuid4_info { union _cpuid4_leaf_ecx ecx; unsigned long size; unsigned long can_disable; - cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ + DECLARE_BITMAP(shared_cpu_map, NR_CPUS); +}; + +/* subset of above _cpuid4_info w/o shared_cpu_map */ +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; }; #ifdef CONFIG_PCI @@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; @@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) } static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +__cpuinit cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) return 0; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - + struct _cpuid4_info_regs this_leaf; int retval; - retval = cpuid4_cache_lookup(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { switch(this_leaf.eax.split.level) { case 1: @@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); else { index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); + cpumask_set_cpu(i, + to_cpumask(this_leaf->shared_cpu_map)); if (i != cpu && per_cpu(cpuid4_info, i)) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); + sibling_leaf = + CPUID4_INFO_IDX(i, index); + cpumask_set_cpu(cpu, to_cpumask( + sibling_leaf->shared_cpu_map)); } } } @@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { + for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); + cpumask_clear_cpu(cpu, + to_cpumask(sibling_leaf->shared_cpu_map)); } } #else @@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int n = 0; if (len > 1) { - cpumask_t *mask = &this_leaf->shared_cpu_map; + const struct cpumask *mask; + mask = to_cpumask(this_leaf->shared_cpu_map); n = type? cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); @@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; ssize_t ret = 0; int i; @@ -733,7 +757,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; unsigned int ret, index, val; @@ -878,7 +903,7 @@ err_out: return -ENOMEM; } -static cpumask_t cache_dev_map = CPU_MASK_NONE; +static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) @@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - cpu_set(cpu, cache_dev_map); + cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return 0; @@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (per_cpu(cpuid4_info, cpu) == NULL) return; - if (!cpu_isset(cpu, cache_dev_map)) + if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; - cpu_clear(cpu, cache_dev_map); + cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094..4772e91e824 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_t cpus; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(per_cpu(cpu_core_map, cpu)); + i = cpumask_first(&per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } + if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + kfree(b); + err = -ENOMEM; + goto out; + } b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); if (!b->kobj) goto out_free; #ifndef CONFIG_SMP - b->cpus = CPU_MASK_ALL; + cpumask_setall(b->cpus); #else - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; @@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; + free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) free_out: kobject_del(b->kobj); kobject_put(b->kobj); + free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd3..5e8c79e748a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,6 +7,7 @@ #include <linux/interrupt.h> #include <linux/percpu.h> #include <asm/processor.h> +#include <asm/apic.h> #include <asm/msr.h> #include <asm/mce.h> #include <asm/hw_irq.h> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35a..11b93cabdf7 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,7 +24,7 @@ #include <asm/apic.h> #include <asm/hpet.h> #include <linux/kdebug.h> -#include <asm/smp.h> +#include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d070704..d35db5993fd 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irq_stack_end = + (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *) estack_end[-2]; continue; } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); + if (irq_stack_end) { + unsigned long *irq_stack; + irq_stack = irq_stack_end - + (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irqstack && stack < irqstack_end) { + if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; + stack = (unsigned long *) (irq_stack_end[-1]); + irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irq_stack_end = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + unsigned long *irq_stack = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); + if (stack >= irq_stack && stack <= irq_stack_end) { + if (stack == irq_stack_end) { + stack = (unsigned long *) (irq_stack_end[-1]); printk(" <EOI> "); } } else { @@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe1..b205272ad39 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,10 +366,12 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); +#ifdef CONFIG_X86_UV } else if (!efi_guidcmp(config_tables[i].guid, UV_SYSTEM_TABLE_GUID)) { efi.uv_systab = config_tables[i].table; printk(" UVsystab=0x%lx ", config_tables[i].table); +#endif } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215..a4ee29127fd 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -36,6 +36,7 @@ #include <asm/proto.h> #include <asm/efi.h> #include <asm/cacheflush.h> +#include <asm/fixmap.h> static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46469029e9d..a0b91aac72a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -672,7 +672,7 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -#define BUILD_INTERRUPT(name, nr) \ +#define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ pushl $~(nr); \ @@ -680,11 +680,13 @@ ENTRY(name) \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ - call smp_##name; \ + call fn; \ jmp ret_from_intr; \ CFI_ENDPROC; \ ENDPROC(name) +#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a1346217e43..586bed67755 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -52,6 +52,7 @@ #include <asm/irqflags.h> #include <asm/paravirt.h> #include <asm/ftrace.h> +#include <asm/percpu.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ #include <linux/elf-em.h> @@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -336,15 +337,15 @@ ENTRY(save_args) je 1f SWAPGS /* - * irqcount is used to check if a CPU is already on an interrupt stack + * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl %gs:pda_irqcount +1: incl PER_CPU_VAR(irq_count) jne 2f popq_cfi %rax /* move return address... */ - mov %gs:pda_irqstackptr,%rsp + mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 pushq_cfi %rbp /* backlink for unwinder */ pushq_cfi %rax /* ... to the new stack */ @@ -468,7 +469,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -479,8 +480,8 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq %rsp,PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: @@ -523,7 +524,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -833,11 +834,11 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 @@ -982,8 +983,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif +#ifdef CONFIG_X86_UV apicinterrupt UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt +#endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt @@ -1073,10 +1076,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - movq %gs:pda_data_offset, %rbp - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) @@ -1138,7 +1141,7 @@ ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 - DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs @@ -1260,14 +1263,14 @@ ENTRY(call_softirq) CFI_REL_OFFSET rbp,0 mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - incl %gs:pda_irqcount - cmove %gs:pda_irqstackptr,%rsp + incl PER_CPU_VAR(irq_count) + cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) ret CFI_ENDPROC END(call_softirq) @@ -1297,15 +1300,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME -11: incl %gs:pda_irqcount +11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - cmovzq %gs:pda_irqstackptr,%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp CFI_DEF_CFA_REGISTER rsp - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC END(do_hypervisor_callback) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2bced78b0b8..e656c272115 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_UV &apic_x2apic_uv_x, +#endif &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6c..bfe36249145 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -25,6 +25,7 @@ #include <asm/ipi.h> #include <asm/genapic.h> #include <asm/pgtable.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> #include <asm/uv/bios.h> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b93..f5b27224769 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,27 +26,6 @@ #include <asm/bios_ebda.h> #include <asm/trampoline.h> -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; - -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. - */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - -void __init x86_64_init_pda(void) -{ - _cpu_pda = __cpu_pda; - cpu_pda(0) = &_boot_cpu_pda; - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70..24c0e5cd71e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + xorl %eax,%eax # Clear GS and LDT movl %eax,%gs lldt %ax @@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d49556..2e648e3a5ea 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,7 @@ #include <asm/msr.h> #include <asm/cache.h> #include <asm/processor-flags.h> +#include <asm/percpu.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -226,12 +227,15 @@ ENTRY(secondary_startup_64) movl %eax,%fs movl %eax,%gs - /* - * Setup up a dummy PDA. this is just for some early bootup code - * that does in_interrupt() - */ + /* Set up %gs. + * + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. + */ movl $MSR_GS_BASE,%ecx - movq $empty_zero_page,%rax + movq initial_gs(%rip),%rax movq %rax,%rdx shrq $32,%rdx wrmsr @@ -257,6 +261,8 @@ ENTRY(secondary_startup_64) .align 8 ENTRY(initial_code) .quad x86_64_start_kernel + ENTRY(initial_gs) + .quad INIT_PER_CPU_VAR(irq_stack_union) __FINITDATA ENTRY(stack_start) @@ -401,7 +407,8 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +early_gdt_descr_base: + .quad INIT_PER_CPU_VAR(gdt_page) ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bc7ac4da90d..0480d06a12a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -46,6 +46,7 @@ #include <asm/idle.h> #include <asm/io.h> #include <asm/smp.h> +#include <asm/cpu.h> #include <asm/desc.h> #include <asm/proto.h> #include <asm/acpi.h> @@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -356,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpumask_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -386,7 +387,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -579,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(&desc->affinity, cfg->domain, mask); + cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); + return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) + if (mp_irqs[i].irqtype == type && + (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + mp_irqs[i].dstapic == MP_APIC_ALL) && + mp_irqs[i].dstirq == pin) return i; return -1; @@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].mp_dstirq; + return mp_irqs[i].dstirq; } return -1; } @@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) return apic; } } @@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && + !mp_irqs[i].irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + if (pin == (mp_irqs[i].srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) + switch (mp_irqs[idx].irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + switch ((mp_irqs[idx].irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mp_dstirq != pin) + if (mp_irqs[idx].dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; + irq = mp_irqs[idx].srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); + mp_ioapics[apic].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void) notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); continue; } if (notcon) { @@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mp_apicid; + old_id = mp_ioapics[apic].apicid; - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + mp_ioapics[apic].apicid = reg_00.bits.ID; } /* @@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { + mp_ioapics[apic].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; + mp_ioapics[apic].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mp_apicid) + if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; + if (mp_irqs[i].dstapic == old_id) + mp_irqs[i].dstapic + = mp_ioapics[apic].apicid; /* * Read the right value from the MPC table and @@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + reg_00.bits.ID = mp_ioapics[apic].apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -2383,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (cfg->move_in_progress) send_cleanup_vector(cfg); - cpumask_copy(&desc->affinity, mask); + cpumask_copy(desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2405,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2434,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2448,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } @@ -2516,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp) /* domain has not changed, but affinity did */ me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { + if (cpumask_test_cpu(me, desc->affinity)) { *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; @@ -3118,8 +3117,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -3184,7 +3183,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; @@ -3259,6 +3258,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms int err; unsigned dest; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) @@ -3464,40 +3466,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; @@ -3727,6 +3695,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct irq_cfg *cfg; int err; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { @@ -3761,7 +3732,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. @@ -3861,6 +3832,22 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3995,8 +3982,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) + if (mp_irqs[i].irqtype == mp_INT && + mp_irqs[i].srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; @@ -4050,7 +4037,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; @@ -4111,7 +4098,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; + ioapic_phys = mp_ioapics[i].apicaddr; #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f8..8b30d0c2512 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e..e0f29be8ab0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -248,7 +248,7 @@ void fixup_irqs(void) if (irq == 2) continue; - affinity = &desc->affinity; + affinity = desc->affinity; if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); affinity = cpu_all_mask; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec02..018963aa6ee 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,13 @@ #include <linux/smp.h> #include <asm/io_apic.h> #include <asm/idle.h> +#include <asm/apic.h> + +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); /* * Probabilistic stack overflow check: @@ -100,7 +107,7 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; + affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 10a09c2f182..22608ebf831 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -140,8 +140,15 @@ void __init native_init_IRQ(void) */ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e61..5e9f4fc5138 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -87,9 +87,9 @@ #include <linux/cpu.h> #include <linux/firmware.h> #include <linux/platform_device.h> +#include <linux/uaccess.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/processor.h> #include <asm/microcode.h> @@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; } -static inline int +static inline int update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 0 : 1; @@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } - ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, - &get_ucode_fw); + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); release_firmware(firmware); @@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb..0edd819050e 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } /* We don't need anything special. */ @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".text", secstrings + s->sh_name)) text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b..c23880b90b5 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -30,14 +30,14 @@ #include <asm/page.h> #include <asm/pgtable.h> -#define DEBUGP(fmt...) +#define DEBUGP(fmt...) #ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } void *module_alloc(unsigned long size) @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; - u64 val; + u64 val; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); - val = sym->st_value + rel[i].r_addend; + val = sym->st_value + rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if ((s64)val != *(s32 *)loc) goto overflow; break; - case R_X86_64_PC32: + case R_X86_64_PC32: val -= (u64)loc; *(u32 *)loc = val; #if 0 if ((s64)val != *(s32 *)loc) - goto overflow; + goto overflow; #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", + printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + printk(KERN_ERR "overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", me->name); @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk("non add relocation not supported\n"); + printk(KERN_ERR "non add relocation not supported\n"); return -ENOSYS; -} +} int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL; @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a649a4ccad4..fa6bb263892 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -144,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) if (bad_ioapic(m->apicaddr)) return; - mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].mp_apicid = m->apicid; - mp_ioapics[nr_ioapics].mp_type = m->type; - mp_ioapics[nr_ioapics].mp_apicver = m->apicver; - mp_ioapics[nr_ioapics].mp_flags = m->flags; + mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; + mp_ioapics[nr_ioapics].apicid = m->apicid; + mp_ioapics[nr_ioapics].type = m->type; + mp_ioapics[nr_ioapics].apicver = m->apicver; + mp_ioapics[nr_ioapics].flags = m->flags; nr_ioapics++; } @@ -160,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) m->srcbusirq, m->dstapic, m->dstirq); } -static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, - (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, - mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + mp_irq->irqtype, mp_irq->irqflag & 3, + (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, + mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mp_config_intsrc *mp_irq) + struct mpc_intsrc *mp_irq) { - mp_irq->mp_dstapic = m->dstapic; - mp_irq->mp_type = m->type; - mp_irq->mp_irqtype = m->irqtype; - mp_irq->mp_irqflag = m->irqflag; - mp_irq->mp_srcbus = m->srcbus; - mp_irq->mp_srcbusirq = m->srcbusirq; - mp_irq->mp_dstirq = m->dstirq; + mp_irq->dstapic = m->dstapic; + mp_irq->type = m->type; + mp_irq->irqtype = m->irqtype; + mp_irq->irqflag = m->irqflag; + mp_irq->srcbus = m->srcbus; + mp_irq->srcbusirq = m->srcbusirq; + mp_irq->dstirq = m->dstirq; } -static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - m->dstapic = mp_irq->mp_dstapic; - m->type = mp_irq->mp_type; - m->irqtype = mp_irq->mp_irqtype; - m->irqflag = mp_irq->mp_irqflag; - m->srcbus = mp_irq->mp_srcbus; - m->srcbusirq = mp_irq->mp_srcbusirq; - m->dstirq = mp_irq->mp_dstirq; + m->dstapic = mp_irq->dstapic; + m->type = mp_irq->type; + m->irqtype = mp_irq->irqtype; + m->irqflag = mp_irq->irqflag; + m->srcbus = mp_irq->srcbus; + m->srcbusirq = mp_irq->srcbusirq; + m->dstirq = mp_irq->dstirq; } -static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - if (mp_irq->mp_dstapic != m->dstapic) + if (mp_irq->dstapic != m->dstapic) return 1; - if (mp_irq->mp_type != m->type) + if (mp_irq->type != m->type) return 2; - if (mp_irq->mp_irqtype != m->irqtype) + if (mp_irq->irqtype != m->irqtype) return 3; - if (mp_irq->mp_irqflag != m->irqflag) + if (mp_irq->irqflag != m->irqflag) return 4; - if (mp_irq->mp_srcbus != m->srcbus) + if (mp_irq->srcbus != m->srcbus) return 5; - if (mp_irq->mp_srcbusirq != m->srcbusirq) + if (mp_irq->srcbusirq != m->srcbusirq) return 6; - if (mp_irq->mp_dstirq != m->dstirq) + if (mp_irq->dstirq != m->dstirq) return 7; return 0; @@ -417,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.type = MP_INTSRC; intsrc.irqflag = 0; /* conforming */ intsrc.srcbus = 0; - intsrc.dstapic = mp_ioapics[0].mp_apicid; + intsrc.dstapic = mp_ioapics[0].apicid; intsrc.irqtype = mp_INT; @@ -570,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct intel_mp_floating *mpf_found; +static struct mpf_intel *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { - struct intel_mp_floating *mpf = mpf_found; + struct mpf_intel *mpf = mpf_found; if (!mpf) return; @@ -598,9 +598,9 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->mpf_feature2 & (1 << 7)) { + if (mpf->feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -611,7 +611,7 @@ static void __init __get_smp_config(unsigned int early) /* * Now see if we need to read further. */ - if (mpf->mpf_feature1 != 0) { + if (mpf->feature1 != 0) { if (early) { /* * local APIC has default address @@ -621,16 +621,16 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); + mpf->feature1); + construct_default_ISA_mptable(mpf->feature1); - } else if (mpf->mpf_physptr) { + } else if (mpf->physptr) { /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -688,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { - mpf = (struct intel_mp_floating *)bp; + mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && + (mpf->length == 1) && !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { + ((mpf->specification == 1) + || (mpf->specification == 4))) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif @@ -713,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 1; reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { + if (mpf->physptr) { unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* @@ -722,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * the bottom is mapped now. * PC-9800's MPC table places on the very last * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; + if (mpf->physptr + size > end) + size = end - mpf->physptr; #endif - reserve_bootmem_generic(mpf->mpf_physptr, size, + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); } @@ -809,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) /* not legacy */ for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; - if (mp_irqs[i].mp_srcbus != m->srcbus) + if (mp_irqs[i].srcbus != m->srcbus) continue; - if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + if (mp_irqs[i].srcbusirq != m->srcbusirq) continue; if (irq_used[i]) { /* already claimed */ @@ -922,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (irq_used[i]) continue; - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; if (nr_m_spare > 0) { @@ -1001,7 +1001,7 @@ static int __init update_mp_table(void) { char str[16]; char oem[10]; - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; struct mpc_table *mpc, *mpc_new; if (!enable_update_mptable) @@ -1014,19 +1014,19 @@ static int __init update_mp_table(void) /* * Now see if we need to go further. */ - if (mpf->mpf_feature1 != 0) + if (mpf->feature1 != 0) return 0; - if (!mpf->mpf_physptr) + if (!mpf->physptr) return 0; - mpc = phys_to_virt(mpf->mpf_physptr); + mpc = phys_to_virt(mpf->physptr); if (!smp_check_mpc(mpc, oem, str)) return 0; printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); - printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; @@ -1047,23 +1047,23 @@ static int __init update_mp_table(void) } printk(KERN_INFO "use in-positon replacing\n"); } else { - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); memcpy(mpc_new, mpc, mpc->length); mpc = mpc_new; /* check if we can modify that */ - if (mpc_new_phys - mpf->mpf_physptr) { - struct intel_mp_floating *mpf_new; + if (mpc_new_phys - mpf->physptr) { + struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; } - mpf->mpf_checksum = 0; - mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + mpf->checksum = 0; + mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "physptr new: %x\n", mpf->physptr); } /* diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2..3cf3413ec62 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -35,10 +35,10 @@ #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/system.h> static struct class *msr_class; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7..23b6d9e6e4f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 95777b0faa7..3a7c5a44082 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { }; EXPORT_SYMBOL(pv_lock_ops); -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..cea11c8e304 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -44,6 +44,17 @@ void _paravirt_nop(void) { } +/* identity function, which can be inlined */ +u32 _paravirt_ident_32(u32 x) +{ + return x; +} + +u64 _paravirt_ident_64(u64 x) +{ + return x; +} + static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == paravirt_nop) + else if (opfunc == _paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); + + /* identity functions just return their single argument */ + else if (opfunc == _paravirt_ident_32) + ret = paravirt_patch_ident_32(insnbuf, len); + else if (opfunc == _paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || @@ -292,10 +310,10 @@ struct pv_time_ops pv_time_ops = { struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, + .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, #ifdef CONFIG_X86_64 @@ -373,6 +391,14 @@ struct pv_apic_ops pv_apic_ops = { #endif }; +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +/* 32-bit pagetable entries */ +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) +#else +/* 64-bit pagetable entries */ +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -424,22 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, + + .pmd_val = PTE_IDENT, + .make_pmd = PTE_IDENT, #if PAGETABLE_LEVELS == 4 - .pud_val = native_pud_val, - .make_pud = native_make_pud, + .pud_val = PTE_IDENT, + .make_pud = PTE_IDENT, + .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = native_pte_val, - .pte_flags = native_pte_flags, - .pgd_val = native_pgd_val, + .pte_val = PTE_IDENT, + .pgd_val = PTE_IDENT, - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, + .make_pte = PTE_IDENT, + .make_pgd = PTE_IDENT, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861..d9f32e6d6ab 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + /* arg in %eax, return in %eax */ + return 0; +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + /* arg in %edx:%eax, return in %edx:%eax */ + return 0; +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae..3f08f34f93e 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); +DEF_NATIVE(, mov32, "mov %edi, %eax"); +DEF_NATIVE(, mov64, "mov %rdi, %rax"); + +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov32, end__mov32); +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov64, end__mov64); +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b..1a1ae8edc40 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * Return saved PC of a blocked thread. */ @@ -111,7 +108,6 @@ void cpu_idle(void) play_dead(); local_irq_disable(); - __get_cpu_var(irq_stat).idle_timestamp = jiffies; /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); @@ -591,7 +587,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 85b4cb5c198..8eb169e4558 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -16,6 +16,7 @@ #include <stdarg.h> +#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -47,7 +48,6 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/mmu_context.h> -#include <asm/pda.h> #include <asm/prctl.h> #include <asm/desc.h> #include <asm/proto.h> @@ -58,6 +58,12 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + +DEFINE_PER_CPU(unsigned long, old_rsp); +static DEFINE_PER_CPU(unsigned char, is_idle); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -76,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { - write_pda(isidle, 1); + percpu_write(is_idle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - if (test_and_clear_bit_pda(0, isidle) == 0) + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } @@ -112,6 +118,17 @@ static inline void play_dead(void) void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; + + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): + */ + boot_init_stack_canary(); + /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(1); @@ -397,7 +414,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -618,21 +635,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); + percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); -#ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); -#endif + THREAD_SIZE - KERNEL_STACK_OFFSET); /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643..f8536fee5c1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -14,6 +14,7 @@ #include <asm/reboot.h> #include <asm/pci_x86.h> #include <asm/virtext.h> +#include <asm/cpu.h> #ifdef CONFIG_X86_32 # include <linux/dmi.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c461f6d6907..d5d6693b706 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,7 +89,7 @@ #include <asm/system.h> #include <asm/vsyscall.h> -#include <asm/smp.h> +#include <asm/cpu.h> #include <asm/desc.h> #include <asm/dma.h> #include <asm/iommu.h> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 01161077a49..ef91747bbed 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,145 +13,46 @@ #include <asm/mpspec.h> #include <asm/apicdef.h> #include <asm/highmem.h> +#include <asm/proto.h> +#include <asm/cpumask.h> +#include <asm/cpu.h> -#ifdef CONFIG_X86_LOCAL_APIC -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); -unsigned int max_physical_apicid; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; -#endif - -/* map cpu index to physical APIC ID */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 - -/* map cpu index to node index */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - -/* which logical CPUs are on which nodes */ -cpumask_t *node_to_cpumask_map; -EXPORT_SYMBOL(node_to_cpumask_map); - -/* setup node_to_cpumask_map */ -static void __init setup_node_to_cpumask_map(void); - +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) #else -static inline void setup_node_to_cpumask_map(void) { } +# define DBG(x...) #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) -/* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. - */ -static void __init setup_per_cpu_maps(void) -{ - int cpu; +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); - for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); - per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); +#ifdef CONFIG_X86_64 +#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) +#else +#define BOOT_PERCPU_OFFSET 0 #endif - } - /* indicate the early static arrays will soon be gone */ - early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; - early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA - early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; -#endif -} +DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +EXPORT_PER_CPU_SYMBOL(this_cpu_off); -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, +}; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - struct x8664_pda **new_cpu_pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long tsize = nr_cpu_ids * sizeof(void *); - unsigned long asize = size * (nr_cpu_ids - 1); - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); - continue; - } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; - pda += size; - } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void __init setup_cpu_local_masks(void) +static inline void setup_percpu_segment(int cpu) { - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; -static inline void setup_cpu_local_masks(void) -{ + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif } -#endif /* CONFIG_X86_32 */ - /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -159,18 +60,12 @@ static inline void setup_cpu_local_masks(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size; char *ptr; int cpu; - unsigned long align = 1; - - /* Setup cpu_pda map */ - setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ - old_size = PERCPU_ENOUGH_ROOM; - align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size, align); + size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -179,30 +74,67 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { - ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); pr_debug("per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); } #endif + + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); + per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); + /* + * Copy data used in early init routines from the + * initial arrays to the per cpu data areas. These + * arrays then become expendable and the *_early_ptr's + * are zeroed indicating that the static arrays are + * gone. + */ +#ifdef CONFIG_X86_LOCAL_APIC + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#endif +#ifdef CONFIG_X86_64 + per_cpu(irq_stack_ptr, cpu) = + per_cpu(irq_stack_union.irq_stack, cpu) + + IRQ_STACK_SIZE - 64; +#ifdef CONFIG_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif +#endif + /* + * Up to this point, the boot CPU has been using .data.init + * area. Reload any changed state for the boot CPU. + */ + if (cpu == boot_cpu_id) + switch_to_new_gdt(cpu); + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } - /* Setup percpu data maps */ - setup_per_cpu_maps(); + /* indicate the early static arrays will soon be gone */ +#ifdef CONFIG_X86_LOCAL_APIC + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#endif +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; +#endif /* Setup node to cpumask map */ setup_node_to_cpumask_map(); @@ -210,199 +142,3 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); } - -#endif - -#ifdef X86_64_NUMA - -/* - * Allocate node_to_cpumask_map based on number of available nodes - * Requires node_possible_map to be valid. - * - * Note: node_to_cpumask() is not valid until after this is done. - */ -static void __init setup_node_to_cpumask_map(void) -{ - unsigned int node, num = 0; - cpumask_t *map; - - /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } - - /* allocate the map */ - map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - - pr_debug("Node to cpumask map at %p for %d nodes\n", - map, nr_node_ids); - - /* node_to_cpumask() will now work */ - node_to_cpumask_map = map; -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; - - if (cpu_to_node_map) - cpu_to_node_map[cpu] = node; - - else if (per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; - - else - pr_debug("Setting node for non-present cpu %d\n", cpu); -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -void __cpuinit numa_add_cpu(int cpu) -{ - cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); -} - -#else /* CONFIG_DEBUG_PER_CPU_MAPS */ - -/* - * --------- debug versions of the numa functions --------- - */ -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - int node = cpu_to_node(cpu); - cpumask_t *mask; - char buf[64]; - - if (node_to_cpumask_map == NULL) { - printk(KERN_ERR "node_to_cpumask_map NULL\n"); - dump_stack(); - return; - } - - mask = &node_to_cpumask_map[node]; - if (enable) - cpu_set(cpu, *mask); - else - cpu_clear(cpu, *mask); - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, 1); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, 0); -} - -int cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. - */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!per_cpu_offset(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - - -/* empty cpumask */ -static const cpumask_t cpu_mask_none; - -/* - * Returns a pointer to the bitmask of CPUs on Node 'node'. - */ -const cpumask_t *cpumask_of_node(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "cpumask_of_node(%d): no node_to_cpumask_map!\n", - node); - dump_stack(); - return (const cpumask_t *)&cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "cpumask_of_node(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return &cpu_mask_none; - } - return &node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(cpumask_of_node); - -/* - * Returns a bitmask of CPUs on Node 'node'. - * - * Side note: this function creates the returned cpumask on the stack - * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. - */ -cpumask_t node_to_cpumask(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); - dump_stack(); - return cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "node_to_cpumask(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return cpu_mask_none; - } - return node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(node_to_cpumask); - -/* - * --------- end of debug versions of the numa functions --------- - */ - -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ - -#endif /* X86_64_NUMA */ - diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index df0587f24c5..7fc78b01981 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -51,24 +51,24 @@ #endif #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp; \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define GET_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ loadsegment(seg, tmp); \ } @@ -83,45 +83,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + get_user_try { + #ifdef CONFIG_X86_32 - GET_SEG(gs); - COPY_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); + GET_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); #endif /* CONFIG_X86_32 */ - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); #ifdef CONFIG_X86_64 - COPY(r8); - COPY(r9); - COPY(r10); - COPY(r11); - COPY(r12); - COPY(r13); - COPY(r14); - COPY(r15); + COPY(r8); + COPY(r9); + COPY(r10); + COPY(r11); + COPY(r12); + COPY(r13); + COPY(r14); + COPY(r15); #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); #else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); #endif /* CONFIG_X86_32 */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ + + get_user_ex(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -131,57 +135,60 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, { int err = 0; + put_user_try { + #ifdef CONFIG_X86_32 - { - unsigned int tmp; + { + unsigned int tmp; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - } - err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); - err |= __put_user(regs->es, (unsigned int __user *)&sc->es); - err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + } + put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); + put_user_ex(regs->es, (unsigned int __user *)&sc->es); + put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); #endif /* CONFIG_X86_32 */ - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); #ifdef CONFIG_X86_64 - err |= __put_user(regs->r8, &sc->r8); - err |= __put_user(regs->r9, &sc->r9); - err |= __put_user(regs->r10, &sc->r10); - err |= __put_user(regs->r11, &sc->r11); - err |= __put_user(regs->r12, &sc->r12); - err |= __put_user(regs->r13, &sc->r13); - err |= __put_user(regs->r14, &sc->r14); - err |= __put_user(regs->r15, &sc->r15); + put_user_ex(regs->r8, &sc->r8); + put_user_ex(regs->r9, &sc->r9); + put_user_ex(regs->r10, &sc->r10); + put_user_ex(regs->r11, &sc->r11); + put_user_ex(regs->r12, &sc->r12); + put_user_ex(regs->r13, &sc->r13); + put_user_ex(regs->r14, &sc->r14); + put_user_ex(regs->r15, &sc->r15); #endif /* CONFIG_X86_64 */ - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); #ifdef CONFIG_X86_32 - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); #else /* !CONFIG_X86_32 */ - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->cs, &sc->cs); - err |= __put_user(0, &sc->gs); - err |= __put_user(0, &sc->fs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->cs, &sc->cs); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ - err |= __put_user(fpstate, &sc->fpstate); + put_user_ex(fpstate, &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -336,43 +343,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - return -EFAULT; - - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(&frame->info, &frame->pinfo); + put_user_ex(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); - /* Set up to return from userspace. */ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + put_user_ex(restorer, &frame->pretcode); - /* - * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + /* + * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + } put_user_catch(err); if (err) return -EFAULT; @@ -436,28 +441,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; } - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - /* could use a vstub here */ - return -EFAULT; - } + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + /* x86-64 should always use SA_RESTORER. */ + if (ka->sa.sa_flags & SA_RESTORER) { + put_user_ex(ka->sa.sa_restorer, &frame->pretcode); + } else { + /* could use a vstub here */ + err |= -EFAULT; + } + } put_user_catch(err); if (err) return -EFAULT; @@ -509,31 +516,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { struct k_sigaction new_ka, old_ka; - int ret; + int ret = 0; if (act) { old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + if (!access_ok(VERIFY_READ, act, sizeof(*act))) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); + get_user_try { + get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); + get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); + get_user_ex(mask, &act->sa_mask); + get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); + } get_user_catch(ret); + + if (ret) + return -EFAULT; siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_try { + put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); + put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); + put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); + } put_user_catch(ret); + + if (ret) + return -EFAULT; } return ret; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87..612d3c74f6a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,7 +53,6 @@ #include <asm/nmi.h> #include <asm/irq.h> #include <asm/idle.h> -#include <asm/smp.h> #include <asm/trampoline.h> #include <asm/cpu.h> #include <asm/numa.h> @@ -63,6 +62,7 @@ #include <asm/vmi.h> #include <asm/genapic.h> #include <asm/setup.h> +#include <asm/uv/uv.h> #include <linux/mc146818rtc.h> #include <mach_apic.h> @@ -745,52 +745,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -808,16 +762,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -847,14 +791,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; - init_gdt(cpu); +#ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; @@ -931,9 +877,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ @@ -1125,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); + disable_ioapic_setup(); return -1; } @@ -1240,10 +1185,7 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif - switch_to_new_gdt(); + switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); per_cpu(cpu_state, me) = CPU_ONLINE; diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 397e309839d..00000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include <linux/module.h> -#include <asm/smp.h> - -#ifdef CONFIG_X86_32 -DEFINE_PER_CPU(unsigned long, this_cpu_off); -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; - per_cpu(cpu_number, cpu) = cpu; -} -#endif diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index ce505464224..00000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null @@ -1,256 +0,0 @@ -#include <linux/spinlock.h> -#include <linux/cpu.h> -#include <linux/interrupt.h> - -#include <asm/tlbflush.h> - -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; - -/* must come after the send_IPI functions above for inlining */ -#include <mach_ipi.h> - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul <manfred@colorfullife.com> - */ - -static cpumask_t flush_cpumask; -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -void leave_mm(int cpu) -{ - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - */ - -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned long cpu; - - cpu = get_cpu(); - - if (!cpu_isset(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); - smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); - inc_irq_stat(irq_tlb_count); -} - -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) -{ - cpumask_t cpumask = *cpumaskp; - - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * AK: x86-64 has a faster method that could be ported. - */ - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpus_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - cpu_relax(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} -EXPORT_SYMBOL(flush_tlb_page); - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} - -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 6812b829ed8..f4b2f27d19b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <asm/mmu_context.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> #include <asm/uv/uv_bau.h> @@ -210,14 +211,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * * Send a broadcast and wait for a broadcast message to complete. * - * The cpumaskp mask contains the cpus the broadcast was sent to. + * The flush_mask contains the cpus the broadcast was sent to. * - * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask is left - * unchanged. + * Returns NULL if all remote flushing was done. The mask is zeroed. + * Returns @flush_mask if some remote flushing remains to be done. The + * mask will have some bits still set. */ -int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - cpumask_t *cpumaskp) +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, + struct bau_desc *bau_desc, + struct cpumask *flush_mask) { int completion_status = 0; int right_shift; @@ -257,66 +259,76 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * the cpu's, all of which are still in the mask. */ __get_cpu_var(ptcstats).ptc_i++; - return 0; + return flush_mask; } /* * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpu_clear(bit, *cpumaskp); + cpumask_clear_cpu(bit, flush_mask); } - if (!cpus_empty(*cpumaskp)) - return 0; - return 1; + if (!cpumask_empty(flush_mask)) + return flush_mask; + return NULL; } /** * uv_flush_tlb_others - globally purge translation cache of a virtual * address or all TLB's - * @cpumaskp: mask of all cpu's in which the address is to be removed + * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu * * This is the entry point for initiating any UV global TLB shootdown. * * Purges the translation caches of all specified processors of the given * virtual address, or purges all TLB's on specified processors. * - * The caller has derived the cpumaskp from the mm_struct and has subtracted - * the local cpu from the mask. This function is called only if there - * are bits set in the mask. (e.g. flush_tlb_page()) + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) * - * The cpumaskp is converted into a nodemask of the nodes containing + * The cpumask is converted into a nodemask of the nodes containing * the cpus. * - * Returns 1 if all remote flushing was done. - * Returns 0 if some remote flushing remains to be done. + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. */ -int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, unsigned int cpu) { + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); int i; int bit; int blade; - int cpu; + int uv_cpu; int this_blade; int locals = 0; struct bau_desc *bau_desc; - cpu = uv_blade_processor_id(); + WARN_ON(!in_atomic()); + + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + + uv_cpu = uv_blade_processor_id(); this_blade = uv_numa_blade_id(); bau_desc = __get_cpu_var(bau_control).descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { @@ -331,17 +343,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, * no off_node flushing; return status for local node */ if (locals) - return 0; + return flush_mask; else - return 1; + return NULL; } __get_cpu_var(ptcstats).requestor++; __get_cpu_var(ptcstats).ntargeted += i; bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = smp_processor_id(); + bau_desc->payload.sending_cpu = cpu; - return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); + return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb..3b7b2e19020 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,7 +59,6 @@ #ifdef CONFIG_X86_64 #include <asm/pgalloc.h> #include <asm/proto.h> -#include <asm/pda.h> #else #include <asm/processor-flags.h> #include <asm/arch_hooks.h> diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index bef58b4982d..f052c84ecbe 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -680,10 +680,11 @@ static inline int __init activate_vmi(void) para_fill(pv_mmu_ops.write_cr2, SetCR2); para_fill(pv_mmu_ops.write_cr3, SetCR3); para_fill(pv_cpu_ops.write_cr4, SetCR4); - para_fill(pv_irq_ops.save_fl, GetInterruptMask); - para_fill(pv_irq_ops.restore_fl, SetInterruptMask); - para_fill(pv_irq_ops.irq_disable, DisableInterrupts); - para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); para_fill(pv_cpu_ops.wbinvd, WBINVD); para_fill(pv_cpu_ops.read_tsc, RDTSC); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde..3eba7f7bac0 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -178,14 +178,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* freed after init ends here */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6be..087a7f2c639 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include <asm-generic/vmlinux.lds.h> +#include <asm/asm-offsets.h> #include <asm/page.h> #undef i386 /* in case the preprocessor is a 32bit one */ @@ -13,12 +14,15 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +212,28 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * start another section data.init2. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; @@ -239,8 +257,21 @@ SECTIONS DWARF_DEBUG } + /* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + /* * Build-time check on the image size: */ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec..c609205df59 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) flags &= ~X86_EFLAGS_IF; return flags; } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); static void vsmp_restore_fl(unsigned long flags) { @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) flags |= X86_EFLAGS_AC; native_restore_fl(flags); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); static void vsmp_irq_disable(void) { @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); static void vsmp_irq_enable(void) { @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) cap, ctl); if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; ctl &= ~(1 << 4); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa35..3909e3ba5ce 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); - -EXPORT_SYMBOL(_proxy_pda); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92f1c6f3e19..19e33b6cd59 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -173,24 +173,29 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } +PV_CALLEE_SAVE_REGS_THUNK(save_fl); /* restore_flags() just sets the flags back to the value given. */ static void restore_fl(unsigned long flags) { lguest_data.irq_enabled = flags; } +PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } +PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /* Interrupts go on... */ static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(irq_enable); + /*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, @@ -984,10 +989,10 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; - pv_irq_ops.save_fl = save_fl; - pv_irq_ops.restore_fl = restore_fl; - pv_irq_ops.irq_disable = irq_disable; - pv_irq_ops.irq_enable = irq_enable; + pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index d914a7996a6..66b7eb57d8e 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -9,6 +9,7 @@ #include <asm/e820.h> #include <asm/io.h> #include <asm/setup.h> +#include <asm/cpu.h> void __init pre_intr_init_hook(void) { diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7ffcdeec463..6f5a38c7f90 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -400,7 +400,7 @@ void __init find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); + percpu_write(cpu_number, boot_cpu_id); } /* @@ -528,7 +528,6 @@ static void __init do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.sp = (void *)idle->thread.sp; - init_gdt(cpu); per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1745,14 +1744,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - init_gdt(smp_processor_id()); - switch_to_new_gdt(); + int cpu = smp_processor_id(); + switch_to_new_gdt(cpu); cpu_online_map = cpumask_of_cpu(smp_processor_id()); cpu_callout_map = cpumask_of_cpu(smp_processor_id()); cpu_callin_map = CPU_MASK_NONE; cpu_present_map = cpumask_of_cpu(smp_processor_id()); - } static int __cpuinit voyager_cpu_up(unsigned int cpu) @@ -1779,7 +1777,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(const struct cpumask *callmask) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index d8cc96a2738..9f05157220f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,6 +1,8 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o +obj-$(CONFIG_X86_SMP) += tlb.o + obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 7e8db53528a..61b41ca3b5a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -23,6 +23,12 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(regs->ip); if (fixup) { + /* If fixup is less than 16, it means uaccess error */ + if (fixup->fixup < 16) { + current_thread_info()->uaccess_err = -EFAULT; + regs->ip += fixup->fixup; + return 1; + } regs->ip = fixup->fixup; return 1; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a4..8c3f3113a6e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -26,6 +26,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/magic.h> #include <asm/system.h> #include <asm/desc.h> @@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs) * * Opcode checker based on code by Richard Brunner */ -static int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) +static int is_prefetch(struct pt_regs *regs, unsigned long error_code, + unsigned long addr) { unsigned char *instr; int scan_more = 1; @@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, } #ifdef CONFIG_X86_64 -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static noinline void pgtable_bad(struct pt_regs *regs, + unsigned long error_code, unsigned long address) { unsigned long flags = oops_begin(); int sig = SIGKILL; - struct task_struct *tsk; + struct task_struct *tsk = current; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - current->comm, address); + tsk->comm, address); dump_pagetable(address); tsk = current; tsk->thread.cr2 = address; @@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static noinline void no_context(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + unsigned long *stackend; + +#ifdef CONFIG_X86_64 + unsigned long flags; + int sig; +#endif + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * X86_32 + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * X86_64 + * Hall of shame of CPU/BIOS bugs. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata93(regs, address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ +#ifdef CONFIG_X86_32 + bust_spinlocks(1); +#else + flags = oops_begin(); +#endif + + show_fault_oops(regs, error_code, address); + + stackend = end_of_stack(tsk); + if (*stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + +#ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +#else + sig = SIGKILL; + if (__die("Oops", regs, error_code)) + sig = 0; + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); +#endif +} + +static void __bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata100(regs, address)) + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk( + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; + } + + if (is_f00f_bug(regs, address)) + return; + + no_context(regs, error_code, address); +} + +static noinline void bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void __bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + __bad_area_nosemaphore(regs, error_code, address, si_code); +} + +static noinline void bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} + +static noinline void bad_area_access_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ +static void out_of_memory(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(¤t->mm->mmap_sem); + pagefault_out_of_memory(); +} + +static void do_sigbus(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, error_code, address)) + return; +#endif + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline void mm_fault_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address, unsigned int fault) +{ + if (fault & VM_FAULT_OOM) + out_of_memory(regs, error_code, address); + else if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); +} + static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) @@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static int spurious_fault(unsigned long address, - unsigned long error_code) +static noinline int spurious_fault(unsigned long error_code, + unsigned long address) { pgd_t *pgd; pud_t *pud; @@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address, * * This assumes no large pages in there. */ -static int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { #ifdef CONFIG_X86_32 unsigned long pgd_paddr; @@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address) int show_unhandled_signals = 1; +static inline int access_error(unsigned long error_code, int write, + struct vm_area_struct *vma) +{ + if (write) { + /* write, present and write, not present */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + } else if (unlikely(error_code & PF_PROT)) { + /* read, present */ + return 1; + } else { + /* read, not present */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + } + + return 0; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -583,16 +793,12 @@ asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address; struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - unsigned long address; - int write, si_code; + int write; int fault; -#ifdef CONFIG_X86_64 - unsigned long flags; - int sig; -#endif tsk = current; mm = tsk->mm; @@ -601,10 +807,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* get the address */ address = read_cr2(); - si_code = SEGV_MAPERR; - - if (notify_page_fault(regs)) - return; if (unlikely(kmmio_fault(regs, address))) return; @@ -631,17 +833,22 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; /* Can handle a stale RO->RW TLB */ - if (spurious_fault(address, error_code)) + if (spurious_fault(error_code, address)) return; + /* kprobes don't want to hook the spurious faults. */ + if (notify_page_fault(regs)) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. */ - goto bad_area_nosemaphore; + bad_area_nosemaphore(regs, error_code, address); + return; } - + if (unlikely(notify_page_fault(regs))) + return; /* * It's safe to allow irq's after cr2 has been saved and the * vmalloc fault has been handled. @@ -657,15 +864,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) - pgtable_bad(address, regs, error_code); + pgtable_bad(regs, error_code, address); #endif /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (unlikely(in_atomic() || !mm)) - goto bad_area_nosemaphore; + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } /* * When running in the kernel we expect faults to occur only to @@ -683,20 +892,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. */ - if (!down_read_trylock(&mm->mmap_sem)) { + if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && - !search_exception_tables(regs->ip)) - goto bad_area_nosemaphore; + !search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } down_read(&mm->mmap_sem); } vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } if (error_code & PF_USER) { /* * Accessing the stack below %sp is always a bug. @@ -704,31 +919,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * and pusha to work. ("enter $65535,$31" pushes * 32 pointers and then decrements %sp by 65535.) */ - if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) - goto bad_area; + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { + bad_area(regs, error_code, address); + return; + } } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ good_area: - si_code = SEGV_ACCERR; - write = 0; - switch (error_code & (PF_PROT|PF_WRITE)) { - default: /* 3: write, present */ - /* fall through */ - case PF_WRITE: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case PF_PROT: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; + write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { + bad_area_access_error(regs, error_code, address); + return; } /* @@ -738,11 +947,8 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + mm_fault_error(regs, error_code, address, fault); + return; } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; @@ -760,128 +966,6 @@ good_area: } #endif up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { - /* - * It's possible to have interrupts off here. - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata100(regs, address)) - return; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); - return; - } - - if (is_f00f_bug(regs, address)) - return; - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * X86_64 - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata93(regs, address)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else - flags = oops_begin(); -#endif - - show_fault_oops(regs, error_code, address); - - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; - -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else - sig = SIGKILL; - if (__die("Oops", regs, error_code)) - sig = 0; - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, sig); -#endif - -out_of_memory: - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & PF_USER)) - goto no_context; -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ - if (is_prefetch(regs, address, error_code)) - return; -#endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } DEFINE_SPINLOCK(pgd_lock); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2cef0507441..00263bf07a8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> -#include <asm/smp.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89..deb1c1ab786 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -20,6 +20,12 @@ #include <asm/acpi.h> #include <asm/k8.h> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -33,6 +39,21 @@ int numa_off __initdata; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); + +/* + * Map cpu index to node index + */ +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +/* + * Which logical CPUs are on which nodes + */ +cpumask_t *node_to_cpumask_map; +EXPORT_SYMBOL(node_to_cpumask_map); + /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -640,3 +661,199 @@ void __init init_cpu_to_node(void) #endif +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + cpumask_t *map; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); + + pr_debug("Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); + + /* node_to_cpumask() will now work */ + node_to_cpumask_map = map; +} + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { + cpu_to_node_map[cpu] = node; + return; + } + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; + + if (node != NUMA_NO_NODE) + per_cpu(node_number, cpu) = node; +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ + +/* + * --------- debug versions of the numa functions --------- + */ +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + int node = early_cpu_to_node(cpu); + cpumask_t *mask; + char buf[64]; + + if (node_to_cpumask_map == NULL) { + printk(KERN_ERR "node_to_cpumask_map NULL\n"); + dump_stack(); + return; + } + + mask = &node_to_cpumask_map[node]; + if (enable) + cpu_set(cpu, *mask); + else + cpu_clear(cpu, *mask); + + cpulist_scnprintf(buf, sizeof(buf), mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); +} + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!cpu_possible(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + + +/* empty cpumask */ +static const cpumask_t cpu_mask_none; + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const cpumask_t *cpumask_of_node(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "cpumask_of_node(%d): no node_to_cpumask_map!\n", + node); + dump_stack(); + return (const cpumask_t *)&cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return &cpu_mask_none; + } + return &node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +/* + * Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +cpumask_t node_to_cpumask(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); + dump_stack(); + return cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "node_to_cpumask(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return cpu_mask_none; + } + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(node_to_cpumask); + +/* + * --------- end of debug versions of the numa functions --------- + */ + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 09737c8af07..15df1baee10 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -21,6 +21,7 @@ #include <asm/numa.h> #include <asm/e820.h> #include <asm/genapic.h> +#include <asm/uv/uv.h> int acpi_numa __initdata; diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/mm/tlb.c index f8be6f1d2e4..72a6d4ebe34 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/mm/tlb.c @@ -1,22 +1,18 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/delay.h> #include <linux/spinlock.h> #include <linux/smp.h> -#include <linux/kernel_stat.h> -#include <linux/mc146818rtc.h> #include <linux/interrupt.h> +#include <linux/module.h> -#include <asm/mtrr.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> -#include <asm/proto.h> -#include <asm/apicdef.h> -#include <asm/idle.h> -#include <asm/uv/uv_hub.h> -#include <asm/uv/uv_bau.h> +#include <asm/apic.h> +#include <asm/uv/uv.h> + +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; #include <mach_ipi.h> /* @@ -33,7 +29,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -43,18 +39,18 @@ union smp_flush_state { struct { - cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -62,9 +58,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -117,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm); * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +/* + * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop + * but still used for documentation purpose but the usage is slightly + * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt + * entry calls in with the first parameter in %eax. Maybe define + * intrlinkage? + */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void smp_invalidate_interrupt(struct pt_regs *regs) { - int cpu; - int sender; + unsigned int cpu; + unsigned int sender; union smp_flush_state *f; cpu = smp_processor_id(); @@ -129,9 +135,9 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; - if (!cpu_isset(cpu, f->flush_cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the @@ -142,8 +148,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -153,23 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); + smp_mb__before_clear_bit(); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); + smp_mb__after_clear_bit(); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - int sender; + unsigned int sender; union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) - return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -180,7 +184,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_mm = mm; f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); /* * Make the above memory operations globally visible before @@ -191,9 +196,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask)) + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; @@ -201,12 +207,28 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + unsigned int cpu; + + cpu = get_cpu(); + cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); + if (cpumask) + flush_tlb_others_ipi(cpumask, mm, va); + put_cpu(); + return; + } + flush_tlb_others_ipi(cpumask, mm, va); +} + static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } @@ -215,25 +237,18 @@ core_initcall(init_smp_flush); void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -241,8 +256,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -250,11 +265,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -263,8 +275,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } @@ -274,7 +286,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 6dcefba7836..3b767d03fd6 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg endif obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm_$(BITS).o grant-table.o suspend.o + time.o xen-asm.o xen-asm_$(BITS).o \ + grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
\ No newline at end of file diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..37230342c2c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -61,40 +61,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; +void *xen_initial_gdt; + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -114,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; * * 0: not available, 1: available */ -static int have_vcpu_info_placement = -#ifdef CONFIG_X86_32 - 1 -#else - 0 -#endif - ; - +static int have_vcpu_info_placement = 1; static void xen_vcpu_setup(int cpu) { @@ -237,7 +203,7 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy(void) +void xen_leave_lazy(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); @@ -598,83 +564,6 @@ static struct apic_ops xen_basic_apic_ops = { #endif -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) -{ - struct { - struct mmuext_op op; - cpumask_t mask; - } *args; - cpumask_t cpumask = *cpus; - struct multicall_space mcs; - - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} static void xen_clts(void) { @@ -700,21 +589,6 @@ static void xen_write_cr0(unsigned long cr0) xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_write_cr2(unsigned long cr2) -{ - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return x86_read_percpu(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return x86_read_percpu(xen_vcpu_info.arch.cr2); -} - static void xen_write_cr4(unsigned long cr4) { cr4 &= ~X86_CR4_PGE; @@ -723,71 +597,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static unsigned long xen_read_cr3(void) -{ - return x86_read_percpu(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - x86_write_percpu(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op *op; - struct multicall_space mcs; - unsigned long mfn; - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - if (kernel) { - x86_write_percpu(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - x86_write_percpu(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -829,185 +638,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) { - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -#ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} -#endif - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1028,37 +658,6 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); -} - -static __init void xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -1072,10 +671,10 @@ void xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - pv_irq_ops.save_fl = xen_save_fl_direct; - pv_irq_ops.restore_fl = xen_restore_fl_direct; - pv_irq_ops.irq_disable = xen_irq_disable_direct; - pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } } @@ -1133,49 +732,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: -#endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif - pte = pfn_pte(phys, prot); - break; - - default: - pte = mfn_pte(phys, prot); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -1271,87 +827,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = { #endif }; -static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, - .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else - .set_pte = xen_set_pte_init, -#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = xen_pte_val, - .pte_flags = native_pte_flags, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, - -#if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, - }, - - .set_fixmap = xen_set_fixmap, -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1394,223 +869,6 @@ static const struct machine_ops __initdata xen_machine_ops = { }; -static void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - -/* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. - */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) -{ - phys_addr_t paddr; - - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; - - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); -} - -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the inital kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pud_t *l3; - pmd_t *l2; - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - /* Switch over */ - pgd = init_level4_pgt; - - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - reserve_early(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; - -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - - return swapper_pg_dir; -} -#endif /* CONFIG_X86_64 */ - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1650,10 +908,18 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; #ifdef CONFIG_X86_64 - /* Disable until direct per-cpu data access. */ - have_vcpu_info_placement = 0; - x86_64_init_pda(); + /* + * Setup percpu state. We only need to do this for 64-bit + * because 32-bit already has %fs set properly. + */ + load_percpu_segment(0); #endif + /* + * The only reliable way to retain the initial address of the + * percpu gdt_page is to remember it here, so we can go and + * mark it RW later, when the initial percpu area is freed. + */ + xen_initial_gdt = &per_cpu(gdt_page, 0); xen_smp_init(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index bb042608c60..5a070900ad3 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void) */ return (-flags) & X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); static void xen_restore_fl(unsigned long flags) { @@ -62,7 +63,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags) xen_force_evtchn_callback(); } } +PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); static void xen_irq_disable(void) { @@ -83,9 +85,10 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); static void xen_irq_enable(void) { @@ -96,7 +99,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any @@ -106,6 +109,7 @@ static void xen_irq_enable(void) if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); static void xen_safe_halt(void) { @@ -124,10 +128,12 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initdata = { .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, + + .save_fl = PV_CALLEE_SAVE(xen_save_fl), + .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), + .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), + .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), + .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240e26c..d2e8ed1aff3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include <asm/tlbflush.h> #include <asm/fixmap.h> #include <asm/mmu_context.h> +#include <asm/setup.h> #include <asm/paravirt.h> #include <asm/linkage.h> @@ -55,6 +56,8 @@ #include <xen/page.h> #include <xen/interface/xen.h> +#include <xen/interface/version.h> +#include <xen/hvc-console.h> #include "multicalls.h" #include "mmu.h" @@ -114,6 +117,37 @@ static inline void check_zero(void) #endif /* CONFIG_XEN_DEBUG_FS */ + +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ + + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -458,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte) { return pte_mfn_to_pfn(pte.pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); pgdval_t xen_pgd_val(pgd_t pgd) { return pte_mfn_to_pfn(pgd.pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); pte_t xen_make_pte(pteval_t pte) { pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); return native_make_pgd(pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); pmdval_t xen_pmd_val(pmd_t pmd) { return pte_mfn_to_pfn(pmd.pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); void xen_set_pud_hyper(pud_t *ptr, pud_t val) { @@ -556,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd) pmd = pte_pfn_to_mfn(pmd); return native_make_pmd(pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); #if PAGETABLE_LEVELS == 4 pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); pud_t xen_make_pud(pudval_t pud) { @@ -569,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud) return native_make_pud(pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); pgd_t *xen_get_user_pgd(pgd_t *pgd) { @@ -1063,18 +1105,14 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = percpu_read(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { load_cr3(swapper_pg_dir); arch_flush_lazy_cpu_mode(); } @@ -1156,6 +1194,709 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } +static __init void xen_pagetable_setup_start(pgd_t *base) +{ +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + xen_setup_shared_info(); +} + +static void xen_write_cr2(unsigned long cr2) +{ + percpu_read(xen_vcpu)->arch.cr2 = cr2; +} + +static unsigned long xen_read_cr2(void) +{ + return percpu_read(xen_vcpu)->arch.cr2; +} + +unsigned long xen_read_cr2_direct(void) +{ + return percpu_read(xen_vcpu_info.arch.cr2); +} + +static void xen_flush_tlb(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_LOCAL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + op = mcs.args; + op->cmd = MMUEXT_INVLPG_LOCAL; + op->arg1.linear_addr = addr & PAGE_MASK; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) +{ + struct { + struct mmuext_op op; + DECLARE_BITMAP(mask, NR_CPUS); + } *args; + struct multicall_space mcs; + + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); + + mcs = xen_mc_entry(sizeof(*args)); + args = mcs.args; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; + + if (va == TLB_FLUSH_ALL) { + args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; + } else { + args->op.cmd = MMUEXT_INVLPG_MULTI; + args->op.arg1.linear_addr = va; + } + + MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + +issue: + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static unsigned long xen_read_cr3(void) +{ + return percpu_read(xen_cr3); +} + +static void set_current_cr3(void *v) +{ + percpu_write(xen_current_cr3, (unsigned long)v); +} + +static void __xen_write_cr3(bool kernel, unsigned long cr3) +{ + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn; + + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; + + WARN_ON(mfn == 0 && kernel); + + mcs = __xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + if (kernel) { + percpu_write(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + percpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +} + +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + + BUG_ON(page->private != 0); + + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + +#ifdef CONFIG_HIGHPTE +static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +{ + pgprot_t prot = PAGE_KERNEL; + + if (PagePinned(page)) + prot = PAGE_KERNEL_RO; + + if (0 && PageHighMem(page)) + printk("mapping highpte %lx type %d prot %s\n", + page_to_pfn(page), type, + (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); + + return kmap_atomic_prot(page, type, prot); +} +#endif + +#ifdef CONFIG_X86_32 +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); + + return pte; +} + +/* Init-time set_pte while constructing initial pagetables, which + doesn't allow RO pagetable pages to be remapped RW */ +static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + pte = mask_rw_pte(ptep, pte); + + xen_set_pte(ptep, pte); +} +#endif + +/* Early in boot, while setting up the initial pagetable, assume + everything is pinned. */ +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +/* Early release_pte assumes that all pts are pinned, since there's + only init_mm and anything attached to that is pinned. */ +static void xen_release_pte_init(unsigned long pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +/* This needs to make sure the new pte page is pinned iff its being + attached to a pinned pagetable. */ +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(virt_to_page(mm->pgd))) { + SetPagePinned(page); + + vm_unmap_aliases(); + if (!PageHighMem(page)) { + make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else { + /* make sure there are no stray mappings of + this page */ + kmap_flush_unused(); + } + } +} + +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PTE); +} + +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PMD); +} + +/* This should never happen until we're OK to use struct page */ +static void xen_release_ptpage(unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(page)) { + if (!PageHighMem(page)) { + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } + ClearPagePinned(page); + } +} + +static void xen_release_pte(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pmd(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + +void __init xen_reserve_top(void) +{ +#ifdef CONFIG_X86_32 + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top); +#endif /* CONFIG_X86_32 */ +} + +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. + */ +static void *__ka(phys_addr_t paddr) +{ +#ifdef CONFIG_X86_64 + return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif +} + +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_PFN_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + /* Reuse or allocate a page of ptes */ + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + } + + /* Install mappings */ + for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for (i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Set up identity map */ + xen_map_identity_early(level2_ident_pgt, max_pfn); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + + /* + * At this stage there can be no user pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + + return pgd; +} +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pmd_t *kernel_pmd; + + init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; +} +#endif /* CONFIG_X86_64 */ + +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +{ + pte_t pte; + + phys >>= PAGE_SHIFT; + + switch (idx) { + case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +#ifdef CONFIG_X86_F00F_BUG + case FIX_F00F_IDT: +#endif +#ifdef CONFIG_X86_32 + case FIX_WP_TEST: + case FIX_VDSO: +# ifdef CONFIG_HIGHMEM + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ +#endif + pte = pfn_pte(phys, prot); + break; + + default: + pte = mfn_pte(phys, prot); + break; + } + + __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif +} + +__init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pte = xen_set_pte; + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = xen_set_pgd; +#endif + + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif + xen_mark_init_mm_pinned(); +} + + +const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, + + .alloc_pte = xen_alloc_pte_init, + .release_pte = xen_release_pte_init, + .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd_clone = paravirt_nop, + .release_pmd = xen_release_pte_init, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = xen_kmap_atomic_pte, +#endif + +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else + .set_pte = xen_set_pte_init, +#endif + .set_pte_at = xen_set_pte_at, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + + .pte_val = PV_CALLEE_SAVE(xen_pte_val), + .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), + + .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), + +#ifdef CONFIG_X86_PAE + .set_pte_atomic = xen_set_pte_atomic, + .set_pte_present = xen_set_pte_at, + .pte_clear = xen_pte_clear, + .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, + + .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), + .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), + +#if PAGETABLE_LEVELS == 4 + .pud_val = PV_CALLEE_SAVE(xen_pud_val), + .make_pud = PV_CALLEE_SAVE(xen_make_pud), + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + + .activate_mm = xen_activate_mm, + .dup_mmap = xen_dup_mmap, + .exit_mmap = xen_exit_mmap, + + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, + + .set_fixmap = xen_set_fixmap, +}; + + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 98d71659da5..24d1b44a337 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +unsigned long xen_read_cr2_direct(void); + +extern const struct pv_mmu_ops xen_mmu_ops; #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index fa3e10725d9..9e565da5d1f 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -41,7 +41,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); + local_irq_restore(percpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e2069c7c..035582ae815 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); cpu_set(cpu, cpu_online_map); - x86_write_percpu(cpu_state, CPU_ONLINE); + percpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ @@ -174,7 +170,7 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(xen_initial_gdt); xen_setup_vcpu_info_placement(); } @@ -239,6 +235,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; +#else + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ @@ -283,23 +281,14 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - -#ifdef CONFIG_X86_32 - init_gdt(cpu); per_cpu(current_task, cpu) = idle; +#ifdef CONFIG_X86_32 irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif xen_setup_timer(cpu); xen_init_lock_cpu(cpu); @@ -445,11 +434,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -459,11 +444,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 212ffe012b7..95be7b43472 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -6,6 +6,7 @@ #include <asm/xen/hypercall.h> #include <asm/xen/page.h> +#include <asm/fixmap.h> #include "xen-ops.h" #include "mmu.h" diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S new file mode 100644 index 00000000000..79d7362ad6d --- /dev/null +++ b/arch/x86/xen/xen-asm.S @@ -0,0 +1,142 @@ +/* + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in percpu data) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. + */ + +#include <asm/asm-offsets.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> + +#include "xen-asm.h" + +/* + * Enable events. This clears the event mask and tests the pending + * event status with one and operation. If there are pending events, + * then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + * Disabling events is simply a matter of making the event mask + * non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + * (xen_)save_fl is used to get the current interrupt enable status. + * Callers expect the status to be in X86_EFLAGS_IF, and other bits + * may be set in the return value. We take advantage of this by + * making sure that X86_EFLAGS_IF has the right value (and other bits + * in that byte are 0), but other bits in the return value are + * undefined. We need to toggle the state of the bit, because Xen and + * x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + setz %ah + addb %ah, %ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + * In principle the caller should be passing us a value return from + * xen_save_fl_direct, but for robustness sake we test only the + * X86_EFLAGS_IF flag rather than the whole byte. After setting the + * interrupt mask state, it checks for unmasked pending events and + * enters the hypervisor to get them delivered if so. + */ +ENTRY(xen_restore_fl_direct) +#ifdef CONFIG_X86_64 + testw $X86_EFLAGS_IF, %di +#else + testb $X86_EFLAGS_IF>>8, %ah +#endif + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + * Force an event check by making a hypercall, but preserve regs + * before making the call. + */ +check_events: +#ifdef CONFIG_X86_32 + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax +#else + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call xen_force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax +#endif + ret diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h new file mode 100644 index 00000000000..465276467a4 --- /dev/null +++ b/arch/x86/xen/xen-asm.h @@ -0,0 +1,12 @@ +#ifndef _XEN_XEN_ASM_H +#define _XEN_XEN_ASM_H + +#include <linux/linkage.h> + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 42786f59d9c..88e15deb8b8 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,117 +1,43 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -#include <linux/linkage.h> - -#include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/segment.h> #include <xen/interface/xen.h> -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) +#include "xen-asm.h" /* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) +check_events: + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. + * We can't use sysexit directly, because we're not running in ring0. + * But we can easily fake it up using iret. Assuming xen_sysexit is + * jumped to with a standard stack frame, we can just strip it back to + * a standard iret frame and use iret. */ ENTRY(xen_sysexit) movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ @@ -122,33 +48,31 @@ ENTRY(xen_sysexit) ENDPROC(xen_sysexit) /* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. + * This is run where a normal iret would be run, with the same stack setup: + * 8: eflags + * 4: cs + * esp-> 0: eip + * + * This attempts to make sure that any pending events are dealt with + * on return to usermode, but there is a small window in which an + * event can happen just before entering usermode. If the nested + * interrupt ends up setting one of the TIF_WORK_MASK pending work + * flags, they will not be tested again before returning to + * usermode. This means that a process can end up with pending work, + * which will be unprocessed until the process enters and leaves the + * kernel again, which could be an unbounded amount of time. This + * means that a pending signal or reschedule event could be + * indefinitely delayed. + * + * The fix is to notice a nested interrupt in the critical window, and + * if one occurs, then fold the nested interrupt into the current + * interrupt stack frame, and re-process it iteratively rather than + * recursively. This means that it will exit via the normal path, and + * all pending work will be dealt with appropriately. + * + * Because the nested interrupt handler needs to deal with the current + * stack state in whatever form its in, we keep things simple by only + * using a single register which is pushed/popped on the stack. */ ENTRY(xen_iret) /* test eflags for special cases */ @@ -158,13 +82,15 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ + /* + * Store vcpu_info pointer for easy access. Do it this way to + * avoid having to reload %fs + */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax + movl TI_cpu(%eax), %eax + movl __per_cpu_offset(,%eax,4), %eax + mov per_cpu__xen_vcpu(%eax), %eax #else movl per_cpu__xen_vcpu, %eax #endif @@ -172,37 +98,46 @@ ENTRY(xen_iret) /* check IF state we're restoring */ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ + /* + * Maybe enable events. Once this happens we could get a + * recursive event, so the critical region starts immediately + * afterwards. However, if that happens we don't end up + * resuming the code, so we don't have to be worried about + * being preempted to another CPU. + */ setz XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ cmpw $0x0001, XEN_vcpu_info_pending(%eax) - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ + /* + * If there's something pending, mask events again so we can + * jump back into xen_hypervisor_callback + */ sete XEN_vcpu_info_mask(%eax) popl %eax - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ + /* + * From this point on the registers are restored and the stack + * updated, so we don't need to worry about it if we're + * preempted + */ iret_restore_end: - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ + /* + * Jump to hypervisor_callback after fixing up the stack. + * Events are masked, so jumping out of the critical region is + * OK. + */ je xen_hypervisor_callback 1: iret xen_iret_end_crit: -.section __ex_table,"a" +.section __ex_table, "a" .align 4 - .long 1b,iret_exc + .long 1b, iret_exc .previous hyper_iret: @@ -212,55 +147,55 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. + * This is called by xen_hypervisor_callback in entry.S when it sees + * that the EIP at the time of interrupt was between + * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in + * %eax so we can do a more refined determination of what to do. + * + * The stack format at this point is: + * ---------------- + * ss : (ss/esp may be present if we came from usermode) + * esp : + * eflags } outer exception info + * cs } + * eip } + * ---------------- <- edi (copy dest) + * eax : outer eax if it hasn't been restored + * ---------------- + * eflags } nested exception info + * cs } (no ss/esp because we're nested + * eip } from the same ring) + * orig_eax }<- esi (copy src) + * - - - - - - - - + * fs } + * es } + * ds } SAVE_ALL state + * eax } + * : : + * ebx }<- esp + * ---------------- + * + * In order to deliver the nested exception properly, we need to shift + * everything from the return addr up to the error code so it sits + * just under the outer exception info. This means that when we + * handle the exception, we do it in the context of the outer + * exception rather than starting a new one. + * + * The only caveat is that if the outer eax hasn't been restored yet + * (ie, it's still on stack), we need to insert its value into the + * SAVE_ALL state before going on, since it's usermode state which we + * eventually need to restore. */ ENTRY(xen_iret_crit_fixup) /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. + * Paranoia: Make sure we're really coming from kernel space. + * One could imagine a case where userspace jumps into the + * critical range address, but just before the CPU delivers a + * GP, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. The Intel documents + * explicitly say that the reported EIP for a bad jump is the + * jump instruction itself, not the destination, but some + * virtual environments get this wrong. */ movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx @@ -270,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) lea PT_ORIG_EAX(%esp), %esi lea PT_EFLAGS(%esp), %edi - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ + /* + * If eip is before iret_restore_end then stack + * hasn't been restored yet. + */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ movl %eax, PT_EAX(%esp) - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ /* set up the copy */ 1: std @@ -286,20 +223,6 @@ ENTRY(xen_iret_crit_fixup) rep movsl cld - lea 4(%edi),%esp /* point esp to new frame */ + lea 4(%edi), %esp /* point esp to new frame */ 2: jmp xen_do_upcall - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 05794c566e8..02f496a8dba 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -1,174 +1,45 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -#include <linux/linkage.h> - -#include <asm/asm-offsets.h> -#include <asm/processor-flags.h> #include <asm/errno.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> #include <asm/segment.h> #include <xen/interface/xen.h> -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#if 1 -/* - x86-64 does not yet support direct access to percpu variables - via a segment override, so we just need to make sure this code - never gets used - */ -#define BUG ud2a -#define PER_CPU_VAR(var, off) 0xdeadbeef -#endif - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - BUG - - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - BUG - - movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - BUG - - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - BUG - - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %rax - push %rcx - push %rdx - push %rsi - push %rdi - push %r8 - push %r9 - push %r10 - push %r11 - call xen_force_evtchn_callback - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rdi - pop %rsi - pop %rdx - pop %rcx - pop %rax - ret +#include "xen-asm.h" ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp),%rcx - mov 8+8(%rsp),%r11 + mov 8+0(%rsp), %rcx + mov 8+8(%rsp), %r11 ret $16 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* - Xen64 iret frame: - - ss - rsp - rflags - cs - rip <-- standard iret frame - - flags - - rcx } - r11 }<-- pushed by hypercall page -rsp -> rax } + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } */ ENTRY(xen_iret) pushq $0 @@ -177,8 +48,8 @@ ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) /* - sysexit is not used for 64-bit processes, so it's - only ever used to return to 32-bit compat userspace. + * sysexit is not used for 64-bit processes, so it's only ever used to + * return to 32-bit compat userspace. */ ENTRY(xen_sysexit) pushq $__USER32_DS @@ -193,13 +64,15 @@ ENDPATCH(xen_sysexit) RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ + movq %rsp, PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER_CS pushq %rcx @@ -210,13 +83,15 @@ ENDPATCH(xen_sysret64) RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack, %rsp + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ + movq %rsp, PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER32_CS pushq %rcx @@ -227,28 +102,27 @@ ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) /* - Xen handles syscall callbacks much like ordinary exceptions, - which means we have: - - kernel gs - - kernel rsp - - an iret-like stack frame on the stack (including rcx and r11): - ss - rsp - rflags - cs - rip - r11 - rsp-> rcx - - In all the entrypoints, we undo all that to make it look - like a CPU-generated syscall/sysenter and jump to the normal - entrypoint. + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + * + * In all the entrypoints, we undo all that to make it look like a + * CPU-generated syscall/sysenter and jump to the normal entrypoint. */ .macro undo_xen_syscall - mov 0*8(%rsp),%rcx - mov 1*8(%rsp),%r11 - mov 5*8(%rsp),%rsp + mov 0*8(%rsp), %rcx + mov 1*8(%rsp), %r11 + mov 5*8(%rsp), %rsp .endm /* Normal 64-bit system call target */ @@ -275,7 +149,7 @@ ENDPROC(xen_sysenter_target) ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $VGCF_in_syscall jmp hypercall_iret diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c1f8faf0a2c..2f5ef2632ea 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,9 +10,12 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +extern void *xen_initial_gdt; + struct trap_info; void xen_copy_trap_info(struct trap_info *traps); +DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -22,6 +25,13 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_setup_machphys_mapping(void); +pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_ident_map_ISA(void); +void xen_reserve_top(void); + +void xen_leave_lazy(void); +void xen_post_allocator_init(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); |