From 6af6632a1c1cac169fe7fbedb2f642b4ebb38323 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Nov 2007 16:52:25 -0800 Subject: [SCSI] boot options: correct option name and tell where to find docs for it Minor corrections and additions to 'scsi_logging_level', as pointed out by Chuck Ebbert. Also point out the IBM S390-tools 'scsi_logging_level' script. Signed-off-by: Randy Dunlap Signed-off-by: James Bottomley --- Documentation/kernel-parameters.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c4178778e7f..99938b8b50e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1593,7 +1593,13 @@ and is between 256 and 4096 characters. It is defined in the file Format: :: (flags are integer value) - scsi_logging= [SCSI] + scsi_logging_level= [SCSI] a bit mask of logging levels + See drivers/scsi/scsi_logging.h for bits. Also + settable via sysctl at dev.scsi.logging_level + (/proc/sys/dev/scsi/logging_level). + There is also a nice 'scsi_logging_level' script in the + S390-tools package, available for download at + http://www-128.ibm.com/developerworks/linux/linux390/s390-tools-1.5.4.html scsi_mod.scan= [SCSI] sync (default) scans SCSI busses as they are discovered. async scans them in kernel threads, -- cgit v1.2.3-70-g09d2 From e7ba176b47db2ed53f258a6b4fe9d9fc6fa437a9 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 10 Oct 2007 14:58:29 +0200 Subject: [AVR32] NMI debugging Change the NMI handler to use the die notifier chain to signal anyone who cares. Add a simple "nmi debugger" which hooks into this chain and that may dump registers, task state, etc. when it happens. Signed-off-by: Haavard Skinnemoen --- Documentation/kernel-parameters.txt | 5 +++ arch/avr32/Kconfig | 10 +++++ arch/avr32/kernel/Makefile | 1 + arch/avr32/kernel/irq.c | 11 +++++ arch/avr32/kernel/nmi_debug.c | 82 +++++++++++++++++++++++++++++++++++++ arch/avr32/kernel/traps.c | 21 ++++++++-- arch/avr32/mach-at32ap/at32ap700x.c | 1 + arch/avr32/mach-at32ap/extint.c | 39 ++++++++++++++---- include/asm-avr32/irq.h | 5 +++ include/asm-avr32/kdebug.h | 1 + 10 files changed, 166 insertions(+), 10 deletions(-) create mode 100644 arch/avr32/kernel/nmi_debug.c (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c4178778e7f..17fc60e3244 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -34,6 +34,7 @@ parameter is applicable: ALSA ALSA sound support is enabled. APIC APIC support is enabled. APM Advanced Power Management support is enabled. + AVR32 AVR32 architecture is enabled. AX25 Appropriate AX.25 support is enabled. BLACKFIN Blackfin architecture is enabled. DRM Direct Rendering Management support is enabled. @@ -1123,6 +1124,10 @@ and is between 256 and 4096 characters. It is defined in the file of returning the full 64-bit number. The default is to return 64-bit inode numbers. + nmi_debug= [KNL,AVR32] Specify one or more actions to take + when a NMI is triggered. + Format: [state][,regs][,debounce][,die] + nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels no387 [BUGS=X86-32] Tells the kernel to use the 387 maths diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 516015b3293..e34e2c9c94c 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -170,6 +170,16 @@ config OWNERSHIP_TRACE enabling Nexus-compliant debuggers to keep track of the PID of the currently executing task. +config NMI_DEBUGGING + bool "NMI Debugging" + default n + help + Say Y here and pass the nmi_debug command-line parameter to + the kernel to turn on NMI debugging. Depending on the value + of the nmi_debug option, various pieces of information will + be dumped to the console when a Non-Maskable Interrupt + happens. + # FPU emulation goes here source "kernel/Kconfig.hz" diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile index bc224a4e39f..e4b6d122b03 100644 --- a/arch/avr32/kernel/Makefile +++ b/arch/avr32/kernel/Makefile @@ -12,3 +12,4 @@ obj-y += init_task.o switch_to.o cpu.o obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_NMI_DEBUGGING) += nmi_debug.o diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index 61f2de266f6..a8e767d836a 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -25,6 +25,17 @@ void ack_bad_irq(unsigned int irq) printk("unexpected IRQ %u\n", irq); } +/* May be overridden by platform code */ +int __weak nmi_enable(void) +{ + return -ENOSYS; +} + +void __weak nmi_disable(void) +{ + +} + #ifdef CONFIG_PROC_FS int show_interrupts(struct seq_file *p, void *v) { diff --git a/arch/avr32/kernel/nmi_debug.c b/arch/avr32/kernel/nmi_debug.c new file mode 100644 index 00000000000..3414b8566c2 --- /dev/null +++ b/arch/avr32/kernel/nmi_debug.c @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include + +enum nmi_action { + NMI_SHOW_STATE = 1 << 0, + NMI_SHOW_REGS = 1 << 1, + NMI_DIE = 1 << 2, + NMI_DEBOUNCE = 1 << 3, +}; + +static unsigned long nmi_actions; + +static int nmi_debug_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + + if (likely(val != DIE_NMI)) + return NOTIFY_DONE; + + if (nmi_actions & NMI_SHOW_STATE) + show_state(); + if (nmi_actions & NMI_SHOW_REGS) + show_regs(args->regs); + if (nmi_actions & NMI_DEBOUNCE) + mdelay(10); + if (nmi_actions & NMI_DIE) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block nmi_debug_nb = { + .notifier_call = nmi_debug_notify, +}; + +static int __init nmi_debug_setup(char *str) +{ + char *p, *sep; + + register_die_notifier(&nmi_debug_nb); + if (nmi_enable()) { + printk(KERN_WARNING "Unable to enable NMI.\n"); + return 0; + } + + if (*str != '=') + return 0; + + for (p = str + 1; *p; p = sep + 1) { + sep = strchr(p, ','); + if (sep) + *sep = 0; + if (strcmp(p, "state") == 0) + nmi_actions |= NMI_SHOW_STATE; + else if (strcmp(p, "regs") == 0) + nmi_actions |= NMI_SHOW_REGS; + else if (strcmp(p, "debounce") == 0) + nmi_actions |= NMI_DEBOUNCE; + else if (strcmp(p, "die") == 0) + nmi_actions |= NMI_DIE; + else + printk(KERN_WARNING "NMI: Unrecognized action `%s'\n", + p); + if (!sep) + break; + } + + return 0; +} +__setup("nmi_debug", nmi_debug_setup); diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index 870c075e631..cf6f686d9b0 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -107,9 +108,23 @@ void _exception(long signr, struct pt_regs *regs, int code, asmlinkage void do_nmi(unsigned long ecr, struct pt_regs *regs) { - printk(KERN_ALERT "Got Non-Maskable Interrupt, dumping regs\n"); - show_regs_log_lvl(regs, KERN_ALERT); - show_stack_log_lvl(current, regs->sp, regs, KERN_ALERT); + int ret; + + nmi_enter(); + + ret = notify_die(DIE_NMI, "NMI", regs, 0, ecr, SIGINT); + switch (ret) { + case NOTIFY_OK: + case NOTIFY_STOP: + return; + case NOTIFY_BAD: + die("Fatal Non-Maskable Interrupt", regs, SIGINT); + default: + break; + } + + printk(KERN_ALERT "Got NMI, but nobody cared. Disabling...\n"); + nmi_disable(); } asmlinkage void do_critical_exception(unsigned long ecr, struct pt_regs *regs) diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 9386e1f82fb..14e61f05e1f 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index f5bfd4c81fe..e108e7bba8c 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -26,16 +26,10 @@ #define EIC_MODE 0x0014 #define EIC_EDGE 0x0018 #define EIC_LEVEL 0x001c -#define EIC_TEST 0x0020 #define EIC_NMIC 0x0024 -/* Bitfields in TEST */ -#define EIC_TESTEN_OFFSET 31 -#define EIC_TESTEN_SIZE 1 - /* Bitfields in NMIC */ -#define EIC_EN_OFFSET 0 -#define EIC_EN_SIZE 1 +#define EIC_NMIC_ENABLE (1 << 0) /* Bit manipulation macros */ #define EIC_BIT(name) \ @@ -63,6 +57,9 @@ struct eic { unsigned int first_irq; }; +static struct eic *nmi_eic; +static bool nmi_enabled; + static void eic_ack_irq(unsigned int irq) { struct eic *eic = get_irq_chip_data(irq); @@ -174,6 +171,24 @@ static void demux_eic_irq(unsigned int irq, struct irq_desc *desc) } } +int nmi_enable(void) +{ + nmi_enabled = true; + + if (nmi_eic) + eic_writel(nmi_eic, NMIC, EIC_NMIC_ENABLE); + + return 0; +} + +void nmi_disable(void) +{ + if (nmi_eic) + eic_writel(nmi_eic, NMIC, 0); + + nmi_enabled = false; +} + static int __init eic_probe(struct platform_device *pdev) { struct eic *eic; @@ -230,6 +245,16 @@ static int __init eic_probe(struct platform_device *pdev) set_irq_chained_handler(int_irq, demux_eic_irq); set_irq_data(int_irq, eic); + if (pdev->id == 0) { + nmi_eic = eic; + if (nmi_enabled) + /* + * Someone tried to enable NMI before we were + * ready. Do it now. + */ + nmi_enable(); + } + dev_info(&pdev->dev, "External Interrupt Controller at 0x%p, IRQ %u\n", eic->regs, int_irq); diff --git a/include/asm-avr32/irq.h b/include/asm-avr32/irq.h index 83e6549d778..9315724c059 100644 --- a/include/asm-avr32/irq.h +++ b/include/asm-avr32/irq.h @@ -11,4 +11,9 @@ #define irq_canonicalize(i) (i) +#ifndef __ASSEMBLER__ +int nmi_enable(void); +void nmi_disable(void); +#endif + #endif /* __ASM_AVR32_IOCTLS_H */ diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h index fd7e99046b2..ca4f9542365 100644 --- a/include/asm-avr32/kdebug.h +++ b/include/asm-avr32/kdebug.h @@ -5,6 +5,7 @@ enum die_val { DIE_BREAKPOINT, DIE_SSTEP, + DIE_NMI, }; #endif /* __ASM_AVR32_KDEBUG_H */ -- cgit v1.2.3-70-g09d2 From 661ca0da3e1fa60c9e56f995fd83772ffa9ffe90 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 26 Jan 2008 14:10:36 +0100 Subject: [S390] Cleanup in Documentation/kernel-parameters.txt. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 65de5ba7b74..d29b6d926a4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -370,8 +370,6 @@ and is between 256 and 4096 characters. It is defined in the file configured. Potentially dangerous and should only be used if you are entirely sure of the consequences. - chandev= [HW,NET] Generic channel device initialisation - checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } See security/selinux/Kconfig help text. @@ -382,6 +380,12 @@ and is between 256 and 4096 characters. It is defined in the file Value can be changed at runtime via /selinux/checkreqprot. + cio_ignore= [S390] + See Documentation/s390/CommonIO for details. + + cio_msg= [S390] + See Documentation/s390/CommonIO for details. + clock= [BUGS=X86-32, HW] gettimeofday clocksource override. [Deprecated] Forces specified clocksource (if available) to be used -- cgit v1.2.3-70-g09d2 From 14ff56bbb3422cc465a12e81f5e1b5f06168aeda Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sat, 26 Jan 2008 14:10:37 +0100 Subject: [S390] cio: Dump ccw device information in case of timeout. Information about a ccw device will be dumped in case of a ccw timeout. This can be enabled with the kernel parameter ccw_timeout_log. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 3 +++ Documentation/s390/CommonIO | 5 ++++ drivers/s390/cio/device_fsm.c | 50 +++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d29b6d926a4..880f882160e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -370,6 +370,9 @@ and is between 256 and 4096 characters. It is defined in the file configured. Potentially dangerous and should only be used if you are entirely sure of the consequences. + ccw_timeout_log [S390] + See Documentation/s390/CommonIO for details. + checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } See security/selinux/Kconfig help text. diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index 86320aa3fb0..8fbc0a85287 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO @@ -4,6 +4,11 @@ S/390 common I/O-Layer - command line parameters, procfs and debugfs entries Command line parameters ----------------------- +* ccw_timeout_log + + Enable logging of debug information in case of ccw device timeouts. + + * cio_msg = yes | no Determines whether information on found devices and sensed device diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index bfad421cda6..31c0f788516 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -25,6 +25,8 @@ #include "ioasm.h" #include "chp.h" +static int timeout_log_enabled; + int device_is_online(struct subchannel *sch) { @@ -82,6 +84,52 @@ int device_trigger_verify(struct subchannel *sch) return 0; } +static int __init ccw_timeout_log_setup(char *unused) +{ + timeout_log_enabled = 1; + return 1; +} + +__setup("ccw_timeout_log", ccw_timeout_log_setup); + +static void ccw_timeout_log(struct ccw_device *cdev) +{ + struct schib schib; + struct subchannel *sch; + int cc; + + sch = to_subchannel(cdev->dev.parent); + cc = stsch(sch->schid, &schib); + + printk(KERN_WARNING "cio: ccw device timeout occurred at %llx, " + "device information:\n", get_clock()); + printk(KERN_WARNING "cio: orb:\n"); + print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, + &sch->orb, sizeof(sch->orb), 0); + printk(KERN_WARNING "cio: ccw device bus id: %s\n", cdev->dev.bus_id); + printk(KERN_WARNING "cio: subchannel bus id: %s\n", sch->dev.bus_id); + printk(KERN_WARNING "cio: subchannel lpm: %02x, opm: %02x, " + "vpm: %02x\n", sch->lpm, sch->opm, sch->vpm); + + if ((void *)(addr_t)sch->orb.cpa == &sch->sense_ccw || + (void *)(addr_t)sch->orb.cpa == cdev->private->iccws) + printk(KERN_WARNING "cio: last channel program (intern):\n"); + else + printk(KERN_WARNING "cio: last channel program:\n"); + + print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, + (void *)(addr_t)sch->orb.cpa, sizeof(struct ccw1), 0); + printk(KERN_WARNING "cio: ccw device state: %d\n", + cdev->private->state); + printk(KERN_WARNING "cio: store subchannel returned: cc=%d\n", cc); + printk(KERN_WARNING "cio: schib:\n"); + print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, + &schib, sizeof(schib), 0); + printk(KERN_WARNING "cio: ccw device flags:\n"); + print_hex_dump(KERN_WARNING, "cio: ", DUMP_PREFIX_NONE, 16, 1, + &cdev->private->flags, sizeof(cdev->private->flags), 0); +} + /* * Timeout function. It just triggers a DEV_EVENT_TIMEOUT. */ @@ -92,6 +140,8 @@ ccw_device_timeout(unsigned long data) cdev = (struct ccw_device *) data; spin_lock_irq(cdev->ccwlock); + if (timeout_log_enabled) + ccw_timeout_log(cdev); dev_fsm_event(cdev, DEV_EVENT_TIMEOUT); spin_unlock_irq(cdev->ccwlock); } -- cgit v1.2.3-70-g09d2 From b02aae9cf52956dfe1bec73f77f81a3d05d3902b Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Wed, 30 Jan 2008 13:30:05 +0100 Subject: x86: provide a DMI based port 0x80 I/O delay override. x86: provide a DMI based port 0x80 I/O delay override. Certain (HP) laptops experience trouble from our port 0x80 I/O delay writes. This patch provides for a DMI based switch to the "alternate diagnostic port" 0xed (as used by some BIOSes as well) for these. David P. Reed confirmed that port 0xed works for him and provides a proper delay. The symptoms of _not_ working are a hanging machine, with "hwclock" use being a direct trigger. Earlier versions of this attempted to simply use udelay(2), with the 2 being a value tested to be a nicely conservative upper-bound with help from many on the linux-kernel mailinglist but that approach has two problems. First, pre-loops_per_jiffy calibration (which is post PIT init while some implementations of the PIT are actually one of the historically problematic devices that need the delay) udelay() isn't particularly well-defined. We could initialise loops_per_jiffy conservatively (and based on CPU family so as to not unduly delay old machines) which would sort of work, but... Second, delaying isn't the only effect that a write to port 0x80 has. It's also a PCI posting barrier which some devices may be explicitly or implicitly relying on. Alan Cox did a survey and found evidence that additionally some drivers may be racy on SMP without the bus locking outb. Switching to an inb() makes the timing too unpredictable and as such, this DMI based switch should be the safest approach for now. Any more invasive changes should get more rigid testing first. It's moreover only very few machines with the problem and a DMI based hack seems to fit that situation. This also introduces a command-line parameter "io_delay" to override the DMI based choice again: io_delay= where "standard" means using the standard port 0x80 and "alternate" port 0xed. This retains the udelay method as a config (CONFIG_UDELAY_IO_DELAY) and command-line ("io_delay=udelay") choice for testing purposes as well. This does not change the io_delay() in the boot code which is using the same port 0x80 I/O delay but those do not appear to be a problem as David P. Reed reported the problem was already gone after using the udelay version. He moreover reported that booting with "acpi=off" also fixed things and seeing as how ACPI isn't touched until after this DMI based I/O port switch I believe it's safe to leave the ones in the boot code be. The DMI strings from David's HP Pavilion dv9000z are in there already and we need to get/verify the DMI info from other machines with the problem, notably the HP Pavilion dv6000z. This patch is partly based on earlier patches from Pavel Machek and David P. Reed. Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 8 +++ arch/x86/Kconfig.debug | 9 +++ arch/x86/boot/compressed/misc_32.c | 8 +-- arch/x86/boot/compressed/misc_64.c | 8 +-- arch/x86/kernel/Makefile_32 | 2 +- arch/x86/kernel/Makefile_64 | 2 +- arch/x86/kernel/io_delay.c | 106 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_32.c | 2 + arch/x86/kernel/setup_64.c | 2 + include/asm-x86/io_32.h | 8 ++- include/asm-x86/io_64.h | 33 +++++++---- 11 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 arch/x86/kernel/io_delay.c (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 880f882160e..9e605605842 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -794,6 +794,14 @@ and is between 256 and 4096 characters. It is defined in the file for translation below 32 bit and if not available then look in the higher range. + io_delay= [X86-32,X86-64] I/O delay method + standard + Standard port 0x80 delay + alternate + Alternate port 0xed delay + udelay + Simple two microsecond delay + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in arch/alpha/kernel/core_marvel.c. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 761ca7b5f12..40aba670fb3 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -112,4 +112,13 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config UDELAY_IO_DELAY + bool "Delay I/O through udelay instead of outb" + depends on DEBUG_KERNEL + help + Make inb_p/outb_p use udelay() based delays by default. Please note + that udelay() does not have the same bus-level side-effects that + the normal outb based delay does meaning this could cause drivers + to change behaviour and/or bugs to surface. + endmenu diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index b74d60d1b2f..288e16283ef 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c @@ -276,10 +276,10 @@ static void putstr(const char *s) RM_SCREEN_INFO.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ - outb_p(14, vidport); - outb_p(0xff & (pos >> 9), vidport+1); - outb_p(15, vidport); - outb_p(0xff & (pos >> 1), vidport+1); + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); } static void* memset(void* s, int c, unsigned n) diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c index 6ea015aa65e..43e5fcc37be 100644 --- a/arch/x86/boot/compressed/misc_64.c +++ b/arch/x86/boot/compressed/misc_64.c @@ -269,10 +269,10 @@ static void putstr(const char *s) RM_SCREEN_INFO.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ - outb_p(14, vidport); - outb_p(0xff & (pos >> 9), vidport+1); - outb_p(15, vidport); - outb_p(0xff & (pos >> 1), vidport+1); + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); } static void* memset(void* s, int c, unsigned n) diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index a7bc93c2766..0cc1981d1e3 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 5a88890d8ee..08a68f0d8fd 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o + i8253.o io_delay.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c new file mode 100644 index 00000000000..4d955e74b97 --- /dev/null +++ b/arch/x86/kernel/io_delay.c @@ -0,0 +1,106 @@ +/* + * I/O delay strategies for inb_p/outb_p + */ +#include +#include +#include +#include +#include +#include + +/* + * Allow for a DMI based override of port 0x80 needed for certain HP laptops + */ +#define IO_DELAY_PORT_STD 0x80 +#define IO_DELAY_PORT_ALT 0xed + +static void standard_io_delay(void) +{ + asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD)); +} + +static void alternate_io_delay(void) +{ + asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT)); +} + +/* + * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't + * have the bus-level side-effects that outb does + */ +#define IO_DELAY_USECS 2 + +/* + * High on a hill was a lonely goatherd + */ +static void udelay_io_delay(void) +{ + udelay(IO_DELAY_USECS); +} + +#ifndef CONFIG_UDELAY_IO_DELAY +static void (*io_delay)(void) = standard_io_delay; +#else +static void (*io_delay)(void) = udelay_io_delay; +#endif + +/* + * Paravirt wants native_io_delay to be a constant. + */ +void native_io_delay(void) +{ + io_delay(); +} +EXPORT_SYMBOL(native_io_delay); + +#ifndef CONFIG_UDELAY_IO_DELAY +static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident); + io_delay = alternate_io_delay; + return 0; +} + +static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = { + { + .callback = dmi_alternate_io_delay_port, + .ident = "HP Pavilion dv9000z", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B9") + } + }, + { + } +}; + +static int __initdata io_delay_override; + +void __init io_delay_init(void) +{ + if (!io_delay_override) + dmi_check_system(alternate_io_delay_port_dmi_table); +} +#endif + +static int __init io_delay_param(char *s) +{ + if (!s) + return -EINVAL; + + if (!strcmp(s, "standard")) + io_delay = standard_io_delay; + else if (!strcmp(s, "alternate")) + io_delay = alternate_io_delay; + else if (!strcmp(s, "udelay")) + io_delay = udelay_io_delay; + else + return -EINVAL; + +#ifndef CONFIG_UDELAY_IO_DELAY + io_delay_override = 1; +#endif + return 0; +} + +early_param("io_delay", io_delay_param); diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 9c24b45b513..51bdc0b1b72 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); + io_delay_init();; + #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(); #endif diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 30d94d1d5f5..ec976edf039 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); + io_delay_init(); + #ifdef CONFIG_SMP /* setup to use the static apicid table during kernel startup */ x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index fe881cd1e6f..a8d25c38b91 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -250,10 +250,14 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -static inline void native_io_delay(void) +#ifndef CONFIG_UDELAY_IO_DELAY +extern void io_delay_init(void); +#else +static inline void io_delay_init(void) { - asm volatile("outb %%al,$0x80" : : : "memory"); } +#endif +extern void native_io_delay(void); #if defined(CONFIG_PARAVIRT) #include diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index a037b079433..5bebaf96169 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,13 +35,24 @@ * - Arnaldo Carvalho de Melo */ -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#ifndef CONFIG_UDELAY_IO_DELAY +extern void io_delay_init(void); +#else +static inline void io_delay_init(void) +{ +} +#endif +extern void native_io_delay(void); +static inline void slow_down_io(void) +{ + native_io_delay(); #ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); #endif +} /* * Talk about misusing macros.. @@ -50,21 +61,21 @@ static inline void out##s(unsigned x value, unsigned short port) { #define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port)) #define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUT1(s,x) __OUT2(s,s1,"w"); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); } #define __IN1(s) \ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; #define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port)) -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +#define __IN(s,s1) \ +__IN1(s) __IN2(s,s1,"w"); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; } #define __INS(s) \ static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -- cgit v1.2.3-70-g09d2 From 6e7c402590b75b6b45138792445ee0f0315a8473 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:05 +0100 Subject: x86: various changes and cleanups to in_p/out_p delay details various changes to the in_p/out_p delay details: - add the io_delay=none method - make each method selectable from the kernel config - simplify the delay code a bit by getting rid of an indirect function call - add the /proc/sys/kernel/io_delay_type sysctl - change 'io_delay=standard|alternate' to io_delay=0x80 and io_delay=0xed - make the io delay config not depend on CONFIG_DEBUG_KERNEL Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Tested-by: "David P. Reed" --- Documentation/kernel-parameters.txt | 12 ++-- arch/x86/Kconfig.debug | 79 ++++++++++++++++++++++++--- arch/x86/kernel/io_delay.c | 106 +++++++++++++++++------------------- include/asm-x86/io_32.h | 10 +--- include/asm-x86/io_64.h | 10 +--- kernel/sysctl.c | 9 +++ 6 files changed, 143 insertions(+), 83 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9e605605842..b427b7c0e5d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -795,12 +795,14 @@ and is between 256 and 4096 characters. It is defined in the file then look in the higher range. io_delay= [X86-32,X86-64] I/O delay method - standard - Standard port 0x80 delay - alternate - Alternate port 0xed delay + 0x80 + Standard port 0x80 based delay + 0xed + Alternate port 0xed based delay (needed on some systems) udelay - Simple two microsecond delay + Simple two microseconds delay + none + No delay io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 40aba670fb3..77eda46f97b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -112,13 +112,78 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config UDELAY_IO_DELAY - bool "Delay I/O through udelay instead of outb" - depends on DEBUG_KERNEL +# +# IO delay types: +# + +config IO_DELAY_TYPE_0X80 + int + default "0" + +config IO_DELAY_TYPE_0XED + int + default "1" + +config IO_DELAY_TYPE_UDELAY + int + default "2" + +config IO_DELAY_TYPE_NONE + int + default "3" + +choice + prompt "IO delay type" + default IO_DELAY_0X80 + +config IO_DELAY_0X80 + bool "port 0x80 based port-IO delay [recommended]" + help + This is the traditional Linux IO delay used for in/out_p. + It is the most tested hence safest selection here. + +config IO_DELAY_0XED + bool "port 0xed based port-IO delay" + help + Use port 0xed as the IO delay. This frees up port 0x80 which is + often used as a hardware-debug port. + +config IO_DELAY_UDELAY + bool "udelay based port-IO delay" + help + Use udelay(2) as the IO delay method. This provides the delay + while not having any side-effect on the IO port space. + +config IO_DELAY_NONE + bool "no port-IO delay" help - Make inb_p/outb_p use udelay() based delays by default. Please note - that udelay() does not have the same bus-level side-effects that - the normal outb based delay does meaning this could cause drivers - to change behaviour and/or bugs to surface. + No port-IO delay. Will break on old boxes that require port-IO + delay for certain operations. Should work on most new machines. + +endchoice + +if IO_DELAY_0X80 +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0X80 +endif + +if IO_DELAY_0XED +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0XED +endif + +if IO_DELAY_UDELAY +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_UDELAY +endif + +if IO_DELAY_NONE +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_NONE +endif endmenu diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 4d955e74b97..f052e34dc94 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -1,5 +1,9 @@ /* * I/O delay strategies for inb_p/outb_p + * + * Allow for a DMI based override of port 0x80, needed for certain HP laptops + * and possibly other systems. Also allow for the gradual elimination of + * outb_p/inb_p API uses. */ #include #include @@ -8,98 +12,86 @@ #include #include -/* - * Allow for a DMI based override of port 0x80 needed for certain HP laptops - */ -#define IO_DELAY_PORT_STD 0x80 -#define IO_DELAY_PORT_ALT 0xed - -static void standard_io_delay(void) -{ - asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD)); -} - -static void alternate_io_delay(void) -{ - asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT)); -} - -/* - * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't - * have the bus-level side-effects that outb does - */ -#define IO_DELAY_USECS 2 - -/* - * High on a hill was a lonely goatherd - */ -static void udelay_io_delay(void) -{ - udelay(IO_DELAY_USECS); -} +int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; +EXPORT_SYMBOL_GPL(io_delay_type); -#ifndef CONFIG_UDELAY_IO_DELAY -static void (*io_delay)(void) = standard_io_delay; -#else -static void (*io_delay)(void) = udelay_io_delay; -#endif +static int __initdata io_delay_override; /* * Paravirt wants native_io_delay to be a constant. */ void native_io_delay(void) { - io_delay(); + switch (io_delay_type) { + default: + case CONFIG_IO_DELAY_TYPE_0X80: + asm volatile ("outb %al, $0x80"); + break; + case CONFIG_IO_DELAY_TYPE_0XED: + asm volatile ("outb %al, $0xed"); + break; + case CONFIG_IO_DELAY_TYPE_UDELAY: + /* + * 2 usecs is an upper-bound for the outb delay but + * note that udelay doesn't have the bus-level + * side-effects that outb does, nor does udelay() have + * precise timings during very early bootup (the delays + * are shorter until calibrated): + */ + udelay(2); + case CONFIG_IO_DELAY_TYPE_NONE: + break; + } } EXPORT_SYMBOL(native_io_delay); -#ifndef CONFIG_UDELAY_IO_DELAY -static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id) +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) { - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident); - io_delay = alternate_io_delay; + if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", + id->ident); + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; + } + return 0; } -static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = { +/* + * Quirk table for systems that misbehave (lock up, etc.) if port + * 0x80 is used: + */ +static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { { - .callback = dmi_alternate_io_delay_port, + .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion dv9000z", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), DMI_MATCH(DMI_BOARD_NAME, "30B9") } }, - { - } + { } }; -static int __initdata io_delay_override; - void __init io_delay_init(void) { if (!io_delay_override) - dmi_check_system(alternate_io_delay_port_dmi_table); + dmi_check_system(io_delay_0xed_port_dmi_table); } -#endif static int __init io_delay_param(char *s) { - if (!s) - return -EINVAL; - - if (!strcmp(s, "standard")) - io_delay = standard_io_delay; - else if (!strcmp(s, "alternate")) - io_delay = alternate_io_delay; + if (!strcmp(s, "0x80")) + io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; + else if (!strcmp(s, "0xed")) + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; else if (!strcmp(s, "udelay")) - io_delay = udelay_io_delay; + io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY; + else if (!strcmp(s, "none")) + io_delay_type = CONFIG_IO_DELAY_TYPE_NONE; else return -EINVAL; -#ifndef CONFIG_UDELAY_IO_DELAY io_delay_override = 1; -#endif return 0; } diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index a8d25c38b91..2a04bd17eac 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -250,15 +250,11 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -#ifndef CONFIG_UDELAY_IO_DELAY -extern void io_delay_init(void); -#else -static inline void io_delay_init(void) -{ -} -#endif extern void native_io_delay(void); +extern int io_delay_type; +extern void io_delay_init(void); + #if defined(CONFIG_PARAVIRT) #include #else diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 5bebaf96169..dbcc03aa1c6 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,15 +35,11 @@ * - Arnaldo Carvalho de Melo */ -#ifndef CONFIG_UDELAY_IO_DELAY -extern void io_delay_init(void); -#else -static inline void io_delay_init(void) -{ -} -#endif extern void native_io_delay(void); +extern int io_delay_type; +extern void io_delay_init(void); + static inline void slow_down_io(void) { native_io_delay(); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4bc8e48434a..357b68ba23e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -53,6 +53,7 @@ #ifdef CONFIG_X86 #include #include +#include #endif static int deprecated_sysctl_warning(struct __sysctl_args *args); @@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3-70-g09d2 From af65d64845a90c8f2fc90b97e2148ff74672e979 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:43 +0100 Subject: x86 vDSO: consolidate vdso32 This makes x86_64's ia32 emulation support share the sources used in the 32-bit kernel for the 32-bit vDSO and much of its setup code. The 32-bit vDSO mapping now behaves the same on x86_64 as on native 32-bit. The abi.syscall32 sysctl on x86_64 now takes the same values that vm.vdso_enabled takes on the 32-bit kernel. That is, 1 means a randomized vDSO location, 2 means the fixed old address. The CONFIG_COMPAT_VDSO option is now available to make this the default setting, the same meaning it has for the 32-bit kernel. (This does not affect the 64-bit vDSO.) The argument vdso32=[012] can be used on both 32-bit and 64-bit kernels to set this paramter at boot time. The vdso=[012] argument still does this same thing on the 32-bit kernel. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 5 ++ arch/x86/Kconfig | 4 +- arch/x86/ia32/Makefile | 2 +- arch/x86/ia32/ia32_binfmt.c | 9 ++- arch/x86/ia32/ia32_signal.c | 22 ++++--- arch/x86/vdso/Makefile | 2 +- arch/x86/vdso/vdso32-setup.c | 119 ++++++++++++++++++++++++++++-------- arch/x86/vdso/vdso32.S | 16 +++-- arch/x86/xen/setup.c | 4 +- include/asm-x86/elf.h | 23 ++++--- 10 files changed, 146 insertions(+), 60 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b427b7c0e5d..b3f20beea41 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1988,6 +1988,11 @@ and is between 256 and 4096 characters. It is defined in the file vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping + vdso32= [X86-32,X86-64] + vdso32=2: enable compat VDSO (default with COMPAT_VDSO) + vdso32=1: enable 32-bit VDSO (default) + vdso32=0: disable 32-bit VDSO mapping + vector= [IA-64,SMP] vector=percpu: enable percpu vector domain diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 80b7ba4056d..2f4d88babd3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1191,9 +1191,9 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" default y - depends on X86_32 + depends on X86_32 || IA32_EMULATION help - Map the VDSO to the predictable old-style address too. + Map the 32-bit VDSO to the predictable old-style address too. ---help--- Say N here if you are running a sufficiently recent glibc version (2.3.3 or later), to remove the high-mapped diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index a3c997e9f39..1f58a21a41d 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o + ia32_binfmt.o fpu32.o ptrace32.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c index 55822d2cf05..e32974c3dd3 100644 --- a/arch/x86/ia32/ia32_binfmt.c +++ b/arch/x86/ia32/ia32_binfmt.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #undef ELF_ARCH #undef ELF_CLASS @@ -47,14 +47,13 @@ #define AT_SYSINFO 32 #define AT_SYSINFO_EHDR 33 -int sysctl_vsyscall32 = 1; +extern int sysctl_vsyscall32; #undef ARCH_DLINFO #define ARCH_DLINFO do { \ if (sysctl_vsyscall32) { \ - current->mm->context.vdso = (void *)VSYSCALL32_BASE; \ - NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ + NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE); \ } \ } while(0) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 0fc5d8563e1..39356a756b2 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define DEBUG_SIG 0 @@ -465,13 +465,16 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, goto give_sigsegv; } - /* Return stub is in 32bit vsyscall page */ - if (current->binfmt->hasvdso) - restorer = VSYSCALL32_SIGRETURN; - else - restorer = (void *)&frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) + if (ka->sa.sa_flags & SA_RESTORER) { restorer = ka->sa.sa_restorer; + } else { + /* Return stub is in 32bit vsyscall page */ + if (current->binfmt->hasvdso) + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + sigreturn); + else + restorer = (void *)&frame->retcode; + } err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); /* @@ -519,7 +522,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, { struct rt_sigframe __user *frame; struct exec_domain *ed = current_thread_info()->exec_domain; - void __user *restorer = VSYSCALL32_RTSIGRETURN; + void __user *restorer; int err = 0; /* __copy_to_user optimizes that into a single 8 byte store */ @@ -564,6 +567,9 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); /* diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 1127c716df0..47bc2760e6a 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -15,7 +15,7 @@ vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o # files to link into kernel obj-$(VDSO64-y) += vma.o vdso.o -obj-$(CONFIG_X86_32) += vdso32.o vdso32-setup.o +obj-$(VDSO32-y) += vdso32.o vdso32-setup.o vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index fb71a93c5dc..d97a6d7d062 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -24,6 +24,7 @@ #include #include #include +#include enum { VDSO_DISABLED = 0, @@ -37,14 +38,24 @@ enum { #define VDSO_DEFAULT VDSO_ENABLED #endif +#ifdef CONFIG_X86_64 +#define vdso_enabled sysctl_vsyscall32 +#define arch_setup_additional_pages syscall32_setup_pages +#endif + +/* + * This is the difference between the prelinked addresses in the vDSO images + * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO + * in the user address space. + */ +#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK) + /* * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; -EXPORT_SYMBOL_GPL(vdso_enabled); - static int __init vdso_setup(char *s) { vdso_enabled = simple_strtoul(s, NULL, 0); @@ -52,9 +63,18 @@ static int __init vdso_setup(char *s) return 1; } -__setup("vdso=", vdso_setup); +/* + * For consistency, the argument vdso32=[012] affects the 32-bit vDSO + * behavior on both 64-bit and 32-bit kernels. + * On 32-bit kernels, vdso=[012] means the same thing. + */ +__setup("vdso32=", vdso_setup); -extern asmlinkage void ia32_sysenter_target(void); +#ifdef CONFIG_X86_32 +__setup_param("vdso=", vdso32_setup, vdso_setup, 0); + +EXPORT_SYMBOL_GPL(vdso_enabled); +#endif static __init void reloc_symtab(Elf32_Ehdr *ehdr, unsigned offset, unsigned size) @@ -79,7 +99,7 @@ static __init void reloc_symtab(Elf32_Ehdr *ehdr, case STT_FUNC: case STT_SECTION: case STT_FILE: - sym->st_value += VDSO_HIGH_BASE; + sym->st_value += VDSO_ADDR_ADJUST; } } } @@ -105,7 +125,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) case DT_VERNEED: case DT_ADDRRNGLO ... DT_ADDRRNGHI: /* definitely pointers needing relocation */ - dyn->d_un.d_ptr += VDSO_HIGH_BASE; + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; break; case DT_ENCODING ... OLD_DT_LOOS-1: @@ -114,7 +134,7 @@ static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) they're even */ if (dyn->d_tag >= DT_ENCODING && (dyn->d_tag & 1) == 0) - dyn->d_un.d_ptr += VDSO_HIGH_BASE; + dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; break; case DT_VERDEFNUM: @@ -143,15 +163,15 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) int i; BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || - !elf_check_arch(ehdr) || + !elf_check_arch_ia32(ehdr) || ehdr->e_type != ET_DYN); - ehdr->e_entry += VDSO_HIGH_BASE; + ehdr->e_entry += VDSO_ADDR_ADJUST; /* rebase phdrs */ phdr = (void *)ehdr + ehdr->e_phoff; for (i = 0; i < ehdr->e_phnum; i++) { - phdr[i].p_vaddr += VDSO_HIGH_BASE; + phdr[i].p_vaddr += VDSO_ADDR_ADJUST; /* relocate dynamic stuff */ if (phdr[i].p_type == PT_DYNAMIC) @@ -164,7 +184,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) if (!(shdr[i].sh_flags & SHF_ALLOC)) continue; - shdr[i].sh_addr += VDSO_HIGH_BASE; + shdr[i].sh_addr += VDSO_ADDR_ADJUST; if (shdr[i].sh_type == SHT_SYMTAB || shdr[i].sh_type == SHT_DYNSYM) @@ -173,6 +193,45 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) } } +/* + * These symbols are defined by vdso32.S to mark the bounds + * of the ELF DSO images included therein. + */ +extern const char vdso32_default_start, vdso32_default_end; +extern const char vdso32_sysenter_start, vdso32_sysenter_end; +static struct page *vdso32_pages[1]; + +#ifdef CONFIG_X86_64 + +static int use_sysenter __read_mostly = -1; + +#define vdso32_sysenter() (use_sysenter > 0) + +/* May not be __init: called during resume */ +void syscall32_cpu_init(void) +{ + if (use_sysenter < 0) + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); + + /* Load these always in case some future AMD CPU supports + SYSENTER from compat mode too. */ + checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); + checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + + wrmsrl(MSR_CSTAR, ia32_cstar_target); +} + +#define compat_uses_vma 1 + +static inline void map_compat_vdso(int map) +{ +} + +#else /* CONFIG_X86_32 */ + +#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) + void enable_sep_cpu(void) { int cpu = get_cpu(); @@ -210,13 +269,7 @@ static int __init gate_vma_init(void) return 0; } -/* - * These symbols are defined by vsyscall.o to mark the bounds - * of the ELF DSO images included therein. - */ -extern const char vsyscall_int80_start, vsyscall_int80_end; -extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static struct page *syscall_pages[1]; +#define compat_uses_vma 0 static void map_compat_vdso(int map) { @@ -227,31 +280,35 @@ static void map_compat_vdso(int map) vdso_mapped = map; - __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, + __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, map ? PAGE_READONLY_EXEC : PAGE_NONE); /* flush stray tlbs */ flush_tlb_all(); } +#endif /* CONFIG_X86_64 */ + int __init sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); const void *vsyscall; size_t vsyscall_len; - syscall_pages[0] = virt_to_page(syscall_page); + vdso32_pages[0] = virt_to_page(syscall_page); +#ifdef CONFIG_X86_32 gate_vma_init(); printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); +#endif - if (!boot_cpu_has(X86_FEATURE_SEP)) { - vsyscall = &vsyscall_int80_start; - vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; + if (!vdso32_sysenter()) { + vsyscall = &vdso32_default_start; + vsyscall_len = &vdso32_default_end - &vdso32_default_start; } else { - vsyscall = &vsyscall_sysenter_start; - vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; + vsyscall = &vdso32_sysenter_start; + vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; } memcpy(syscall_page, vsyscall, vsyscall_len); @@ -284,7 +341,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) ret = addr; goto up_fail; } + } + if (compat_uses_vma || !compat) { /* * MAYWRITE to allow gdb to COW and set breakpoints * @@ -298,7 +357,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, - syscall_pages); + vdso32_pages); if (ret) goto up_fail; @@ -314,6 +373,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) return ret; } +#ifdef CONFIG_X86_64 + +__initcall(sysenter_setup); + +#else /* CONFIG_X86_32 */ + const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) @@ -342,3 +407,5 @@ int in_gate_area_no_task(unsigned long addr) { return 0; } + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index cab020c99c3..1e36f72cab8 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S @@ -2,14 +2,18 @@ __INITDATA - .globl vsyscall_int80_start, vsyscall_int80_end -vsyscall_int80_start: + .globl vdso32_default_start, vdso32_default_end +vdso32_default_start: +#ifdef CONFIG_X86_32 .incbin "arch/x86/vdso/vdso32-int80.so" -vsyscall_int80_end: +#else + .incbin "arch/x86/vdso/vdso32-syscall.so" +#endif +vdso32_default_end: - .globl vsyscall_sysenter_start, vsyscall_sysenter_end -vsyscall_sysenter_start: + .globl vdso32_sysenter_start, vdso32_sysenter_end +vdso32_sysenter_start: .incbin "arch/x86/vdso/vdso32-sysenter.so" -vsyscall_sysenter_end: +vdso32_sysenter_end: __FINIT diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index fd91568090f..7d6d0ef5589 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -62,8 +62,8 @@ static void xen_idle(void) */ static void fiddle_vdso(void) { - extern char vsyscall_int80_start; - u32 *mask = VDSO32_SYMBOL(&vsyscall_int80_start, NOTE_MASK); + extern const char vdso32_default_start; + u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK); *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; } diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index 70edff2d567..60f5101d948 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -74,17 +74,19 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #ifdef __KERNEL__ +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch_ia32(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + #ifdef CONFIG_X86_32 #include #include /* for savesegment */ #include #include -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) \ - (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) +#define elf_check_arch(x) elf_check_arch_ia32(x) /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx contains a pointer to a function which might be registered using `atexit'. @@ -247,10 +249,6 @@ extern int dump_task_extended_fpu (struct task_struct *, #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) - -#define VDSO_ENTRY \ - ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ @@ -262,6 +260,8 @@ do if (vdso_enabled) { \ #else /* CONFIG_X86_32 */ +#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ + /* 1GB for 64bit, 8MB for 32bit */ #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) @@ -272,6 +272,11 @@ do if (vdso_enabled) { \ #endif /* !CONFIG_X86_32 */ +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) + +#define VDSO_ENTRY \ + ((unsigned long) VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) + struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 -- cgit v1.2.3-70-g09d2 From 8b2cb7a8f531d6ca72a8aff873b9bb1c6b3122ba Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Wed, 30 Jan 2008 13:32:11 +0100 Subject: x86: 32-bit EFI runtime service support: fixes in sync with 64-bit support support according to fixes of x86_64 support. - Delete efi_rt_lock because it is used during system early boot, before SMP is initialized. - Change local_flush_tlb() to __flush_tlb_all() to flush global page mapping. - Clean up includes. - Revise Kconfig description. - Enable noefi kernel parameter on i386. Signed-off-by: Huang Ying Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 2 ++ Documentation/x86_64/boot-options.txt | 4 ---- arch/x86/Kconfig | 19 ++++++++----------- arch/x86/kernel/efi.c | 7 +++++++ arch/x86/kernel/efi_32.c | 25 +++++-------------------- arch/x86/kernel/efi_64.c | 7 ------- arch/x86/kernel/setup_32.c | 6 +++--- 7 files changed, 25 insertions(+), 45 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b3f20beea41..e7910f8b4fc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1169,6 +1169,8 @@ and is between 256 and 4096 characters. It is defined in the file nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. + noefi [X86-32,X86-64] Disable EFI runtime services support. + noexec [IA-64] noexec [X86-32,X86-64] diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 638bf46ca05..34abae4e944 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -306,8 +306,4 @@ Debugging newfallback: use new unwinder but fall back to old if it gets stuck (default) -EFI - - noefi Disable EFI support - Miscellaneous diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d89b9452815..fa6fa52248d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -961,21 +961,18 @@ config MTRR config EFI def_bool n - prompt "Boot from EFI support" + prompt "EFI runtime service support" depends on ACPI ---help--- - This enables the kernel to boot on EFI platforms using - system configuration information passed to it from the firmware. - This also enables the kernel to use any EFI runtime services that are + This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). - This option is only useful on systems that have EFI firmware - and will result in a kernel image that is ~8k larger. In addition, - you must use the latest ELILO loader available at - in order to take advantage of - kernel initialization using EFI information (neither GRUB nor LILO know - anything about EFI). However, even with this option, the resultant - kernel should continue to boot on existing non-EFI platforms. + This option is only useful on systems that have EFI firmware. + In addition, you should use the latest ELILO loader available + at in order to take advantage + of EFI runtime services. However, even with this option, the + resultant kernel should continue to boot on existing non-EFI + platforms. config IRQBALANCE def_bool y diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 0a61522e85c..2939b015c2e 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -55,6 +55,13 @@ struct efi_memory_map memmap; struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static int __init setup_noefi(char *arg) +{ + efi_enabled = 0; + return 0; +} +early_param("noefi", setup_noefi); + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { return efi_call_virt2(get_time, tm, tc); diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 30f93711628..afd2c3b039d 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -20,27 +20,15 @@ */ #include -#include -#include #include -#include -#include -#include #include -#include #include -#include -#include #include #include #include -#include -#include #include -#define PFX "EFI: " - /* * To make EFI call EFI runtime service in physical addressing mode we need * prelog/epilog before/after the invocation to disable interrupt, to @@ -49,16 +37,14 @@ */ static unsigned long efi_rt_eflags; -static DEFINE_SPINLOCK(efi_rt_lock); static pgd_t efi_bak_pg_dir_pointer[2]; -void efi_call_phys_prelog(void) __acquires(efi_rt_lock) +void efi_call_phys_prelog(void) { unsigned long cr4; unsigned long temp; struct desc_ptr gdt_descr; - spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); /* @@ -88,14 +74,14 @@ void efi_call_phys_prelog(void) __acquires(efi_rt_lock) /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); gdt_descr.address = __pa(get_cpu_gdt_table(0)); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); } -void efi_call_phys_epilog(void) __releases(efi_rt_lock) +void efi_call_phys_epilog(void) { unsigned long cr4; struct desc_ptr gdt_descr; @@ -119,10 +105,9 @@ void efi_call_phys_epilog(void) __releases(efi_rt_lock) /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); - spin_unlock(&efi_rt_lock); } /* @@ -135,7 +120,7 @@ void __init efi_map_memmap(void) memmap.map = bt_ioremap((unsigned long) memmap.phys_map, (memmap.nr_map * memmap.desc_size)); if (memmap.map == NULL) - printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); + printk(KERN_ERR "Could not remap the EFI memmap!\n"); memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); } diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index f2000dbc719..269de2e049a 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -40,13 +40,6 @@ static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; -static int __init setup_noefi(char *arg) -{ - efi_enabled = 0; - return 0; -} -early_param("noefi", setup_noefi); - static void __init early_mapping_set_exec(unsigned long start, unsigned long end, int executable) diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2e805da337a..704550fdb84 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -648,9 +648,6 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "BIOS-provided physical RAM map:\n"); print_memory_map(memory_setup()); - if (efi_enabled) - efi_init(); - copy_edd(); if (!boot_params.hdr.root_flags) @@ -677,6 +674,9 @@ void __init setup_arch(char **cmdline_p) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; + if (efi_enabled) + efi_init(); + max_low_pfn = setup_memory(); #ifdef CONFIG_VMI -- cgit v1.2.3-70-g09d2 From a25bd94964e87b1b93903a822fba5025d995d4da Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Jan 2008 13:33:08 +0100 Subject: x86: add the "print code before the trapping instruction" feature to 64 bit The 32 bit x86 tree has a very useful feature that prints the Code: line for the code even before the trapping instrution (and the start of the trapping instruction is then denoted with a <>). Unfortunately, the 64 bit x86 tree does not yet have this feature, making diagnosing backtraces harder than needed. This patch adds this feature in the same was as the 32 bit tree has (including the same kernel boot parameter), and including a bugfix to make the code use probe_kernel_address() rarther than a buggy (deadlocking) __get_user. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 4 ++-- arch/x86/kernel/traps_64.c | 44 +++++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e7910f8b4fc..40db7dd1d92 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -416,8 +416,8 @@ and is between 256 and 4096 characters. It is defined in the file [SPARC64] tick [X86-64] hpet,tsc - code_bytes [IA32] How many bytes of object code to print in an - oops report. + code_bytes [IA32/X86_64] How many bytes of object code to print + in an oops report. Range: 0 - 8192 Default: 64 diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 304ca6b4a1c..0bba7924604 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -74,6 +74,8 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); +static unsigned int code_bytes = 64; + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -459,12 +461,15 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = !user_mode(regs); unsigned long sp; const int cpu = smp_processor_id(); struct task_struct *cur = cpu_pda(cpu)->pcurrent; + u8 *ip; + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; sp = regs->sp; + ip = (u8 *) regs->ip - code_prologue; printk("CPU %d ", cpu); __show_regs(regs); printk("Process %s (pid: %d, threadinfo %p, task %p)\n", @@ -474,22 +479,28 @@ void show_registers(struct pt_regs *regs) * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode(regs)) { + unsigned char c; printk("Stack: "); _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); + printk("\n"); - printk("\nCode: "); - if (regs->ip < PAGE_OFFSET) - goto bad; - - for (i=0; i<20; i++) { - unsigned char c; - if (__get_user(c, &((unsigned char*)regs->ip)[i])) { -bad: + printk(KERN_EMERG "Code: "); + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at RIP */ + ip = (u8 *) regs->ip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || + probe_kernel_address(ip, c)) { printk(" Bad RIP value."); break; } - printk("%02x ", c); + if (ip == (u8 *)regs->ip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -1164,3 +1175,14 @@ static int __init kstack_setup(char *s) return 0; } early_param("kstack", kstack_setup); + + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); -- cgit v1.2.3-70-g09d2 From aaf230424204864e2833dcc1da23e2cb0b9f39cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 30 Jan 2008 13:33:09 +0100 Subject: x86: disable the GART early, 64-bit For K8 system: 4G RAM with memory hole remapping enabled, or more than 4G RAM installed. when try to use kexec second kernel, and the first doesn't include gart_shutdown. the second kernel could have different aper position than the first kernel. and second kernel could use that hole as RAM that is still used by GART set by the first kernel. esp. when try to kexec 2.6.24 with sparse mem enable from previous kernel (from RHEL 5 or SLES 10). the new kernel will use aper by GART (set by first kernel) for vmemmap. and after new kernel setting one new GART. the position will be real RAM. the _mapcount set is lost. Bad page state in process 'swapper' page:ffffe2000e600020 flags:0x0000000000000000 mapping:0000000000000000 mapcount:1 count:0 Trying to fix it up, but a reboot is needed Backtrace: Pid: 0, comm: swapper Not tainted 2.6.24-rc7-smp-gcdf71a10-dirty #13 Call Trace: [] bad_page+0x63/0x8d [] __free_pages_ok+0x7c/0x2a5 [] free_all_bootmem_core+0xd0/0x198 [] numa_free_all_bootmem+0x3b/0x76 [] mem_init+0x3b/0x152 [] start_kernel+0x236/0x2c2 [] _sinittext+0x11a/0x121 and [ffffe2000e600000-ffffe2000e7fffff] PMD ->ffff81001c200000 on node 0 phys addr is : 0x1c200000 RHEL 5.1 kernel -53 said: PCI-DMA: aperture base @ 1c000000 size 65536 KB new kernel said: Mapping aperture over 65536 KB of RAM @ 3c000000 So could try to disable that GART if possible. According to Ingo > hm, i'm wondering, instead of modifying the GART, why dont we simply > _detect_ whatever GART settings we have inherited, and propagate that > into our e820 maps? I.e. if there's inconsistency, then punch that out > from the memory maps and just dont use that memory. > > that way it would not matter whether the GART settings came from a [old > or crashing] Linux kernel that has not called gart_iommu_shutdown(), or > whether it's a BIOS that has set up an aperture hole inconsistent with > the memory map it passed. (or the memory map we _think_ i tried to pass > us) > > it would also be more robust to only read and do a memory map quirk > based on that, than actively trying to change the GART so early in the > bootup. Later on we have to re-enable the GART _anyway_ and have to > punch a hole for it. > > and as a bonus, we would have shored up our defenses against crappy > BIOSes as well. add e820 modification for gart inconsistent setting. gart_fix_e820=off could be used to disable e820 fix. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 4 ++ arch/x86/kernel/aperture_64.c | 89 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 12 +++++ arch/x86/kernel/setup_64.c | 3 ++ include/asm-x86/e820_64.h | 1 + include/asm-x86/gart.h | 5 +++ 6 files changed, 114 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 40db7dd1d92..860a9087549 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -660,6 +660,10 @@ and is between 256 and 4096 characters. It is defined in the file gamma= [HW,DRM] + gart_fix_e820= [X86_64] disable the fix e820 for K8 GART + Format: off | on + default: on + gdth= [HW,SCSI] See header of drivers/scsi/gdth.c. diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 52d2beac455..bf1b469d584 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -218,6 +218,95 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) return 0; } +static int gart_fix_e820 __initdata = 1; + +static int __init parse_gart_mem(char *p) +{ + if (!p) + return -EINVAL; + + if (!strncmp(p, "off", 3)) + gart_fix_e820 = 0; + else if (!strncmp(p, "on", 2)) + gart_fix_e820 = 1; + + return 0; +} +early_param("gart_fix_e820", parse_gart_mem); + +void __init early_gart_iommu_check(void) +{ + /* + * in case it is enabled before, esp for kexec/kdump, + * previous kernel already enable that. memset called + * by allocate_aperture/__alloc_bootmem_nopanic cause restart. + * or second kernel have different position for GART hole. and new + * kernel could use hole as RAM that is still used by GART set by + * first kernel + * or BIOS forget to put that in reserved. + * try to update e820 to make that region as reserved. + */ + int fix, num; + u32 ctl; + u32 aper_size = 0, aper_order = 0, last_aper_order = 0; + u64 aper_base = 0, last_aper_base = 0; + int aper_enabled = 0, last_aper_enabled = 0; + + if (!early_pci_allowed()) + return; + + fix = 0; + for (num = 24; num < 32; num++) { + if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) + continue; + + ctl = read_pci_config(0, num, 3, 0x90); + aper_enabled = ctl & 1; + aper_order = (ctl >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; + aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; + aper_base <<= 25; + + if ((last_aper_order && aper_order != last_aper_order) || + (last_aper_base && aper_base != last_aper_base) || + (last_aper_enabled && aper_enabled != last_aper_enabled)) { + fix = 1; + break; + } + last_aper_order = aper_order; + last_aper_base = aper_base; + last_aper_enabled = aper_enabled; + } + + if (!fix && !aper_enabled) + return; + + if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL) + fix = 1; + + if (gart_fix_e820 && !fix && aper_enabled) { + if (e820_any_mapped(aper_base, aper_base + aper_size, + E820_RAM)) { + /* reserved it, so we can resuse it in second kernel */ + printk(KERN_INFO "update e820 for GART\n"); + add_memory_region(aper_base, aper_size, E820_RESERVED); + update_e820(); + } + return; + } + + /* different nodes have different setting, disable them all at first*/ + for (num = 24; num < 32; num++) { + if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) + continue; + + ctl = read_pci_config(0, num, 3, 0x90); + ctl &= ~1; + write_pci_config(0, num, 3, 0x90, ctl); + } + +} + void __init gart_iommu_hole_init(void) { u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index abc473bcabe..07cfaae7ab0 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -728,6 +728,18 @@ void __init finish_e820_parsing(void) } } +void __init update_e820(void) +{ + u8 nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, &nr_map)) + return; + e820.nr_map = nr_map; + printk(KERN_INFO "modified physical RAM map:\n"); + e820_print_map("modified"); +} + unsigned long pci_mem_start = 0xaeedbabe; EXPORT_SYMBOL(pci_mem_start); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 07547febac7..12bad27d66f 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -53,6 +53,7 @@ #include