74 files changed, 728 insertions, 699 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 6a451f47d40..c3b1430cf60 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -304,3 +304,15 @@ Why: The code says it was obsolete when it was written in 2001. Who: Richard Purdie <rpurdie@rpsys.net> --------------------------- + +What: Wireless extensions over netlink (CONFIG_NET_WIRELESS_RTNETLINK) +When: with the merge of wireless-dev, 2.6.22 or later +Why: The option/code is + * not enabled on most kernels + * not required by any userspace tools (except an experimental one, + and even there only for some parts, others use ioctl) + * pointless since wext is no longer evolving and the ioctl + interface needs to be kept +Who: Johannes Berg <johannes@sipsolutions.net> + +--------------------------- diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 72af5de1eff..5484ab5efd4 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -41,6 +41,7 @@ Table of Contents 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score 2.13 /proc/<pid>/oom_score - Display current oom-killer score + 2.14 /proc/<pid>/io - Display the IO accounting fields ------------------------------------------------------------------------------ Preface @@ -1990,3 +1991,107 @@ need to recompile the kernel, or even to reboot the system. The files in the command to write value into these files, thereby changing the default settings of the kernel. ------------------------------------------------------------------------------ + +2.14 /proc/<pid>/io - Display the IO accounting fields +------------------------------------------------------- + +This file contains IO statistics for each running process + +Example +------- + +test:/tmp # dd if=/dev/zero of=/tmp/test.dat & +[1] 3828 + +test:/tmp # cat /proc/3828/io +rchar: 323934931 +wchar: 323929600 +syscr: 632687 +syscw: 632675 +read_bytes: 0 +write_bytes: 323932160 +cancelled_write_bytes: 0 + + +Description +----------- + +rchar +----- + +I/O counter: chars read +The number of bytes which this task has caused to be read from storage. This +is simply the sum of bytes which this process passed to read() and pread(). +It includes things like tty IO and it is unaffected by whether or not actual +physical disk IO was required (the read might have been satisfied from +pagecache) + + +wchar +----- + +I/O counter: chars written +The number of bytes which this task has caused, or shall cause to be written +to disk. Similar caveats apply here as with rchar. + + +syscr +----- + +I/O counter: read syscalls +Attempt to count the number of read I/O operations, i.e. syscalls like read() +and pread(). + + +syscw +----- + +I/O counter: write syscalls +Attempt to count the number of write I/O operations, i.e. syscalls like +write() and pwrite(). + + +read_bytes +---------- + +I/O counter: bytes read +Attempt to count the number of bytes which this process really did cause to +be fetched from the storage layer. Done at the submit_bio() level, so it is +accurate for block-backed filesystems. <please add status regarding NFS and +CIFS at a later time> + + +write_bytes +----------- + +I/O counter: bytes written +Attempt to count the number of bytes which this process caused to be sent to +the storage layer. This is done at page-dirtying time. 
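As an editor's illustration (not part of the patch) of the counters documented so far: a minimal userspace reader for these fields, which simply parses the "name: value" lines shown in the example above.

#include <stdio.h>

int main(void)
{
	char key[64];
	unsigned long long value;
	FILE *f = fopen("/proc/self/io", "r");

	if (!f) {
		perror("fopen /proc/self/io");
		return 1;
	}
	/* each line has the form "rchar: 323934931" */
	while (fscanf(f, "%63[^:]: %llu\n", key, &value) == 2)
		printf("%-22s = %llu\n", key, value);
	fclose(f);
	return 0;
}

Reading a file that is already in pagecache advances rchar but leaves read_bytes untouched, which is exactly the distinction drawn under rchar above.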
+ + +cancelled_write_bytes +--------------------- + +The big inaccuracy here is truncate. If a process writes 1MB to a file and +then deletes the file, it will in fact perform no writeout. But it will have +been accounted as having caused 1MB of write. +In other words: The number of bytes which this process caused to not happen, +by truncating pagecache. A task can cause "negative" IO too. If this task +truncates some dirty pagecache, some IO which another task has been accounted +for (in its write_bytes) will not be happening. We _could_ just subtract that +from the truncating task's write_bytes, but there is information loss in doing +that. + + +Note +---- + +At its current implementation state, this is a bit racy on 32-bit machines: if +process A reads process B's /proc/pid/io while process B is updating one of +those 64-bit counters, process A could see an intermediate result. + + +More information about this can be found within the taskstats documentation in +Documentation/accounting. + +------------------------------------------------------------------------------ diff --git a/MAINTAINERS b/MAINTAINERS index 1dfba85ca7b..9993b900941 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2553,16 +2553,8 @@ L: i2c@lm-sensors.org S: Maintained PARALLEL PORT SUPPORT -P: Phil Blundell -M: philb@gnu.org -P: Tim Waugh -M: tim@cyberelk.net -P: David Campbell -P: Andrea Arcangeli -M: andrea@suse.de L: linux-parport@lists.infradead.org -W: http://people.redhat.com/twaugh/parport/ -S: Maintained +S: Orphan PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES P: Tim Waugh diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ac2ffdcfbbb..e7baca29f3f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -21,6 +21,10 @@ config ARM config SYS_SUPPORTS_APM_EMULATION bool +config GENERIC_GPIO + bool + default n + config GENERIC_TIME bool default n @@ -163,6 +167,7 @@ config ARCH_VERSATILE config ARCH_AT91 bool "Atmel AT91" + select GENERIC_GPIO help This enables support for systems based on the Atmel AT91RM9200 and AT91SAM9xxx processors. @@ -304,6 +309,7 @@ config ARCH_PXA bool "PXA2xx-based" depends on MMU select ARCH_MTD_XIP + select GENERIC_GPIO select GENERIC_TIME help Support for Intel's PXA2XX processor line. @@ -325,11 +331,13 @@ config ARCH_SA1100 select ISA select ARCH_DISCONTIGMEM_ENABLE select ARCH_MTD_XIP + select GENERIC_GPIO help Support for StrongARM 11x0 based boards. config ARCH_S3C2410 bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443" + select GENERIC_GPIO help Samsung S3C2410X CPU based systems, such as the Simtec Electronics BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or @@ -354,6 +362,7 @@ config ARCH_LH7A40X config ARCH_OMAP bool "TI OMAP" + select GENERIC_GPIO help Support for TI's OMAP platform (OMAP1 and OMAP2). 
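The GENERIC_GPIO plumbing added above (and in the avr32 hunk below) is what lets a driver say "depends on GENERIC_GPIO" rather than naming individual platforms: every architecture that selects the symbol promises the generic GPIO calls declared in <asm/gpio.h>. A minimal consumer sketch follows, assuming a hypothetical pin number; this is an editor's illustration, not code from the patch.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <asm/gpio.h>

#define DEMO_GPIO 42	/* hypothetical board-specific pin number */

static int __init demo_gpio_init(void)
{
	int err = gpio_request(DEMO_GPIO, "demo-button");

	if (err)
		return err;
	err = gpio_direction_input(DEMO_GPIO);
	if (err) {
		gpio_free(DEMO_GPIO);
		return err;
	}
	/* gpio_to_irq() is how gpio-keys (further down) binds its ISR to a pin */
	printk(KERN_INFO "demo: pin %d level %d irq %d\n",
	       DEMO_GPIO, gpio_get_value(DEMO_GPIO), gpio_to_irq(DEMO_GPIO));
	return 0;
}

static void __exit demo_gpio_exit(void)
{
	gpio_free(DEMO_GPIO);
}

module_init(demo_gpio_init);
module_exit(demo_gpio_exit);
MODULE_LICENSE("GPL");

The gpio-keys Kconfig hunk later in this diff makes the same move, replacing its platform list with a single dependency on this symbol.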
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index bb059a4e1df..ce4013aee59 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -22,6 +22,10 @@ config AVR32 config UID16 bool +config GENERIC_GPIO + bool + default y + config GENERIC_HARDIRQS bool default y diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 2f7672545fe..cee4ff679d3 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -220,11 +220,11 @@ config PARAVIRT config VMI bool "VMI Paravirt-ops support" - depends on PARAVIRT && !NO_HZ - default y + depends on PARAVIRT help - VMI provides a paravirtualized interface to multiple hypervisors - include VMware ESX server and Xen by connecting to a ROM module + VMI provides a paravirtualized interface to the VMware ESX server + (it could be used by other hypervisors in theory too, but is not + at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. config ACPI_SRAT @@ -1287,12 +1287,3 @@ config X86_TRAMPOLINE config KTIME_SCALAR bool default y - -config NO_IDLE_HZ - bool - depends on PARAVIRT - default y - help - Switches the regular HZ timer off when the system is going idle. - This helps a hypervisor detect that the Linux system is idle, - reducing the overhead of idle systems. diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 7a2c9cbdb51..2383bcf18c5 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -493,8 +493,15 @@ void __init setup_boot_APIC_clock(void) /* No broadcast on UP ! */ if (num_possible_cpus() == 1) return; - } else - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + } else { + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + } /* Setup the lapic or request the broadcast */ setup_APIC_timer(); diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index e1006b7acc9..f3ab61ee749 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -201,12 +201,30 @@ static int hpet_next_event(unsigned long delta, } /* + * Clock source related code + */ +static cycle_t read_hpet(void) +{ + return (cycle_t)hpet_readl(HPET_COUNTER); +} + +static struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = HPET_MASK, + .shift = HPET_SHIFT, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* * Try to setup the HPET timer */ int __init hpet_enable(void) { unsigned long id; uint64_t hpet_freq; + u64 tmp; if (!is_hpet_capable()) return 0; @@ -253,6 +271,25 @@ int __init hpet_enable(void) /* Start the counter */ hpet_start_counter(); + /* Initialize and register HPET clocksource + * + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * approximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + clocksource_register(&clocksource_hpet); + + if (id & HPET_ID_LEGSUP) { hpet_enable_int(); hpet_reserve_platform_timers(id); @@ -273,49 +310,6 @@ out_nohpet: return 0; } -/* - * Clock source related code - */ -static cycle_t read_hpet(void) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static struct 
clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = HPET_MASK, - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static int __init init_hpet_clocksource(void) -{ - u64 tmp; - - if (!hpet_virt_address) - return -ENODEV; - - /* - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - - return clocksource_register(&clocksource_hpet); -} - -module_init(init_hpet_clocksource); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index a6bc7bb3883..5cbb776b308 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c @@ -195,4 +195,4 @@ static int __init init_pit_clocksource(void) clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); return clocksource_register(&clocksource_pit); } -module_init(init_pit_clocksource); +arch_initcall(init_pit_clocksource); diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index c156ecfa387..2ec331e03fa 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -32,6 +32,7 @@ #include <asm/fixmap.h> #include <asm/apic.h> #include <asm/tlbflush.h> +#include <asm/timer.h> /* nop stub */ static void native_nop(void) @@ -493,7 +494,7 @@ struct paravirt_ops paravirt_ops = { .memory_setup = machine_specific_memory_setup, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, - .time_init = time_init_hook, + .time_init = hpet_time_init, .init_IRQ = native_init_IRQ, .cpuid = native_cpuid, @@ -520,6 +521,8 @@ struct paravirt_ops paravirt_ops = { .write_msr = native_write_msr, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .get_scheduled_cycles = native_read_tsc, + .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, @@ -535,7 +538,6 @@ struct paravirt_ops paravirt_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .const_udelay = __const_udelay, #ifdef CONFIG_X86_LOCAL_APIC .apic_write = native_apic_write, @@ -550,6 +552,8 @@ struct paravirt_ops paravirt_ops = { .flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_single = native_flush_tlb_single, + .map_pt_hook = (void *)native_nop, + .alloc_pt = (void *)native_nop, .alloc_pd = (void *)native_nop, .alloc_pd_clone = (void *)native_nop, diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 122623dcc6e..698c24fe482 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -657,5 +657,4 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif - tsc_init(); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 48bfcaa13ec..9b0dd2744c8 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -33,11 +33,6 @@ * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. 
*/ - -/* SMP boot always wants to use real time delay to allow sufficient time for - * the APs to come online */ -#define USE_REAL_TIME_DELAY - #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a5350059557..94e5cb09110 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -262,14 +262,23 @@ void notify_arch_cmos_timer(void) extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ -static void __init hpet_time_init(void) +void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - do_time_init(); + time_init_hook(); } +/* + * This is called directly from init code; we must delay timer setup in the + * HPET case as we can't make the decision to turn on HPET this early in the + * boot process. + * + * The chosen time_init function will usually be hpet_time_init, above, but + * in the case of virtual hardware, an alternative function may be substituted. + */ void __init time_init(void) { - late_time_init = hpet_time_init; + tsc_init(); + late_time_init = choose_time_init(); } diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 3082a418635..875d8a6ecc0 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -14,6 +14,7 @@ #include <asm/delay.h> #include <asm/tsc.h> #include <asm/io.h> +#include <asm/timer.h> #include "mach_timer.h" @@ -102,9 +103,6 @@ unsigned long long sched_clock(void) { unsigned long long this_offset; - if (unlikely(custom_sched_clock)) - return (*custom_sched_clock)(); - /* * Fall back to jiffies if there's no TSC available: */ @@ -113,13 +111,13 @@ unsigned long long sched_clock(void) return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); /* read the Time Stamp Counter: */ - rdtscll(this_offset); + get_scheduled_cycles(this_offset); /* return the value in ns */ return cycles_2_ns(this_offset); } -static unsigned long calculate_cpu_khz(void) +unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; unsigned long count; @@ -186,34 +184,6 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -void __init tsc_init(void) -{ - if (!cpu_has_tsc || tsc_disable) - goto out_no_tsc; - - cpu_khz = calculate_cpu_khz(); - tsc_khz = cpu_khz; - - if (!cpu_khz) - goto out_no_tsc; - - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); - - set_cyc2ns_scale(cpu_khz); - use_tsc_delay(); - return; - -out_no_tsc: - /* - * Set the tsc_disable flag if there's no TSC support, this - * makes it a fast flag for the kernel to see whether it - * should be using the TSC. 
- */ - tsc_disable = 1; -} - #ifdef CONFIG_CPU_FREQ /* @@ -383,28 +353,47 @@ static void __init check_geode_tsc_reliable(void) static inline void check_geode_tsc_reliable(void) { } #endif -static int __init init_tsc_clocksource(void) + +void __init tsc_init(void) { + if (!cpu_has_tsc || tsc_disable) + goto out_no_tsc; - if (cpu_has_tsc && tsc_khz && !tsc_disable) { - /* check blacklist */ - dmi_check_system(bad_tsc_dmi_table); + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; - unsynchronized_tsc(); - check_geode_tsc_reliable(); - current_tsc_khz = tsc_khz; - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, - clocksource_tsc.shift); - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } + if (!cpu_khz) + goto out_no_tsc; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + set_cyc2ns_scale(cpu_khz); + use_tsc_delay(); - return clocksource_register(&clocksource_tsc); + /* Check and install the TSC clocksource */ + dmi_check_system(bad_tsc_dmi_table); + + unsynchronized_tsc(); + check_geode_tsc_reliable(); + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + /* lower the rating if we already know its unstable: */ + if (check_tsc_unstable()) { + clocksource_tsc.rating = 0; + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } + clocksource_register(&clocksource_tsc); - return 0; -} + return; -module_init(init_tsc_clocksource); +out_no_tsc: + /* + * Set the tsc_disable flag if there's no TSC support, this + * makes it a fast flag for the kernel to see whether it + * should be using the TSC. + */ + tsc_disable = 1; +} diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index bb5a7abf949..fbf45fa0832 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -35,6 +35,7 @@ #include <asm/processor.h> #include <asm/timer.h> #include <asm/vmi_time.h> +#include <asm/kmap_types.h> /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -48,12 +49,13 @@ typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); static struct vrom_header *vmi_rom; static int license_gplok; -static int disable_nodelay; static int disable_pge; static int disable_pse; static int disable_sep; static int disable_tsc; static int disable_mtrr; +static int disable_noidle; +static int disable_vmi_timer; /* Cached VMI operations */ struct { @@ -255,7 +257,6 @@ static void vmi_nop(void) } /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ -#ifdef CONFIG_NO_IDLE_HZ static fastcall void vmi_safe_halt(void) { int idle = vmi_stop_hz_timer(); @@ -266,7 +267,6 @@ static fastcall void vmi_safe_halt(void) local_irq_enable(); } } -#endif #ifdef CONFIG_DEBUG_PAGE_TYPE @@ -371,6 +371,24 @@ static void vmi_check_page_type(u32 pfn, int type) #define vmi_check_page_type(p,t) do { } while (0) #endif +static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) +{ + /* + * Internally, the VMI ROM must map virtual addresses to physical + * addresses for processing MMU updates. By the time MMU updates + * are issued, this information is typically already lost. + * Fortunately, the VMI provides a cache of mapping slots for active + * page tables. + * + * We use slot zero for the linear mapping of physical memory, and + * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1. 
+ * + * args: SLOT VA COUNT PFN + */ + BUG_ON(type != KM_PTE0 && type != KM_PTE1); + vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn); +} + static void vmi_allocate_pt(u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); @@ -508,13 +526,14 @@ void vmi_pmd_clear(pmd_t *pmd) #endif #ifdef CONFIG_SMP -struct vmi_ap_state ap; extern void setup_pda(void); -static void __init /* XXX cpu hotplug */ +static void __devinit vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) { + struct vmi_ap_state ap; + /* Default everything to zero. This is fine for most GPRs. */ memset(&ap, 0, sizeof(struct vmi_ap_state)); @@ -553,7 +572,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, /* Protected mode, paging, AM, WP, NE, MP. */ ap.cr0 = 0x80050023; ap.cr4 = mmu_cr4_features; - vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid); + vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); } #endif @@ -645,12 +664,12 @@ static inline int __init probe_vmi_rom(void) void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ - if (vmi_rom) + if (vmi_ops.set_linear_mapping) vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); } /* - * Return a pointer to the VMI function or a NOP stub + * Return a pointer to a VMI function or NULL if unimplemented */ static void *vmi_get_function(int vmicall) { @@ -661,12 +680,13 @@ static void *vmi_get_function(int vmicall) if (rel->type == VMI_RELOCATION_CALL_REL) return (void *)rel->eip; else - return (void *)vmi_nop; + return NULL; } /* * Helper macro for making the VMI paravirt-ops fill code readable. - * For unimplemented operations, fall back to default. + * For unimplemented operations, fall back to default, unless nop + * is returned by the ROM. */ #define para_fill(opname, vmicall) \ do { \ @@ -675,9 +695,29 @@ do { \ if (rel->type != VMI_RELOCATION_NONE) { \ BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ paravirt_ops.opname = (void *)rel->eip; \ + } else if (rel->type == VMI_RELOCATION_NOP) \ + paravirt_ops.opname = (void *)vmi_nop; \ +} while (0) + +/* + * Helper macro for making the VMI paravirt-ops fill code readable. + * For cached operations which do not match the VMI ROM ABI and must + * go through a translation stub. Ignore NOPs, since it is not clear + * a NOP * VMI function corresponds to a NOP paravirt-op when the + * functions are not in 1-1 correspondence. 
+ */ +#define para_wrap(opname, wrapper, cache, vmicall) \ +do { \ + reloc = call_vrom_long_func(vmi_rom, get_reloc, \ + VMI_CALL_##vmicall); \ + BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ + if (rel->type == VMI_RELOCATION_CALL_REL) { \ + paravirt_ops.opname = wrapper; \ + vmi_ops.cache = (void *)rel->eip; \ } \ } while (0) + /* * Activate the VMI interface and switch into paravirtualized mode */ @@ -714,13 +754,8 @@ static inline int __init activate_vmi(void) * rdpmc is not yet used in Linux */ - /* CPUID is special, so very special */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.cpuid = (void *)rel->eip; - paravirt_ops.cpuid = vmi_cpuid; - } + /* CPUID is special, so very special it gets wrapped like a present */ + para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); para_fill(clts, CLTS); para_fill(get_debugreg, GetDR); @@ -737,38 +772,26 @@ static inline int __init activate_vmi(void) para_fill(restore_fl, SetInterruptMask); para_fill(irq_disable, DisableInterrupts); para_fill(irq_enable, EnableInterrupts); + /* irq_save_disable !!! sheer pain */ patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], (char *)paravirt_ops.save_fl); patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], (char *)paravirt_ops.irq_disable); -#ifndef CONFIG_NO_IDLE_HZ - para_fill(safe_halt, Halt); -#else - vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); - paravirt_ops.safe_halt = vmi_safe_halt; -#endif + para_fill(wbinvd, WBINVD); + para_fill(read_tsc, RDTSC); + + /* The following we emulate with trap and emulate for now */ /* paravirt_ops.read_msr = vmi_rdmsr */ /* paravirt_ops.write_msr = vmi_wrmsr */ - para_fill(read_tsc, RDTSC); /* paravirt_ops.rdpmc = vmi_rdpmc */ - /* TR interface doesn't pass TR value */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_tr = (void *)rel->eip; - paravirt_ops.load_tr_desc = vmi_set_tr; - } + /* TR interface doesn't pass TR value, wrap */ + para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); /* LDT is special, too */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops._set_ldt = (void *)rel->eip; - paravirt_ops.set_ldt = vmi_set_ldt; - } + para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); para_fill(load_gdt, SetGDT); para_fill(load_idt, SetIDT); @@ -779,28 +802,14 @@ static inline int __init activate_vmi(void) para_fill(write_ldt_entry, WriteLDTEntry); para_fill(write_gdt_entry, WriteGDTEntry); para_fill(write_idt_entry, WriteIDTEntry); - reloc = call_vrom_long_func(vmi_rom, get_reloc, - VMI_CALL_UpdateKernelStack); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_kernel_stack = (void *)rel->eip; - paravirt_ops.load_esp0 = vmi_load_esp0; - } - + para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); para_fill(set_iopl_mask, SetIOPLMask); - paravirt_ops.io_delay = (void *)vmi_nop; - if (!disable_nodelay) { - paravirt_ops.const_udelay = (void *)vmi_nop; - } - + para_fill(io_delay, IODelay); para_fill(set_lazy_mode, SetLazyMode); - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); - if (rel->type != VMI_RELOCATION_NONE) { - vmi_ops.flush_tlb = (void *)rel->eip; - paravirt_ops.flush_tlb_user = vmi_flush_tlb_user; - 
paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel; - } + /* user and kernel flush are just handled with different flags to FlushTLB */ + para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); + para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); para_fill(flush_tlb_single, InvalPage); /* @@ -815,27 +824,40 @@ static inline int __init activate_vmi(void) vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); #endif - vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); - vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); - vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); - paravirt_ops.alloc_pt = vmi_allocate_pt; - paravirt_ops.alloc_pd = vmi_allocate_pd; - paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; - paravirt_ops.release_pt = vmi_release_pt; - paravirt_ops.release_pd = vmi_release_pd; - paravirt_ops.set_pte = vmi_set_pte; - paravirt_ops.set_pte_at = vmi_set_pte_at; - paravirt_ops.set_pmd = vmi_set_pmd; - paravirt_ops.pte_update = vmi_update_pte; - paravirt_ops.pte_update_defer = vmi_update_pte_defer; + if (vmi_ops.set_pte) { + paravirt_ops.set_pte = vmi_set_pte; + paravirt_ops.set_pte_at = vmi_set_pte_at; + paravirt_ops.set_pmd = vmi_set_pmd; #ifdef CONFIG_X86_PAE - paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; - paravirt_ops.set_pte_present = vmi_set_pte_present; - paravirt_ops.set_pud = vmi_set_pud; - paravirt_ops.pte_clear = vmi_pte_clear; - paravirt_ops.pmd_clear = vmi_pmd_clear; + paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; + paravirt_ops.set_pte_present = vmi_set_pte_present; + paravirt_ops.set_pud = vmi_set_pud; + paravirt_ops.pte_clear = vmi_pte_clear; + paravirt_ops.pmd_clear = vmi_pmd_clear; #endif + } + + if (vmi_ops.update_pte) { + paravirt_ops.pte_update = vmi_update_pte; + paravirt_ops.pte_update_defer = vmi_update_pte_defer; + } + + vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); + if (vmi_ops.allocate_page) { + paravirt_ops.alloc_pt = vmi_allocate_pt; + paravirt_ops.alloc_pd = vmi_allocate_pd; + paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; + } + + vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); + if (vmi_ops.release_page) { + paravirt_ops.release_pt = vmi_release_pt; + paravirt_ops.release_pd = vmi_release_pd; + } + para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, + SetLinearMapping); + /* * These MUST always be patched. 
Don't support indirect jumps * through these operations, as the VMI interface may use either @@ -847,21 +869,20 @@ static inline int __init activate_vmi(void) paravirt_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP - paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook; - vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState); + para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); #endif #ifdef CONFIG_X86_LOCAL_APIC - paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead); - paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite); - paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite); + para_fill(apic_read, APICRead); + para_fill(apic_write, APICWrite); + para_fill(apic_write_atomic, APICWrite); #endif /* * Check for VMI timer functionality by probing for a cycle frequency method */ reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); - if (rel->type != VMI_RELOCATION_NONE) { + if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; vmi_timer_ops.get_cycle_counter = vmi_get_function(VMI_CALL_GetCycleCounter); @@ -879,9 +900,22 @@ static inline int __init activate_vmi(void) paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; #endif - custom_sched_clock = vmi_sched_clock; + paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; + paravirt_ops.get_cpu_khz = vmi_cpu_khz; + + /* We have true wallclock functions; disable CMOS clock sync */ + no_sync_cmos_clock = 1; + } else { + disable_noidle = 1; + disable_vmi_timer = 1; } + /* No idle HZ mode only works if VMI timer and no idle is enabled */ + if (disable_noidle || disable_vmi_timer) + para_fill(safe_halt, Halt); + else + para_wrap(safe_halt, vmi_safe_halt, halt, Halt); + /* * Alternative instruction rewriting doesn't happen soon enough * to convert VMI_IRET to a call instead of a jump; so we have @@ -914,7 +948,9 @@ void __init vmi_init(void) local_irq_save(flags); activate_vmi(); -#ifdef CONFIG_SMP + +#ifdef CONFIG_X86_IO_APIC + /* This is virtual hardware; timer routing is wired correctly */ no_timer_check = 1; #endif local_irq_restore(flags & X86_EFLAGS_IF); @@ -925,9 +961,7 @@ static int __init parse_vmi(char *arg) if (!arg) return -EINVAL; - if (!strcmp(arg, "disable_nodelay")) - disable_nodelay = 1; - else if (!strcmp(arg, "disable_pge")) { + if (!strcmp(arg, "disable_pge")) { clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); disable_pge = 1; } else if (!strcmp(arg, "disable_pse")) { @@ -942,7 +976,11 @@ static int __init parse_vmi(char *arg) } else if (!strcmp(arg, "disable_mtrr")) { clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); disable_mtrr = 1; - } + } else if (!strcmp(arg, "disable_timer")) { + disable_vmi_timer = 1; + disable_noidle = 1; + } else if (!strcmp(arg, "disable_noidle")) + disable_noidle = 1; return 0; } diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c index 76d2adcae5a..8dc72d57566 100644 --- a/arch/i386/kernel/vmitime.c +++ b/arch/i386/kernel/vmitime.c @@ -153,13 +153,6 @@ static void vmi_get_wallclock_ts(struct timespec *ts) ts->tv_sec = wallclock; } -static void update_xtime_from_wallclock(void) -{ - struct timespec ts; - vmi_get_wallclock_ts(&ts); - do_settimeofday(&ts); -} - unsigned long vmi_get_wallclock(void) { struct timespec ts; @@ -172,11 +165,20 @@ int vmi_set_wallclock(unsigned long now) return -1; } -unsigned long 
long vmi_sched_clock(void) +unsigned long long vmi_get_sched_cycles(void) { return read_available_cycles(); } +unsigned long vmi_cpu_khz(void) +{ + unsigned long long khz; + + khz = vmi_timer_ops.get_cycle_frequency(); + (void)do_div(khz, 1000); + return khz; +} + void __init vmi_time_init(void) { unsigned long long cycles_per_sec, cycles_per_msec; @@ -188,25 +190,16 @@ void __init vmi_time_init(void) set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); #endif - no_sync_cmos_clock = 1; - - vmi_get_wallclock_ts(&xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - real_cycles_accounted_system = read_real_cycles(); - update_xtime_from_wallclock(); per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); - cycles_per_jiffy = cycles_per_sec; (void)do_div(cycles_per_jiffy, HZ); cycles_per_alarm = cycles_per_sec; (void)do_div(cycles_per_alarm, alarm_hz); cycles_per_msec = cycles_per_sec; (void)do_div(cycles_per_msec, 1000); - cpu_khz = cycles_per_msec; printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, @@ -250,7 +243,7 @@ void __init vmi_timer_setup_boot_alarm(void) /* Initialize the time accounting variables for an AP on an SMP system. * Also, set the local alarm for the AP. */ -void __init vmi_timer_setup_secondary_alarm(void) +void __devinit vmi_timer_setup_secondary_alarm(void) { int cpu = smp_processor_id(); @@ -276,16 +269,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles) cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; while (cycles_not_accounted >= cycles_per_jiffy) { - /* systems wide jiffies and wallclock. */ + /* systems wide jiffies. */ do_timer(1); cycles_not_accounted -= cycles_per_jiffy; real_cycles_accounted_system += cycles_per_jiffy; } - if (vmi_timer_ops.wallclock_updated()) - update_xtime_from_wallclock(); - write_sequnlock(&xtime_lock); } @@ -380,7 +370,6 @@ int vmi_stop_hz_timer(void) unsigned long seq, next; unsigned long long real_cycles_expiry; int cpu = smp_processor_id(); - int idle; BUG_ON(!irqs_disabled()); if (sysctl_hz_timer != 0) @@ -388,13 +377,13 @@ int vmi_stop_hz_timer(void) cpu_set(cpu, nohz_cpu_mask); smp_mb(); + if (rcu_needs_cpu(cpu) || local_softirq_pending() || - (next = next_timer_interrupt(), time_before_eq(next, jiffies))) { + (next = next_timer_interrupt(), + time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { cpu_clear(cpu, nohz_cpu_mask); - next = jiffies; - idle = 0; - } else - idle = 1; + return 0; + } /* Convert jiffies to the real cycle counter. */ do { @@ -404,17 +393,13 @@ int vmi_stop_hz_timer(void) } while (read_seqretry(&xtime_lock, seq)); /* This cpu is going idle. Disable the periodic alarm. */ - if (idle) { - vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); - per_cpu(idle_start_jiffies, cpu) = jiffies; - } - + vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); + per_cpu(idle_start_jiffies, cpu) = jiffies; /* Set the real time alarm to expire at the next event. 
*/ vmi_timer_ops.set_alarm( - VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, - real_cycles_expiry, 0); - - return idle; + VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, + real_cycles_expiry, 0); + return 1; } static void vmi_reenable_hz_timer(int cpu) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 919fbf56849..10093082685 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -968,7 +968,6 @@ void pci_scan_msi_device(struct pci_dev *dev) {} int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) {return -1;} void pci_disable_msix(struct pci_dev *dev) {} void msi_remove_pci_irq_vectors(struct pci_dev *dev) {} -void disable_msi_mode(struct pci_dev *dev, int pos, int type) {} void pci_no_msi(void) {} EXPORT_SYMBOL(pci_enable_msix); EXPORT_SYMBOL(pci_disable_msix); diff --git a/arch/x86_64/kernel/hpet.c b/arch/x86_64/kernel/hpet.c index 65a0edd71a1..8cf0b8a1377 100644 --- a/arch/x86_64/kernel/hpet.c +++ b/arch/x86_64/kernel/hpet.c @@ -12,6 +12,12 @@ #include <asm/timex.h> #include <asm/hpet.h> +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + int nohpet __initdata; unsigned long hpet_address; @@ -106,9 +112,31 @@ int hpet_timer_stop_set_go(unsigned long tick) return 0; } +static cycle_t read_hpet(void) +{ + return (cycle_t)hpet_readl(HPET_COUNTER); +} + +static cycle_t __vsyscall_fn vread_hpet(void) +{ + return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vread = vread_hpet, +}; + int hpet_arch_init(void) { unsigned int id; + u64 tmp; if (!hpet_address) return -1; @@ -132,6 +160,22 @@ int hpet_arch_init(void) hpet_use_timer = (id & HPET_ID_LEGSUP); + /* + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * approximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + clocksource_register(&clocksource_hpet); + return hpet_timer_stop_set_go(hpet_tick); } @@ -444,68 +488,3 @@ static int __init nohpet_setup(char *s) } __setup("nohpet", nohpet_setup); - -#define HPET_MASK 0xFFFFFFFF -#define HPET_SHIFT 22 - -/* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 - -static void *hpet_ptr; - -static cycle_t read_hpet(void) -{ - return (cycle_t)readl(hpet_ptr); -} - -static cycle_t __vsyscall_fn vread_hpet(void) -{ - return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); -} - -struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = (cycle_t)HPET_MASK, - .mult = 0, /* set below */ - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .vread = vread_hpet, -}; - -static int __init init_hpet_clocksource(void) -{ - unsigned long hpet_period; - void __iomem *hpet_base; - u64 tmp; - - if (!hpet_address) - return -ENODEV; - - /* calculate the hpet address: */ - hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); - hpet_ptr = hpet_base + HPET_COUNTER; - - /* calculate 
the frequency: */ - hpet_period = readl(hpet_base + HPET_PERIOD); - - /* - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - - return clocksource_register(&clocksource_hpet); -} - -module_init(init_hpet_clocksource); diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 0a91368f8b6..c6a5bc7e811 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -789,7 +789,6 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_cfg *cfg = irq_cfg + irq; struct IO_APIC_route_entry entry; cpumask_t mask; - unsigned long flags; if (!IO_APIC_IRQ(irq)) return; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index c9addcfb96d..75d73a9aa9f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -358,6 +358,8 @@ void __init time_init(void) set_cyc2ns_scale(cpu_khz); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + init_tsc_clocksource(); + setup_irq(0, &irq0); } diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c index 89583186501..1a0edbbffaa 100644 --- a/arch/x86_64/kernel/tsc.c +++ b/arch/x86_64/kernel/tsc.c @@ -210,7 +210,7 @@ void mark_tsc_unstable(void) } EXPORT_SYMBOL_GPL(mark_tsc_unstable); -static int __init init_tsc_clocksource(void) +void __init init_tsc_clocksource(void) { if (!notsc) { clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, @@ -218,9 +218,6 @@ static int __init init_tsc_clocksource(void) if (check_tsc_unstable()) clocksource_tsc.rating = 0; - return clocksource_register(&clocksource_tsc); + clocksource_register(&clocksource_tsc); } - return 0; } - -module_init(init_tsc_clocksource); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index dc13ebacedf..44cd7b2ddf0 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -376,6 +376,25 @@ static int send_request(struct request *req) return 0; } +static void viocd_end_request(struct request *req, int uptodate) +{ + int nsectors = req->hard_nr_sectors; + + /* + * Make sure it's fully ended, and ensure that we process + * at least one sector. 
+ */ + if (blk_pc_request(req)) + nsectors = (req->data_len + 511) >> 9; + if (!nsectors) + nsectors = 1; + + if (end_that_request_first(req, uptodate, nsectors)) + BUG(); + add_disk_randomness(req->rq_disk); + blkdev_dequeue_request(req); + end_that_request_last(req, uptodate); +} static int rwreq; @@ -385,11 +404,11 @@ static void do_viocd_request(request_queue_t *q) while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) { if (!blk_fs_request(req)) - end_request(req, 0); + viocd_end_request(req, 0); else if (send_request(req) < 0) { printk(VIOCD_KERN_WARNING "unable to send message to OS/400!"); - end_request(req, 0); + viocd_end_request(req, 0); } else rwreq++; } @@ -601,9 +620,9 @@ return_complete: "with rc %d:0x%04X: %s\n", req, event->xRc, bevent->sub_result, err->msg); - end_request(req, 0); + viocd_end_request(req, 0); } else - end_request(req, 1); + viocd_end_request(req, 1); /* restart handling of incoming requests */ spin_unlock_irqrestore(&viocd_reqlock, flags); diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 54df35527bc..16dc5d1d3cb 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -3501,6 +3501,7 @@ get_serial_info(struct cyclades_port *info, tmp.irq = cinfo->irq; tmp.flags = info->flags; tmp.close_delay = info->close_delay; + tmp.closing_wait = info->closing_wait; tmp.baud_base = info->baud; tmp.custom_divisor = info->custom_divisor; tmp.hub6 = 0; /*!!! */ diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 88fc24fc439..de5be30484a 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -209,7 +209,6 @@ static void digi_send_break(struct channel *ch, int msec); static void setup_empty_event(struct tty_struct *tty, struct channel *ch); void epca_setup(char *, int *); -static int get_termio(struct tty_struct *, struct termio __user *); static int pc_write(struct tty_struct *, const unsigned char *, int); static int pc_init(void); static int init_PCI(void); @@ -2362,15 +2361,6 @@ static int pc_ioctl(struct tty_struct *tty, struct file * file, switch (cmd) { /* Begin switch cmd */ - -#if 0 /* Handled by calling layer properly */ - case TCGETS: - if (copy_to_user(argp, tty->termios, sizeof(struct ktermios))) - return -EFAULT; - return 0; - case TCGETA: - return get_termio(tty, argp); -#endif case TCSBRK: /* SVID version: non-zero arg --> no break */ retval = tty_check_change(tty); if (retval) @@ -2735,13 +2725,6 @@ static void setup_empty_event(struct tty_struct *tty, struct channel *ch) memoff(ch); } /* End setup_empty_event */ -/* --------------------- Begin get_termio ----------------------- */ - -static int get_termio(struct tty_struct * tty, struct termio __user * termio) -{ /* Begin get_termio */ - return kernel_termios_to_user_termio(termio, tty->termios); -} /* End get_termio */ - /* ---------------------- Begin epca_setup -------------------------- */ void epca_setup(char *str, int *ints) { /* Begin epca_setup */ diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index a7b33d2f599..e22146546ad 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2478,6 +2478,11 @@ static __devinit void default_find_bmc(void) if (!info) return; +#ifdef CONFIG_PPC_MERGE + if (check_legacy_ioport(ipmi_defaults[i].port)) + continue; +#endif + info->addr_source = NULL; info->si_type = ipmi_defaults[i].type; diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index ccaa6a39cb4..d42060ede93 100644 --- a/drivers/clocksource/acpi_pm.c +++ 
b/drivers/clocksource/acpi_pm.c @@ -214,4 +214,7 @@ pm_good: return clocksource_register(&clocksource_acpi_pm); } -module_init(init_acpi_pm_clocksource); +/* We use fs_initcall because we want the PCI fixups to have run + * but we still need to load before device_initcall + */ +fs_initcall(init_acpi_pm_clocksource); diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 4f3925ceb36..1bde303b970 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -116,4 +116,4 @@ static int __init init_cyclone_clocksource(void) return clocksource_register(&clocksource_cyclone); } -module_init(init_cyclone_clocksource); +arch_initcall(init_cyclone_clocksource); diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 64509689fa6..f17e9c7d4b3 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -215,14 +215,16 @@ config KEYBOARD_AAED2000 module will be called aaed2000_kbd. config KEYBOARD_GPIO - tristate "Buttons on CPU GPIOs (PXA)" - depends on (ARCH_SA1100 || ARCH_PXA || ARCH_S3C2410) + tristate "GPIO Buttons" + depends on GENERIC_GPIO help This driver implements support for buttons connected - directly to GPIO pins of SA1100, PXA or S3C24xx CPUs. + to GPIO pins of various CPUs (and some other chips). Say Y here if your device has buttons connected - directly to GPIO pins of the CPU. + directly to such GPIO pins. Your board-specific + setup logic must also provide a platform device, + with configuration data saying which GPIOs are used. To compile this driver as a module, choose M here: the module will be called gpio-keys. diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index fa03a00b4c6..ccf6df387b6 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -23,11 +23,9 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/irq.h> +#include <linux/gpio_keys.h> #include <asm/gpio.h> -#include <asm/arch/hardware.h> - -#include <asm/hardware/gpio_keys.h> static irqreturn_t gpio_keys_isr(int irq, void *dev_id) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d247429ee5e..54a1ad5eef4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3071,7 +3071,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped release_stripe(sh); } spin_lock_irq(&conf->device_lock); - conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1); + conf->expand_progress = (sector_nr + i) * new_data_disks; spin_unlock_irq(&conf->device_lock); /* Ok, those stripe are ready. We can start scheduling * reads on the source stripes. 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 68555c11f55..01869b1782e 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -38,6 +38,36 @@ static int msi_cache_init(void) return 0; } +static void msi_set_enable(struct pci_dev *dev, int enable) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (pos) { + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + control &= ~PCI_MSI_FLAGS_ENABLE; + if (enable) + control |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); + } +} + +static void msix_set_enable(struct pci_dev *dev, int enable) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (pos) { + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); + control &= ~PCI_MSIX_FLAGS_ENABLE; + if (enable) + control |= PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + } +} + static void msi_set_mask_bit(unsigned int irq, int flag) { struct msi_desc *entry; @@ -55,6 +85,8 @@ static void msi_set_mask_bit(unsigned int irq, int flag) mask_bits &= ~(1); mask_bits |= flag; pci_write_config_dword(entry->dev, pos, mask_bits); + } else { + msi_set_enable(entry->dev, !flag); } break; case PCI_CAP_ID_MSIX: @@ -192,44 +224,6 @@ static struct msi_desc* alloc_msi_entry(void) return entry; } -static void enable_msi_mode(struct pci_dev *dev, int pos, int type) -{ - u16 control; - - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (type == PCI_CAP_ID_MSI) { - /* Set enabled bits to single MSI & enable MSI_enable bit */ - msi_enable(control, 1); - pci_write_config_word(dev, msi_control_reg(pos), control); - dev->msi_enabled = 1; - } else { - msix_enable(control); - pci_write_config_word(dev, msi_control_reg(pos), control); - dev->msix_enabled = 1; - } - - pci_intx(dev, 0); /* disable intx */ -} - -void disable_msi_mode(struct pci_dev *dev, int pos, int type) -{ - u16 control; - - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (type == PCI_CAP_ID_MSI) { - /* Set enabled bits to single MSI & enable MSI_enable bit */ - msi_disable(control); - pci_write_config_word(dev, msi_control_reg(pos), control); - dev->msi_enabled = 0; - } else { - msix_disable(control); - pci_write_config_word(dev, msi_control_reg(pos), control); - dev->msix_enabled = 0; - } - - pci_intx(dev, 1); /* enable intx */ -} - #ifdef CONFIG_PM static int __pci_save_msi_state(struct pci_dev *dev) { @@ -238,12 +232,11 @@ static int __pci_save_msi_state(struct pci_dev *dev) struct pci_cap_saved_state *save_state; u32 *cap; - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (pos <= 0 || dev->no_msi) + if (!dev->msi_enabled) return 0; - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (!(control & PCI_MSI_FLAGS_ENABLE)) + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (pos <= 0) return 0; save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5, @@ -276,13 +269,18 @@ static void __pci_restore_msi_state(struct pci_dev *dev) struct pci_cap_saved_state *save_state; u32 *cap; + if (!dev->msi_enabled) + return; + save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI); pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (!save_state || pos <= 0) return; cap = &save_state->data[0]; + pci_intx(dev, 0); /* disable intx */ control = cap[i++] >> 16; + msi_set_enable(dev, 0); pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]); if (control & PCI_MSI_FLAGS_64BIT) { pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 
cap[i++]); @@ -292,7 +290,6 @@ static void __pci_restore_msi_state(struct pci_dev *dev) if (control & PCI_MSI_FLAGS_MASKBIT) pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]); pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); - enable_msi_mode(dev, pos, PCI_CAP_ID_MSI); pci_remove_saved_cap(save_state); kfree(save_state); } @@ -308,13 +305,11 @@ static int __pci_save_msix_state(struct pci_dev *dev) return 0; pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos <= 0 || dev->no_msi) + if (pos <= 0) return 0; /* save the capability */ pci_read_config_word(dev, msi_control_reg(pos), &control); - if (!(control & PCI_MSIX_FLAGS_ENABLE)) - return 0; save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16), GFP_KERNEL); if (!save_state) { @@ -376,6 +371,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) return; /* route the table */ + pci_intx(dev, 0); /* disable intx */ + msix_set_enable(dev, 0); irq = head = dev->first_msi_irq; while (head != tail) { entry = get_irq_msi(irq); @@ -386,7 +383,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev) } pci_write_config_word(dev, msi_control_reg(pos), save); - enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); } void pci_restore_msi_state(struct pci_dev *dev) @@ -411,6 +407,8 @@ static int msi_capability_init(struct pci_dev *dev) int pos, irq; u16 control; + msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); pci_read_config_word(dev, msi_control_reg(pos), &control); /* MSI Entry Initialization */ @@ -454,7 +452,9 @@ static int msi_capability_init(struct pci_dev *dev) set_irq_msi(irq, entry); /* Set MSI enabled bits */ - enable_msi_mode(dev, pos, PCI_CAP_ID_MSI); + pci_intx(dev, 0); /* disable intx */ + msi_set_enable(dev, 1); + dev->msi_enabled = 1; dev->irq = irq; return 0; @@ -481,6 +481,8 @@ static int msix_capability_init(struct pci_dev *dev, u8 bir; void __iomem *base; + msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); /* Request & Map MSI-X table region */ pci_read_config_word(dev, msi_control_reg(pos), &control); @@ -549,7 +551,9 @@ static int msix_capability_init(struct pci_dev *dev, } dev->first_msi_irq = entries[0].vector; /* Set MSI-X enabled bits */ - enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); + pci_intx(dev, 0); /* disable intx */ + msix_set_enable(dev, 1); + dev->msix_enabled = 1; return 0; } @@ -611,12 +615,11 @@ int pci_enable_msi(struct pci_dev* dev) WARN_ON(!!dev->msi_enabled); /* Check whether driver already requested for MSI-X irqs */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos > 0 && dev->msix_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI. " - "Device already has MSI-X enabled\n", - pci_name(dev)); - return -EINVAL; + if (dev->msix_enabled) { + printk(KERN_INFO "PCI: %s: Can't enable MSI. 
" + "Device already has MSI-X enabled\n", + pci_name(dev)); + return -EINVAL; } status = msi_capability_init(dev); return status; @@ -625,8 +628,7 @@ int pci_enable_msi(struct pci_dev* dev) void pci_disable_msi(struct pci_dev* dev) { struct msi_desc *entry; - int pos, default_irq; - u16 control; + int default_irq; if (!pci_msi_enable) return; @@ -636,16 +638,9 @@ void pci_disable_msi(struct pci_dev* dev) if (!dev->msi_enabled) return; - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (!pos) - return; - - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (!(control & PCI_MSI_FLAGS_ENABLE)) - return; - - - disable_msi_mode(dev, pos, PCI_CAP_ID_MSI); + msi_set_enable(dev, 0); + pci_intx(dev, 1); /* enable intx */ + dev->msi_enabled = 0; entry = get_irq_msi(dev->first_msi_irq); if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) { @@ -746,8 +741,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) WARN_ON(!!dev->msix_enabled); /* Check whether driver already requested for MSI irq */ - if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 && - dev->msi_enabled) { + if (dev->msi_enabled) { printk(KERN_INFO "PCI: %s: Can't enable MSI-X. " "Device already has an MSI irq assigned\n", pci_name(dev)); @@ -760,8 +754,6 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) void pci_disable_msix(struct pci_dev* dev) { int irq, head, tail = 0, warning = 0; - int pos; - u16 control; if (!pci_msi_enable) return; @@ -771,15 +763,9 @@ void pci_disable_msix(struct pci_dev* dev) if (!dev->msix_enabled) return; - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (!pos) - return; - - pci_read_config_word(dev, msi_control_reg(pos), &control); - if (!(control & PCI_MSIX_FLAGS_ENABLE)) - return; - - disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX); + msix_set_enable(dev, 0); + pci_intx(dev, 1); /* enable intx */ + dev->msix_enabled = 0; irq = head = dev->first_msi_irq; while (head != tail) { diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1e74e1ee8bd..df495300ce3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -881,13 +881,6 @@ pci_disable_device(struct pci_dev *dev) if (atomic_sub_return(1, &dev->enable_cnt) != 0) return; - if (dev->msi_enabled) - disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), - PCI_CAP_ID_MSI); - if (dev->msix_enabled) - disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), - PCI_CAP_ID_MSIX); - pci_read_config_word(dev, PCI_COMMAND, &pci_command); if (pci_command & PCI_COMMAND_MASTER) { pci_command &= ~PCI_COMMAND_MASTER; @@ -1277,6 +1270,33 @@ pci_intx(struct pci_dev *pdev, int enable) } } +/** + * pci_msi_off - disables any msi or msix capabilities + * @pdev: the PCI device to operate on + * + * If you want to use msi see pci_enable_msi and friends. + * This is a lower level primitive that allows us to disable + * msi operation at the device level. 
+ */ +void pci_msi_off(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (pos) { + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + control &= ~PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); + } + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (pos) { + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); + control &= ~PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + } +} + #ifndef HAVE_ARCH_PCI_SET_DMA_MASK /* * These can be overridden by arch-specific implementations diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a4f2d580625..ae7a975995a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -46,10 +46,8 @@ extern struct rw_semaphore pci_bus_sem; extern unsigned int pci_pm_d3_delay; #ifdef CONFIG_PCI_MSI -void disable_msi_mode(struct pci_dev *dev, int pos, int type); void pci_no_msi(void); #else -static inline void disable_msi_mode(struct pci_dev *dev, int pos, int type) { } static inline void pci_no_msi(void) { } #endif diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1bf54828756..7f94fc098cd 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1438,8 +1438,8 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quir */ static void __devinit quirk_pcie_pxh(struct pci_dev *dev) { - disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), - PCI_CAP_ID_MSI); + pci_msi_off(dev); + dev->no_msi = 1; printk(KERN_WARNING "PCI: PXH quirk detected, " diff --git a/drivers/serial/dz.c b/drivers/serial/dz.c index 587d87b9eb3..d31721f2744 100644 --- a/drivers/serial/dz.c +++ b/drivers/serial/dz.c @@ -170,8 +170,7 @@ static void dz_enable_ms(struct uart_port *port) * This routine deals with inputs from any lines. * ------------------------------------------------------------ */ -static inline void dz_receive_chars(struct dz_port *dport_in, - struct pt_regs *regs) +static inline void dz_receive_chars(struct dz_port *dport_in) { struct dz_port *dport; struct tty_struct *tty = NULL; @@ -226,7 +225,7 @@ static inline void dz_receive_chars(struct dz_port *dport_in, break; } - if (uart_handle_sysrq_char(&dport->port, ch, regs)) + if (uart_handle_sysrq_char(&dport->port, ch)) continue; if ((status & dport->port.ignore_status_mask) == 0) { @@ -332,7 +331,7 @@ static irqreturn_t dz_interrupt(int irq, void *dev) status = dz_in(dport, DZ_CSR); if ((status & (DZ_RDONE | DZ_RIE)) == (DZ_RDONE | DZ_RIE)) - dz_receive_chars(dport, regs); + dz_receive_chars(dport); if ((status & (DZ_TRDY | DZ_TIE)) == (DZ_TRDY | DZ_TIE)) dz_transmit_chars(dport); diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 7e7ec29782f..8e898e3d861 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -55,7 +55,7 @@ #include <linux/slab.h> #include <linux/kthread.h> #include <linux/mutex.h> -#include <linux/utsrelease.h> +#include <linux/utsname.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -547,7 +547,7 @@ static int get_device_info(struct us_data *us, const struct usb_device_id *id) idesc->bInterfaceSubClass, idesc->bInterfaceProtocol, msgs[msg], - UTS_RELEASE); + utsname()->release); } return 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 00a51835fd8..d7627fc4f11 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -131,7 +131,8 @@ #define PRINTKI(fmt, args...) 
printk(KERN_INFO "atyfb: " fmt, ## args) #define PRINTKE(fmt, args...) printk(KERN_ERR "atyfb: " fmt, ## args) -#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD) +#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || \ +defined (CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_FB_ATY_BACKLIGHT) static const u32 lt_lcd_regs[] = { CONFIG_PANEL_LG, LCD_GEN_CNTL_LG, diff --git a/drivers/video/aty/mach64_ct.c b/drivers/video/aty/mach64_ct.c index f3b487b8710..1fdcfdbf669 100644 --- a/drivers/video/aty/mach64_ct.c +++ b/drivers/video/aty/mach64_ct.c @@ -598,7 +598,6 @@ static void aty_resume_pll_ct(const struct fb_info *info, struct atyfb_par *par = info->par; if (par->mclk_per != par->xclk_per) { - int i; /* * This disables the sclk, crashes the computer as reported: * aty_st_pll_ct(SPLL_CNTL2, 3, info); @@ -614,7 +613,7 @@ static void aty_resume_pll_ct(const struct fb_info *info, * helps for Rage Mobilities that sometimes crash when * we switch to sclk. (Daniel Mantione, 13-05-2003) */ - for (i=0;i<=0x1ffff;i++); + udelay(500); } aty_st_pll_ct(PLL_REF_DIV, pll->ct.pll_ref_div, par); diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index 58c0ac733db..0a44c44672c 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -1074,9 +1074,9 @@ static ssize_t sm501fb_crtsrc_store(struct device *dev, if (len < 1) return -EINVAL; - if (strnicmp(buf, "crt", sizeof("crt")) == 0) + if (strnicmp(buf, "crt", 3) == 0) head = HEAD_CRT; - else if (strnicmp(buf, "panel", sizeof("panel")) == 0) + else if (strnicmp(buf, "panel", 5) == 0) head = HEAD_PANEL; else return -EINVAL; @@ -1098,7 +1098,7 @@ static ssize_t sm501fb_crtsrc_store(struct device *dev, writel(ctrl, info->regs + SM501_DC_CRT_CONTROL); sm501fb_sync_regs(info); - return (head == HEAD_CRT) ? 
3 : 5; + return len; } /* Prepare the device_attr for registration with sysfs later */ diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 0cfff4fefa9..e62f3fc7241 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -168,9 +168,9 @@ static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file, goto out; } i_size_write(inode, 0); - ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, inode, - ecryptfs_dentry, - ECRYPTFS_LOWER_I_MUTEX_NOT_HELD); + rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, + inode, ecryptfs_dentry, + ECRYPTFS_LOWER_I_MUTEX_NOT_HELD); ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE; out: return rc; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 812427e6805..fc4a3a22464 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -484,18 +484,12 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) struct vfsmount *lower_mnt; memset(&nd, 0, sizeof(struct nameidata)); - rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); + rc = path_lookup(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); if (rc) { ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); goto out; } lower_root = nd.dentry; - if (!lower_root->d_inode) { - ecryptfs_printk(KERN_WARNING, - "No directory to interpose on\n"); - rc = -ENOENT; - goto out_free; - } lower_mnt = nd.mnt; ecryptfs_set_superblock_lower(sb, lower_root->d_sb); sb->s_maxbytes = lower_root->d_sb->s_maxbytes; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 7be8e91b5ba..b731b09499c 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -446,6 +446,7 @@ static int ecryptfs_write_inode_size_to_header(struct file *lower_file, const struct address_space_operations *lower_a_ops; u64 file_size; +retry: header_page = grab_cache_page(lower_inode->i_mapping, 0); if (!header_page) { ecryptfs_printk(KERN_ERR, "grab_cache_page for " @@ -456,9 +457,10 @@ static int ecryptfs_write_inode_size_to_header(struct file *lower_file, lower_a_ops = lower_inode->i_mapping->a_ops; rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); if (rc) { - if (rc == AOP_TRUNCATED_PAGE) + if (rc == AOP_TRUNCATED_PAGE) { ecryptfs_release_lower_page(header_page, 0); - else + goto retry; + } else ecryptfs_release_lower_page(header_page, 1); goto out; } @@ -473,9 +475,10 @@ static int ecryptfs_write_inode_size_to_header(struct file *lower_file, if (rc < 0) ecryptfs_printk(KERN_ERR, "Error commiting header page " "write\n"); - if (rc == AOP_TRUNCATED_PAGE) + if (rc == AOP_TRUNCATED_PAGE) { ecryptfs_release_lower_page(header_page, 0); - else + goto retry; + } else ecryptfs_release_lower_page(header_page, 1); lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; mark_inode_dirty_sync(inode); @@ -502,7 +505,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode, goto out; } lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); - if (!lower_dentry->d_inode->i_op->getxattr) { + if (!lower_dentry->d_inode->i_op->getxattr || + !lower_dentry->d_inode->i_op->setxattr) { printk(KERN_WARNING "No support for setting xattr in lower filesystem\n"); rc = -ENOSYS; @@ -564,6 +568,7 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, { int rc = 0; +retry: *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index); if (!(*lower_page)) { rc = -EINVAL; @@ -577,18 +582,18 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, byte_offset, region_bytes); if 
(rc) { - ecryptfs_printk(KERN_ERR, "prepare_write for " + if (rc == AOP_TRUNCATED_PAGE) { + ecryptfs_release_lower_page(*lower_page, 0); + goto retry; + } else { + ecryptfs_printk(KERN_ERR, "prepare_write for " "lower_page_index = [0x%.16x] failed; rc = " "[%d]\n", lower_page_index, rc); - } -out: - if (rc && (*lower_page)) { - if (rc == AOP_TRUNCATED_PAGE) - ecryptfs_release_lower_page(*lower_page, 0); - else ecryptfs_release_lower_page(*lower_page, 1); - (*lower_page) = NULL; + (*lower_page) = NULL; + } } +out: return rc; } diff --git a/fs/libfs.c b/fs/libfs.c index cf79196535e..d93842d3c0a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -190,6 +190,10 @@ const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; +static const struct super_operations simple_super_operations = { + .statfs = simple_statfs, +}; + /* * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) @@ -199,7 +203,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, struct vfsmount *mnt) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); - static const struct super_operations default_ops = {.statfs = simple_statfs}; struct dentry *dentry; struct inode *root; struct qstr d_name = {.name = name, .len = strlen(name)}; @@ -212,7 +215,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = magic; - s->s_op = ops ? ops : &default_ops; + s->s_op = ops ? ops : &simple_super_operations; s->s_time_gran = 1; root = new_inode(s); if (!root) @@ -359,7 +362,6 @@ int simple_commit_write(struct file *file, struct page *page, int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) { - static struct super_operations s_ops = {.statfs = simple_statfs}; struct inode *inode; struct dentry *root; struct dentry *dentry; @@ -368,7 +370,7 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files s->s_blocksize = PAGE_CACHE_SIZE; s->s_blocksize_bits = PAGE_CACHE_SHIFT; s->s_magic = magic; - s->s_op = &s_ops; + s->s_op = &simple_super_operations; s->s_time_gran = 1; inode = new_inode(s); diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index 32d6678d0bb..9ae5e3782ed 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -16,13 +16,6 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); -#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY) -#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul) - -#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul) - -#else /* !PARAVIRT || USE_REAL_TIME_DELAY */ - /* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ @@ -32,7 +25,6 @@ extern void __delay(unsigned long loops); #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) -#endif void use_tsc_delay(void); diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h index 059a9ff28b4..340764076d5 100644 --- a/include/asm-i386/io_apic.h +++ b/include/asm-i386/io_apic.h @@ -3,6 +3,7 @@ #include <asm/types.h> #include <asm/mpspec.h> +#include <asm/apicdef.h> /* * Intel IO-APIC support for SMP and UP systems. 
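The asm-i386/delay.h hunk above leaves udelay() defined only in terms of __const_udelay() and the 0x10c7 constant, which the in-file comment states without showing the arithmetic. A standalone sketch of that fixed-point scaling (user-space C for illustration only; the loops_per_jiffy and HZ values are invented here, and the kernel performs the final multiply in assembly):

#include <stdint.h>
#include <stdio.h>

/* 0x10c7 = ceil(2^32 / 10^6): one microsecond expressed as a 0.32
 * fixed-point fraction of a second, so scaling by loops-per-second
 * needs only a 64-bit multiply and a 32-bit shift, never a division. */
#define UDELAY_MULT 0x10c7ul

int main(void)
{
	unsigned long usecs = 100;	/* requested delay */
	unsigned long lpj = 4000000;	/* assumed loops_per_jiffy */
	unsigned long hz = 250;		/* assumed CONFIG_HZ */

	/* mirrors __const_udelay(usecs * 0x10c7ul):
	 * loops = usecs * 2^32/10^6 * (lpj * HZ) >> 32 */
	uint64_t xloops = (uint64_t)usecs * UDELAY_MULT;
	uint64_t loops = xloops * lpj * hz >> 32;

	printf("busy-wait ~%llu loops for %luus\n",
	       (unsigned long long)loops, usecs);
	return 0;
}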
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index b04333ea6f3..64544cb85d6 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -33,7 +33,7 @@ extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DEFAULT -1 +#define NMI_DEFAULT 0 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 6317e0a4d73..f8319cae2ac 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -94,6 +94,8 @@ struct paravirt_ops u64 (*read_tsc)(void); u64 (*read_pmc)(void); + u64 (*get_scheduled_cycles)(void); + unsigned long (*get_cpu_khz)(void); void (*load_tr_desc)(void); void (*load_gdt)(const struct Xgt_desc_struct *); @@ -115,7 +117,6 @@ struct paravirt_ops void (*set_iopl_mask)(unsigned mask); void (*io_delay)(void); - void (*const_udelay)(unsigned long loops); #ifdef CONFIG_X86_LOCAL_APIC void (*apic_write)(unsigned long reg, unsigned long v); @@ -129,6 +130,8 @@ struct paravirt_ops void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(u32 addr); + void (fastcall *map_pt_hook)(int type, pte_t *va, u32 pfn); + void (*alloc_pt)(u32 pfn); void (*alloc_pd)(u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); @@ -183,9 +186,9 @@ static inline int set_wallclock(unsigned long nowtime) return paravirt_ops.set_wallclock(nowtime); } -static inline void do_time_init(void) +static inline void (*choose_time_init(void))(void) { - return paravirt_ops.time_init(); + return paravirt_ops.time_init; } /* The paravirtualized CPUID instruction. */ @@ -273,6 +276,9 @@ static inline void halt(void) #define rdtscll(val) (val = paravirt_ops.read_tsc()) +#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) +#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) + #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) #define rdpmc(counter,low,high) do { \ @@ -349,6 +355,8 @@ static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) +#define paravirt_map_pt_hook(type, va, pfn) paravirt_ops.map_pt_hook(type, va, pfn) + #define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) #define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index e6a4723f0eb..c3b58d473a5 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -263,6 +263,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) #endif /* @@ -469,10 +470,24 @@ extern pte_t *lookup_address(unsigned long address); #endif #if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) -#define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) +#define pte_offset_map(dir, address) \ +({ \ + pte_t *__ptep; \ + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ + __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE0);\ + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ + __ptep = __ptep + pte_index(address); \ + __ptep; \ +}) +#define 
pte_offset_map_nested(dir, address) \ +({ \ + pte_t *__ptep; \ + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ + __ptep = (pte_t *)kmap_atomic(pfn_to_page(pfn),KM_PTE1);\ + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ + __ptep = __ptep + pte_index(address); \ + __ptep; \ +}) #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) #else diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h index 571b4294dc2..eac011366dc 100644 --- a/include/asm-i386/time.h +++ b/include/asm-i386/time.h @@ -28,14 +28,16 @@ static inline int native_set_wallclock(unsigned long nowtime) return retval; } +extern void (*late_time_init)(void); +extern void hpet_time_init(void); + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> -extern unsigned long long native_sched_clock(void); #else /* !CONFIG_PARAVIRT */ #define get_wallclock() native_get_wallclock() #define set_wallclock(x) native_set_wallclock(x) -#define do_time_init() time_init_hook() +#define choose_time_init() hpet_time_init #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 4752c3a6a70..12dd67bf760 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -4,13 +4,21 @@ #include <linux/pm.h> #define TICK_SIZE (tick_nsec / 1000) + void setup_pit_timer(void); +unsigned long long native_sched_clock(void); +unsigned long native_calculate_cpu_khz(void); + /* Modifiers for buggy PIT handling */ extern int pit_latch_buggy; extern int timer_ack; extern int no_timer_check; -extern unsigned long long (*custom_sched_clock)(void); extern int no_sync_cmos_clock; extern int recalibrate_cpu_khz(void); +#ifndef CONFIG_PARAVIRT +#define get_scheduled_cycles(val) rdtscll(val) +#define calculate_cpu_khz() native_calculate_cpu_khz() +#endif + #endif diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index ac58580ad66..7fc512d90ea 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int node) .idle_idx = 1, \ .newidle_idx = 2, \ .wake_idx = 1, \ - .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ diff --git a/include/asm-i386/vmi.h b/include/asm-i386/vmi.h index 43c89333037..eb8bd892c01 100644 --- a/include/asm-i386/vmi.h +++ b/include/asm-i386/vmi.h @@ -97,6 +97,7 @@ #define VMI_CALL_SetInitialAPState 62 #define VMI_CALL_APICWrite 63 #define VMI_CALL_APICRead 64 +#define VMI_CALL_IODelay 65 #define VMI_CALL_SetLazyMode 73 /* diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index c1293121100..1f971eb7f71 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h @@ -49,7 +49,8 @@ extern struct vmi_timer_ops { extern void __init vmi_time_init(void); extern unsigned long vmi_get_wallclock(void); extern int vmi_set_wallclock(unsigned long now); -extern unsigned long long vmi_sched_clock(void); +extern unsigned long long vmi_get_sched_cycles(void); +extern unsigned long vmi_cpu_khz(void); #ifdef CONFIG_X86_LOCAL_APIC extern void __init vmi_timer_setup_boot_alarm(void); diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index 22ed6749557..233f1caae04 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -65,7 +65,6 @@ void build_cpu_to_node_map(void); .max_interval = 4, \ .busy_factor = 64, \ .imbalance_pct = 125, \ - .per_cpu_gain = 100, \ .cache_nice_tries = 2, \ .busy_idx = 2, \ .idle_idx = 1, \ @@ -97,7 +96,6 @@ void 
build_cpu_to_node_map(void); .newidle_idx = 0, /* unused */ \ .wake_idx = 1, \ .forkexec_idx = 1, \ - .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ diff --git a/include/asm-mips/mach-ip27/topology.h b/include/asm-mips/mach-ip27/topology.h index 44790fdc5d0..61d9be3f317 100644 --- a/include/asm-mips/mach-ip27/topology.h +++ b/include/asm-mips/mach-ip27/topology.h @@ -28,7 +28,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; .busy_factor = 32, \ .imbalance_pct = 125, \ .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_WAKE_BALANCE, \ diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 6610495f5f1..0ad21a849b5 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) .busy_factor = 32, \ .imbalance_pct = 125, \ .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ .busy_idx = 3, \ .idle_idx = 1, \ .newidle_idx = 2, \ diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h index f4fb238c89f..969d225a935 100644 --- a/include/asm-x86_64/io_apic.h +++ b/include/asm-x86_64/io_apic.h @@ -3,6 +3,7 @@ #include <asm/types.h> #include <asm/mpspec.h> +#include <asm/apicdef.h> /* * Intel IO-APIC support for SMP and UP systems. diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 72375e7d32a..ceb3d8dac33 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -64,7 +64,7 @@ extern int setup_nmi_watchdog(char *); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -#define NMI_DEFAULT -1 +#define NMI_DEFAULT 0 #define NMI_NONE 0 #define NMI_IO_APIC 1 #define NMI_LOCAL_APIC 2 diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 2facec5914d..4fd6fb23953 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -43,7 +43,6 @@ extern int __node_distance(int, int); .newidle_idx = 0, \ .wake_idx = 1, \ .forkexec_idx = 1, \ - .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h index 9a0a368852c..26c3e982828 100644 --- a/include/asm-x86_64/tsc.h +++ b/include/asm-x86_64/tsc.h @@ -55,6 +55,7 @@ static __always_inline cycles_t get_cycles_sync(void) extern void tsc_init(void); extern void mark_tsc_unstable(void); extern int unsynchronized_tsc(void); +extern void init_tsc_clocksource(void); /* * Boot-time check whether the TSCs are synchronized across diff --git a/include/asm-arm/hardware/gpio_keys.h b/include/linux/gpio_keys.h index 2b217c7b931..2b217c7b931 100644 --- a/include/asm-arm/hardware/gpio_keys.h +++ b/include/linux/gpio_keys.h diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 48148e0cdbd..75e55dcdeb1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -5,6 +5,14 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); +/* Check if a vma is migratable */ +static inline int vma_migratable(struct vm_area_struct *vma) +{ + if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) + return 0; + return 1; +} + #ifdef CONFIG_MIGRATION extern int isolate_lru_page(struct page *p, struct list_head *pagelist); extern int putback_lru_pages(struct list_head *l); diff --git a/include/linux/pci.h b/include/linux/pci.h index 2c4b6842dfb..78417e421b4 100644 --- a/include/linux/pci.h +++ 
b/include/linux/pci.h @@ -543,6 +543,7 @@ void pci_set_master(struct pci_dev *dev); int __must_check pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); +void pci_msi_off(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7a6d34ee5ab..f09cce2357f 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -292,9 +292,10 @@ #define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ #define PCI_MSI_MASK_BIT 16 /* Mask bits register */ -/* MSI-X registers (these are at offset PCI_MSI_FLAGS) */ -#define PCI_MSIX_FLAGS_QSIZE 0x7FF -#define PCI_MSIX_FLAGS_ENABLE (1 << 15) +/* MSI-X registers (these are at offset PCI_MSIX_FLAGS) */ +#define PCI_MSIX_FLAGS 2 +#define PCI_MSIX_FLAGS_QSIZE 0x7FF +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) #define PCI_MSIX_FLAGS_BIRMASK (7 << 0) #define PCI_MSIX_FLAGS_BITMASK (1 << 0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7c9a4d80e..49fe2997a01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -684,7 +684,6 @@ struct sched_domain { unsigned int imbalance_pct; /* No balance until over watermark */ unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ unsigned int busy_idx; unsigned int idle_idx; unsigned int newidle_idx; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 61fef376ed2..a946176db63 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -283,6 +283,43 @@ do { \ }) /* + * Locks two spinlocks l1 and l2. + * l1_first indicates if spinlock l1 should be taken first. + */ +static inline void double_spin_lock(spinlock_t *l1, spinlock_t *l2, + bool l1_first) + __acquires(l1) + __acquires(l2) +{ + if (l1_first) { + spin_lock(l1); + spin_lock(l2); + } else { + spin_lock(l2); + spin_lock(l1); + } +} + +/* + * Unlocks two spinlocks l1 and l2. + * l1_taken_first indicates if spinlock l1 was taken first and therefore + * should be released after spinlock l2. 
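+ *
+ * The flag must match the one passed to double_spin_lock(); the hrtimer
+ * and timer migration paths changed below derive it identically on the
+ * lock and unlock side, from smp_processor_id() < cpu.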
+ */ +static inline void double_spin_unlock(spinlock_t *l1, spinlock_t *l2, + bool l1_taken_first) + __releases(l1) + __releases(l2) +{ + if (l1_taken_first) { + spin_unlock(l2); + spin_unlock(l1); + } else { + spin_unlock(l1); + spin_unlock(l2); + } +} + +/* * Pull the atomic_t declaration: * (asm-mips/atomic.h needs above definitions) */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 6c5a6e6e813..a9d1f049cc1 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -96,7 +96,6 @@ .busy_factor = 64, \ .imbalance_pct = 110, \ .cache_nice_tries = 0, \ - .per_cpu_gain = 25, \ .busy_idx = 0, \ .idle_idx = 0, \ .newidle_idx = 1, \ @@ -128,7 +127,6 @@ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ .busy_idx = 2, \ .idle_idx = 1, \ .newidle_idx = 2, \ @@ -159,7 +157,6 @@ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ .busy_idx = 2, \ .idle_idx = 1, \ .newidle_idx = 2, \ @@ -193,7 +190,6 @@ .newidle_idx = 0, /* unused */ \ .wake_idx = 0, /* unused */ \ .forkexec_idx = 0, /* unused */ \ - .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 476cb0c0b4a..de93a8176ca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1355,17 +1355,16 @@ static void migrate_hrtimers(int cpu) tick_cancel_sched_timer(cpu); local_irq_disable(); - - spin_lock(&new_base->lock); - spin_lock(&old_base->lock); + double_spin_lock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + double_spin_unlock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); local_irq_enable(); put_cpu_var(hrtimer_bases); } diff --git a/kernel/sched.c b/kernel/sched.c index 5f102e6c7a4..a4ca632c477 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3006,23 +3006,6 @@ static inline void idle_balance(int cpu, struct rq *rq) } #endif -static inline void wake_priority_sleeper(struct rq *rq) -{ -#ifdef CONFIG_SCHED_SMT - if (!rq->nr_running) - return; - - spin_lock(&rq->lock); - /* - * If an SMT sibling task has been put to sleep for priority - * reasons reschedule the idle task to see if it can now run. - */ - if (rq->nr_running) - resched_task(rq->idle); - spin_unlock(&rq->lock); -#endif -} - DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); @@ -3239,10 +3222,7 @@ void scheduler_tick(void) update_cpu_clock(p, rq, now); - if (p == rq->idle) - /* Task on the idle queue */ - wake_priority_sleeper(rq); - else + if (p != rq->idle) task_running_tick(rq, p); #ifdef CONFIG_SMP update_load(rq); @@ -3251,136 +3231,6 @@ void scheduler_tick(void) #endif } -#ifdef CONFIG_SCHED_SMT -static inline void wakeup_busy_runqueue(struct rq *rq) -{ - /* If an SMT runqueue is sleeping due to priority reasons wake it up */ - if (rq->curr == rq->idle && rq->nr_running) - resched_task(rq->idle); -} - -/* - * Called with interrupt disabled and this_rq's runqueue locked. 
- */ -static void wake_sleeping_dependent(int this_cpu) -{ - struct sched_domain *tmp, *sd = NULL; - int i; - - for_each_domain(this_cpu, tmp) { - if (tmp->flags & SD_SHARE_CPUPOWER) { - sd = tmp; - break; - } - } - - if (!sd) - return; - - for_each_cpu_mask(i, sd->span) { - struct rq *smt_rq = cpu_rq(i); - - if (i == this_cpu) - continue; - if (unlikely(!spin_trylock(&smt_rq->lock))) - continue; - - wakeup_busy_runqueue(smt_rq); - spin_unlock(&smt_rq->lock); - } -} - -/* - * number of 'lost' timeslices this task wont be able to fully - * utilize, if another task runs on a sibling. This models the - * slowdown effect of other tasks running on siblings: - */ -static inline unsigned long -smt_slice(struct task_struct *p, struct sched_domain *sd) -{ - return p->time_slice * (100 - sd->per_cpu_gain) / 100; -} - -/* - * To minimise lock contention and not have to drop this_rq's runlock we only - * trylock the sibling runqueues and bypass those runqueues if we fail to - * acquire their lock. As we only trylock the normal locking order does not - * need to be obeyed. - */ -static int -dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p) -{ - struct sched_domain *tmp, *sd = NULL; - int ret = 0, i; - - /* kernel/rt threads do not participate in dependent sleeping */ - if (!p->mm || rt_task(p)) - return 0; - - for_each_domain(this_cpu, tmp) { - if (tmp->flags & SD_SHARE_CPUPOWER) { - sd = tmp; - break; - } - } - - if (!sd) - return 0; - - for_each_cpu_mask(i, sd->span) { - struct task_struct *smt_curr; - struct rq *smt_rq; - - if (i == this_cpu) - continue; - - smt_rq = cpu_rq(i); - if (unlikely(!spin_trylock(&smt_rq->lock))) - continue; - - smt_curr = smt_rq->curr; - - if (!smt_curr->mm) - goto unlock; - - /* - * If a user task with lower static priority than the - * running task on the SMT sibling is trying to schedule, - * delay it till there is proportionately less timeslice - * left of the sibling task to prevent a lower priority - * task from using an unfair proportion of the - * physical cpu's resources. -ck - */ - if (rt_task(smt_curr)) { - /* - * With real time tasks we run non-rt tasks only - * per_cpu_gain% of the time. 
- */ - if ((jiffies % DEF_TIMESLICE) > - (sd->per_cpu_gain * DEF_TIMESLICE / 100)) - ret = 1; - } else { - if (smt_curr->static_prio < p->static_prio && - !TASK_PREEMPTS_CURR(p, smt_rq) && - smt_slice(smt_curr, sd) > task_timeslice(p)) - ret = 1; - } -unlock: - spin_unlock(&smt_rq->lock); - } - return ret; -} -#else -static inline void wake_sleeping_dependent(int this_cpu) -{ -} -static inline int -dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p) -{ - return 0; -} -#endif - #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) void fastcall add_preempt_count(int val) @@ -3507,7 +3357,6 @@ need_resched_nonpreemptible: if (!rq->nr_running) { next = rq->idle; rq->expired_timestamp = 0; - wake_sleeping_dependent(cpu); goto switch_tasks; } } @@ -3547,8 +3396,6 @@ need_resched_nonpreemptible: } } next->sleep_type = SLEEP_NORMAL; - if (rq->nr_running == 1 && dependent_sleeper(cpu, rq, next)) - next = rq->idle; switch_tasks: if (next == rq->idle) schedstat_inc(rq, sched_goidle); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 193a0793af9..5b0e46b56fd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -55,16 +55,18 @@ static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; static int finished_booting; -/* clocksource_done_booting - Called near the end of bootup +/* clocksource_done_booting - Called near the end of core bootup * - * Hack to avoid lots of clocksource churn at boot time + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. */ static int __init clocksource_done_booting(void) { finished_booting = 1; return 0; } -late_initcall(clocksource_done_booting); +fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); diff --git a/kernel/timer.c b/kernel/timer.c index 6663a87f730..8ad384253ef 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1651,8 +1651,8 @@ static void __devinit migrate_timers(int cpu) new_base = get_cpu_var(tvec_bases); local_irq_disable(); - spin_lock(&new_base->lock); - spin_lock(&old_base->lock); + double_spin_lock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); BUG_ON(old_base->running_timer); @@ -1665,8 +1665,8 @@ static void __devinit migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + double_spin_unlock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); local_irq_enable(); put_cpu_var(tvec_bases); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cf2a5381030..d76e8eb342d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -321,15 +321,6 @@ static inline int check_pgd_range(struct vm_area_struct *vma, return 0; } -/* Check if a vma is migratable */ -static inline int vma_migratable(struct vm_area_struct *vma) -{ - if (vma->vm_flags & ( - VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) - return 0; - return 1; -} - /* * Check if all pages in a range are on a set of nodes. 
* If pagelist != NULL then isolate pages from the LRU and diff --git a/mm/migrate.c b/mm/migrate.c index e9b161bde95..7a66ca25dc8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -781,7 +781,7 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, err = -EFAULT; vma = find_vma(mm, pp->addr); - if (!vma) + if (!vma || !vma_migratable(vma)) goto set_status; page = follow_page(vma, pp->addr, FOLL_GET); diff --git a/mm/shmem.c b/mm/shmem.c index fcb07882c8e..b8c429a2d27 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -175,7 +175,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); } -static struct super_operations shmem_ops; +static const struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; static const struct inode_operations shmem_inode_operations; @@ -2383,7 +2383,7 @@ static const struct inode_operations shmem_special_inode_operations = { #endif }; -static struct super_operations shmem_ops = { +static const struct super_operations shmem_ops = { .alloc_inode = shmem_alloc_inode, .destroy_inode = shmem_destroy_inode, #ifdef CONFIG_TMPFS
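The double_spin_lock()/double_spin_unlock() helpers added to include/linux/spinlock.h above, and the way the kernel/timer.c and kernel/hrtimer.c hunks pick the ordering flag, can be exercised outside the kernel. A runnable user-space sketch of the same ordering discipline (pthread mutexes stand in for spinlock_t; the cpu ids, base_lock array and function names are invented for illustration):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* User-space re-statement of double_spin_lock(): the caller-supplied
 * flag fixes which lock is taken first, so every thread that agrees on
 * the flag's derivation uses one global order and AB-BA deadlock is
 * impossible. */
static void double_lock(pthread_mutex_t *l1, pthread_mutex_t *l2,
			bool l1_first)
{
	if (l1_first) {
		pthread_mutex_lock(l1);
		pthread_mutex_lock(l2);
	} else {
		pthread_mutex_lock(l2);
		pthread_mutex_lock(l1);
	}
}

/* Releases in the reverse of the acquisition order. */
static void double_unlock(pthread_mutex_t *l1, pthread_mutex_t *l2,
			  bool l1_taken_first)
{
	if (l1_taken_first) {
		pthread_mutex_unlock(l2);
		pthread_mutex_unlock(l1);
	} else {
		pthread_mutex_unlock(l1);
		pthread_mutex_unlock(l2);
	}
}

static pthread_mutex_t base_lock[2] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/* Mirrors migrate_timers()/migrate_hrtimers(): the lower-numbered cpu's
 * base lock is always taken first, via the same comparison the hunks
 * above use (smp_processor_id() < cpu). */
static void migrate(int this_cpu, int dead_cpu)
{
	bool this_first = this_cpu < dead_cpu;

	double_lock(&base_lock[this_cpu], &base_lock[dead_cpu], this_first);
	printf("cpu%d: timers moved from cpu%d\n", this_cpu, dead_cpu);
	double_unlock(&base_lock[this_cpu], &base_lock[dead_cpu], this_first);
}

int main(void)
{
	migrate(0, 1);
	return 0;
}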